From ce1ec9c17876ba1a514506b7b2c865a2f50e3f91 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Mon, 18 Dec 2017 18:38:00 +0000 Subject: [PATCH 001/115] When we're disabling the nvme device, some drives have a controller bug that requires 'hands off' for a period of time (2.3s) before we check the RDY bit. Sicne this is a very odd quirk for a very limited selection of drives, do this as a quirk. This prevented a successful reset of the card when the card wedged. Also, make sure that we comply with the advice from section 3.1.5 of the 1.3 spec says that transitioning CC.EN from 0 to 1 when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when CSTS.RDY is 0 "has undefined results". Short circuit when EN == RDY == desired state. Finally, fail the reset if the disable fails. This will lead to a failed device, which is what we want. (note: nda device needs work for coping with a failed device). Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D13389 --- sys/dev/nvme/nvme.c | 19 +++++++++++ sys/dev/nvme/nvme_ctrlr.c | 64 ++++++++++++++++++++++++++----------- sys/dev/nvme/nvme_private.h | 2 ++ 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c index ffb076b297f..9b9040b3112 100644 --- a/sys/dev/nvme/nvme.c +++ b/sys/dev/nvme/nvme.c @@ -90,6 +90,7 @@ static struct _pcsid int match_subdevice; uint16_t subdevice; const char *desc; + uint32_t quirks; } pci_ids[] = { { 0x01118086, 0, 0, "NVMe Controller" }, { IDT32_PCI_ID, 0, 0, "IDT NVMe Controller (32 channel)" }, @@ -100,6 +101,11 @@ static struct _pcsid { 0x09538086, 1, 0x3705, "DC P3500 SSD [2.5\" SFF]" }, { 0x09538086, 1, 0x3709, "DC P3600 SSD [Add-in Card]" }, { 0x09538086, 1, 0x370a, "DC P3600 SSD [2.5\" SFF]" }, + { 0x00031c58, 0, 0, "HGST SN100", QUIRK_DELAY_B4_CHK_RDY }, + { 0x00231c58, 0, 0, "WDC SN200", QUIRK_DELAY_B4_CHK_RDY }, + { 0x05401c5f, 0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY }, + { 0xa821144d, 0, 0, "Samsung PM1725", 
QUIRK_DELAY_B4_CHK_RDY }, + { 0xa822144d, 0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY }, { 0x00000000, 0, 0, NULL } }; @@ -240,6 +246,19 @@ nvme_attach(device_t dev) { struct nvme_controller *ctrlr = DEVICE2SOFTC(dev); int status; + struct _pcsid *ep; + uint32_t devid; + uint16_t subdevice; + + devid = pci_get_devid(dev); + subdevice = pci_get_subdevice(dev); + ep = pci_ids; + while (ep->devid) { + if (nvme_match(devid, subdevice, ep)) + break; + ++ep; + } + ctrlr->quirks = ep->quirks; status = nvme_ctrlr_construct(ctrlr, dev); diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 34d6ff83b69..715d233f12e 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$"); #include "nvme_private.h" +#define B4_CHK_RDY_DELAY_MS 2300 /* work arond controller bug */ + static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, struct nvme_async_event_request *aer); static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr); @@ -241,49 +243,65 @@ static int nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val) { int ms_waited; - union cc_register cc; union csts_register csts; - cc.raw = nvme_mmio_read_4(ctrlr, cc); csts.raw = nvme_mmio_read_4(ctrlr, csts); - if (cc.bits.en != desired_val) { - nvme_printf(ctrlr, "%s called with desired_val = %d " - "but cc.en = %d\n", __func__, desired_val, cc.bits.en); - return (ENXIO); - } - ms_waited = 0; - while (csts.bits.rdy != desired_val) { - DELAY(1000); if (ms_waited++ > ctrlr->ready_timeout_in_ms) { nvme_printf(ctrlr, "controller ready did not become %d " "within %d ms\n", desired_val, ctrlr->ready_timeout_in_ms); return (ENXIO); } + DELAY(1000); csts.raw = nvme_mmio_read_4(ctrlr, csts); } return (0); } -static void +static int nvme_ctrlr_disable(struct nvme_controller *ctrlr) { union cc_register cc; union csts_register csts; + int err; cc.raw = nvme_mmio_read_4(ctrlr, cc); csts.raw = nvme_mmio_read_4(ctrlr, 
csts); - if (cc.bits.en == 1 && csts.bits.rdy == 0) - nvme_ctrlr_wait_for_ready(ctrlr, 1); + /* + * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1 + * when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when + * CSTS.RDY is 0 "has undefined results" So make sure that CSTS.RDY + * isn't the desired value. Short circuit if we're already disabled. + */ + if (cc.bits.en == 1) { + if (csts.bits.rdy == 0) { + /* EN == 1, wait for RDY == 1 or fail */ + err = nvme_ctrlr_wait_for_ready(ctrlr, 1); + if (err != 0) + return (err); + } + } else { + /* EN == 0 already wait for RDY == 0 */ + if (csts.bits.rdy == 0) + return (0); + else + return (nvme_ctrlr_wait_for_ready(ctrlr, 0)); + } cc.bits.en = 0; nvme_mmio_write_4(ctrlr, cc, cc.raw); - DELAY(5000); - nvme_ctrlr_wait_for_ready(ctrlr, 0); + /* + * Some drives have issues with accessing the mmio after we + * disable, so delay for a bit after we write the bit to + * cope with these issues. + */ + if (ctrlr->quirks) + pause("nvmeR", B4_CHK_RDY_DELAY_MS * hz / 1000); + return (nvme_ctrlr_wait_for_ready(ctrlr, 0)); } static int @@ -292,15 +310,24 @@ nvme_ctrlr_enable(struct nvme_controller *ctrlr) union cc_register cc; union csts_register csts; union aqa_register aqa; + int err; cc.raw = nvme_mmio_read_4(ctrlr, cc); csts.raw = nvme_mmio_read_4(ctrlr, csts); + /* + * See note in nvme_ctrlr_disable. Short circuit if we're already enabled. 
+ */ if (cc.bits.en == 1) { if (csts.bits.rdy == 1) return (0); else return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); + } else { + /* EN == 0 already wait for RDY == 0 or fail */ + err = nvme_ctrlr_wait_for_ready(ctrlr, 0); + if (err != 0) + return (err); } nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr); @@ -326,7 +353,6 @@ nvme_ctrlr_enable(struct nvme_controller *ctrlr) cc.bits.mps = (PAGE_SIZE >> 13); nvme_mmio_write_4(ctrlr, cc, cc.raw); - DELAY(5000); return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); } @@ -334,7 +360,7 @@ nvme_ctrlr_enable(struct nvme_controller *ctrlr) int nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) { - int i; + int i, err; nvme_admin_qpair_disable(&ctrlr->adminq); /* @@ -349,7 +375,9 @@ nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) DELAY(100*1000); - nvme_ctrlr_disable(ctrlr); + err = nvme_ctrlr_disable(ctrlr); + if (err != 0) + return err; return (nvme_ctrlr_enable(ctrlr)); } diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index f61b824da81..19825ac6d33 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -246,6 +246,8 @@ struct nvme_controller { struct mtx lock; uint32_t ready_timeout_in_ms; + uint32_t quirks; +#define QUIRK_DELAY_B4_CHK_RDY 1 /* Can't touch MMIO on disable */ bus_space_tag_t bus_tag; bus_space_handle_t bus_handle; From 989c7f0b7c4a7fe9ac072d47d06e3773652654c8 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Mon, 18 Dec 2017 20:11:21 +0000 Subject: [PATCH 002/115] Although we only have one quirk at the moment, guard against the day we have more than one by checking the actual quirk bit before delaying the reset. 
Noticed by: rpokala@ --- sys/dev/nvme/nvme_ctrlr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 715d233f12e..fdd58279378 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -299,7 +299,7 @@ nvme_ctrlr_disable(struct nvme_controller *ctrlr) * disable, so delay for a bit after we write the bit to * cope with these issues. */ - if (ctrlr->quirks) + if (ctrlr->quirks & QUIRK_DELAY_B4_CHK_RDY) pause("nvmeR", B4_CHK_RDY_DELAY_MS * hz / 1000); return (nvme_ctrlr_wait_for_ready(ctrlr, 0)); } From 3a014c56c1efc90f8031882409db72d7deb7e19c Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Mon, 18 Dec 2017 23:35:14 +0000 Subject: [PATCH 003/115] Catch up to r325719 which makes the kern.proc.pid sysctl "work" for zombies. Some of the ptrace tests need to wait for a child process to become a zombie before proceeding. The parent process polls the child process via the kern.proc.pid sysctl to wait for it to become a zombie. Previously the code polled until the sysctl failed with ESRCH. Now it will poll until either the sysctl fails with ESRCH (for compatibility with older kernels) or returns a kinfo_proc structure with the ki_stat field set to SZOMB. Reported by: Jenkins Tested by: markj Discussed with: mjg MFC after: 1 week --- tests/sys/kern/ptrace_test.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/sys/kern/ptrace_test.c b/tests/sys/kern/ptrace_test.c index ce648b99c08..2d8524317f3 100644 --- a/tests/sys/kern/ptrace_test.c +++ b/tests/sys/kern/ptrace_test.c @@ -104,6 +104,10 @@ wait_for_zombie(pid_t pid) /* * Wait for a process to exit. This is kind of gross, but * there is not a better way. + * + * Prior to r325719, the kern.proc.pid. sysctl failed + * with ESRCH. After that change, a valid struct kinfo_proc + * is returned for zombies with ki_stat set to SZOMB. 
*/ for (;;) { struct kinfo_proc kp; @@ -116,10 +120,11 @@ wait_for_zombie(pid_t pid) mib[3] = pid; len = sizeof(kp); if (sysctl(mib, nitems(mib), &kp, &len, NULL, 0) == -1) { - /* The KERN_PROC_PID sysctl fails for zombies. */ ATF_REQUIRE(errno == ESRCH); break; } + if (kp.ki_stat == SZOMB) + break; usleep(5000); } } From 22cf6021fce330a2e856eef764b842c1403a603b Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 19 Dec 2017 00:18:17 +0000 Subject: [PATCH 004/115] Support more images (but still no geli) Print a qemu line to a shell script to ease testing each image Start to support multiple architectures (still very green) Create /etc/rc that echos success and halts the system for better automation (also include halt) Create /etc/fstab on a per-boot type to test loader's passing root to kernel. This lets me run a test, connect to it with telnet and get either a timeout, or a report of success. Sponsored by: Netflix --- tools/boot/rootgen.sh | 233 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 216 insertions(+), 17 deletions(-) diff --git a/tools/boot/rootgen.sh b/tools/boot/rootgen.sh index e58c780fb0b..b0b4571a76b 100755 --- a/tools/boot/rootgen.sh +++ b/tools/boot/rootgen.sh @@ -33,28 +33,41 @@ mk_nogeli_gpt_ufs_legacy() { src=$1 img=$2 - rm -f ${img} ${img}.p2 + cat > ${src}/etc/fstab < ${src}/etc/fstab < ${src}/etc/fstab < ${src}/etc/fstab < $sh +# https://wiki.freebsd.org/arm64/QEMU also has +# -device virtio-net-device,netdev=net0 +# -netdev user,id=net0 +} + +# Amd64 qemu +qemu_amd64_legacy() +{ + img=$1 + sh=$2 + + echo "qemu-system-x86_64 --drive file=${img},format=raw ${qser}" > $sh +} + +qemu_amd64_uefi() +{ + img=$1 + sh=$2 + + echo "qemu-system-x86_64 -bios ~/bios/OVMF-X64.fd --drive file=${img},format=raw ${qser}" > $sh +} + +qemu_amd64_both() +{ + img=$1 + sh=$2 + + echo "qemu-system-x86_64 --drive file=${img},format=raw ${qser}" > $sh + echo "qemu-system-x86_64 -bios ~/bios/OVMF-X64.fd --drive file=${img},format=raw ${qser}" > $sh +} + 
+# arm +# nothing listed? + +# i386 +qemu_i386_legacy() +{ + img=$1 + sh=$2 + + echo "qemu-system-i386 --drive file=${img},format=raw ${qser}" > $sh +} + +# Not yet supported +qemu_i386_uefi() +{ + img=$1 + sh=$2 + + echo "qemu-system-i386 -bios ~/bios/OVMF-X32.fd --drive file=${img},format=raw ${qser}" > $sh +} + +# Needs UEFI to be supported +qemu_i386_both() +{ + img=$1 + sh=$2 + + echo "qemu-system-i386 --drive file=${img},format=raw ${qser}" > $sh + echo "qemu-system-i386 -bios ~/bios/OVMF-X32.fd --drive file=${img},format=raw ${qser}" > $sh +} + +# mips +# qemu-system-mips -kernel /path/to/rootfs/boot/kernel/kernel -nographic -hda /path/to/disk.img -m 2048 + +# Powerpc -- doesn't work but maybe it would enough for testing -- needs details +# powerpc64 +# qemu-system-ppc64 -drive file=/path/to/disk.img,format=raw + +# sparc64 +# 10.3 works, 12-current (which one?) hangs +# qemu-system-sparc64 -drive file=/path/to/disk.img,format=raw + # Misc variables SRCTOP=$(make -v SRCTOP) @@ -188,27 +299,115 @@ echo -h -D -S115200 > ${DESTDIR}/boot.config cp /boot/device.hints ${DESTDIR}/boot/device.hints # Assume we're already built make install DESTDIR=${DESTDIR} MK_MAN=no MK_INSTALL_AS_USER=yes -# Copy init, /bin/sh and minimal libraries -mkdir -p ${DESTDIR}/sbin ${DESTDIR}/bin ${DESTDIR}/lib ${DESTDIR}/libexec -for f in /sbin/init /bin/sh $(ldd /bin/sh | awk 'NF == 4 { print $3; }') /libexec/ld-elf.so.1; do +# Copy init, /bin/sh, minimal libraries and testing /etc/rc +mkdir -p ${DESTDIR}/sbin ${DESTDIR}/bin \ + ${DESTDIR}/lib ${DESTDIR}/libexec \ + ${DESTDIR}/etc ${DESTDIR}/dev +for f in /sbin/halt /sbin/init /bin/sh $(ldd /bin/sh | awk 'NF == 4 { print $3; }') /libexec/ld-elf.so.1; do cp $f ${DESTDIR}/$f done -mkdir ${DESTDIR}/dev +cat > ${DESTDIR}/etc/rc < Date: Tue, 19 Dec 2017 02:49:11 +0000 Subject: [PATCH 005/115] Implement ACPI CPU support when Processor object is not present By the ACPI standard (ACPI 5 chapter 8.4 Declaring Processors) Processors can be 
implemented in 2 distinct ways: * Through a Processor object type (which provides P_BLK) * Through a Device object type Prior to this change, the FreeBSD driver only supported the former. AMD Epyc / Poweredge systems we are testing both implement the latter only. Add the missing support. Because P_BLK is not defined in the device object case, C-states entering must be completely controlled via _CST methods rather than P_LVL2/3. John Baldwin points out that ACPI 6.0 formally deprecates the Processor keyword, so eventually processors will only be enumerated as Device objects. Submitted by: attilio Reviewed by: jhb, markj, Anton Rang Relnotes: maybe Sponsored by: Dell EMC Isilon Differential Revision: https://reviews.freebsd.org/D13457 --- sys/dev/acpica/acpi_cpu.c | 104 +++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 36 deletions(-) diff --git a/sys/dev/acpica/acpi_cpu.c b/sys/dev/acpica/acpi_cpu.c index 855e57b45d2..2be10d3ea83 100644 --- a/sys/dev/acpica/acpi_cpu.c +++ b/sys/dev/acpica/acpi_cpu.c @@ -140,6 +140,8 @@ struct acpi_cpu_device { #define CST_FFH_MWAIT_HW_COORD 0x0001 #define CST_FFH_MWAIT_BM_AVOID 0x0002 +#define CPUDEV_DEVICE_ID "ACPI0007" + /* Allow users to ignore processor orders in MADT. 
*/ static int cpu_unordered; SYSCTL_INT(_debug_acpi, OID_AUTO, cpu_unordered, CTLFLAG_RDTUN, @@ -236,14 +238,21 @@ MODULE_DEPEND(cpu, acpi, 1, 1, 1); static int acpi_cpu_probe(device_t dev) { + static char *cpudev_ids[] = { CPUDEV_DEVICE_ID, NULL }; int acpi_id, cpu_id; ACPI_BUFFER buf; ACPI_HANDLE handle; ACPI_OBJECT *obj; ACPI_STATUS status; + ACPI_OBJECT_TYPE type; - if (acpi_disabled("cpu") || acpi_get_type(dev) != ACPI_TYPE_PROCESSOR || - acpi_cpu_disabled) + if (acpi_disabled("cpu") || acpi_cpu_disabled) + return (ENXIO); + type = acpi_get_type(dev); + if (type != ACPI_TYPE_PROCESSOR && type != ACPI_TYPE_DEVICE) + return (ENXIO); + if (type == ACPI_TYPE_DEVICE && + ACPI_ID_PROBE(device_get_parent(dev), dev, cpudev_ids) == NULL) return (ENXIO); handle = acpi_get_handle(dev); @@ -251,29 +260,39 @@ acpi_cpu_probe(device_t dev) cpu_softc = malloc(sizeof(struct acpi_cpu_softc *) * (mp_maxid + 1), M_TEMP /* XXX */, M_WAITOK | M_ZERO); - /* Get our Processor object. */ - buf.Pointer = NULL; - buf.Length = ACPI_ALLOCATE_BUFFER; - status = AcpiEvaluateObject(handle, NULL, NULL, &buf); - if (ACPI_FAILURE(status)) { - device_printf(dev, "probe failed to get Processor obj - %s\n", - AcpiFormatException(status)); - return (ENXIO); - } - obj = (ACPI_OBJECT *)buf.Pointer; - if (obj->Type != ACPI_TYPE_PROCESSOR) { - device_printf(dev, "Processor object has bad type %d\n", obj->Type); - AcpiOsFree(obj); - return (ENXIO); - } + if (type == ACPI_TYPE_PROCESSOR) { + /* Get our Processor object. 
*/ + buf.Pointer = NULL; + buf.Length = ACPI_ALLOCATE_BUFFER; + status = AcpiEvaluateObject(handle, NULL, NULL, &buf); + if (ACPI_FAILURE(status)) { + device_printf(dev, "probe failed to get Processor obj - %s\n", + AcpiFormatException(status)); + return (ENXIO); + } + obj = (ACPI_OBJECT *)buf.Pointer; + if (obj->Type != ACPI_TYPE_PROCESSOR) { + device_printf(dev, "Processor object has bad type %d\n", + obj->Type); + AcpiOsFree(obj); + return (ENXIO); + } - /* - * Find the processor associated with our unit. We could use the - * ProcId as a key, however, some boxes do not have the same values - * in their Processor object as the ProcId values in the MADT. - */ - acpi_id = obj->Processor.ProcId; - AcpiOsFree(obj); + /* + * Find the processor associated with our unit. We could use the + * ProcId as a key, however, some boxes do not have the same values + * in their Processor object as the ProcId values in the MADT. + */ + acpi_id = obj->Processor.ProcId; + AcpiOsFree(obj); + } else { + status = acpi_GetInteger(handle, "_UID", &acpi_id); + if (ACPI_FAILURE(status)) { + device_printf(dev, "Device object has bad value - %s\n", + AcpiFormatException(status)); + return (ENXIO); + } + } if (acpi_pcpu_get_id(dev, &acpi_id, &cpu_id) != 0) return (ENXIO); @@ -325,19 +344,32 @@ acpi_cpu_attach(device_t dev) cpu_smi_cmd = AcpiGbl_FADT.SmiCommand; cpu_cst_cnt = AcpiGbl_FADT.CstControl; - buf.Pointer = NULL; - buf.Length = ACPI_ALLOCATE_BUFFER; - status = AcpiEvaluateObject(sc->cpu_handle, NULL, NULL, &buf); - if (ACPI_FAILURE(status)) { - device_printf(dev, "attach failed to get Processor obj - %s\n", - AcpiFormatException(status)); - return (ENXIO); + if (acpi_get_type(dev) == ACPI_TYPE_PROCESSOR) { + buf.Pointer = NULL; + buf.Length = ACPI_ALLOCATE_BUFFER; + status = AcpiEvaluateObject(sc->cpu_handle, NULL, NULL, &buf); + if (ACPI_FAILURE(status)) { + device_printf(dev, "attach failed to get Processor obj - %s\n", + AcpiFormatException(status)); + return (ENXIO); + } + obj = 
(ACPI_OBJECT *)buf.Pointer; + sc->cpu_p_blk = obj->Processor.PblkAddress; + sc->cpu_p_blk_len = obj->Processor.PblkLength; + sc->cpu_acpi_id = obj->Processor.ProcId; + AcpiOsFree(obj); + } else { + KASSERT(acpi_get_type(dev) == ACPI_TYPE_DEVICE, + ("Unexpected ACPI object")); + status = acpi_GetInteger(sc->cpu_handle, "_UID", &sc->cpu_acpi_id); + if (ACPI_FAILURE(status)) { + device_printf(dev, "Device object has bad value - %s\n", + AcpiFormatException(status)); + return (ENXIO); + } + sc->cpu_p_blk = 0; + sc->cpu_p_blk_len = 0; } - obj = (ACPI_OBJECT *)buf.Pointer; - sc->cpu_p_blk = obj->Processor.PblkAddress; - sc->cpu_p_blk_len = obj->Processor.PblkLength; - sc->cpu_acpi_id = obj->Processor.ProcId; - AcpiOsFree(obj); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "acpi_cpu%d: P_BLK at %#x/%d\n", device_get_unit(dev), sc->cpu_p_blk, sc->cpu_p_blk_len)); From a5d5fd9ffd6699e8bd09fd1098867e91004a313f Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Tue, 19 Dec 2017 03:15:20 +0000 Subject: [PATCH 006/115] lld: Don't write preemptible symbol values to the .got. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not necessary and matches what bfd and gold do. This was a regression from [LLVM] r315658. Obtained from: LLVM r321023 by Rafael Espíndola --- contrib/llvm/tools/lld/ELF/Relocations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/llvm/tools/lld/ELF/Relocations.cpp b/contrib/llvm/tools/lld/ELF/Relocations.cpp index 70ca76a726d..ce8ec6f995c 100644 --- a/contrib/llvm/tools/lld/ELF/Relocations.cpp +++ b/contrib/llvm/tools/lld/ELF/Relocations.cpp @@ -812,7 +812,7 @@ static void addGotEntry(SymbolBody &Sym, bool Preemptible) { // // This is ugly -- the difference between REL and RELA should be // handled in a better way. It's a TODO. 
- if (!Config->IsRela) + if (!Config->IsRela && !Preemptible) InX::Got->Relocations.push_back({R_ABS, Target->GotRel, Off, 0, &Sym}); } } From 8fbecf7b2eaa04cb6dc07eb1327a685c43672cd1 Mon Sep 17 00:00:00 2001 From: Eitan Adler Date: Tue, 19 Dec 2017 03:35:39 +0000 Subject: [PATCH 007/115] arclint: revert in prep for recommitting --- .arclint | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.arclint b/.arclint index ea99eb54a5f..31bda09b1a8 100644 --- a/.arclint +++ b/.arclint @@ -9,8 +9,7 @@ "type": "spelling" }, "chmod": { - "type": "chmod", - "exclude": "(/tests/)" + "type": "chmod" }, "merge-conflict": { "type": "merge-conflict" @@ -24,4 +23,3 @@ } } } - From 9ff7bf7a93bddc79a3dc5616c82cdd3b806bf606 Mon Sep 17 00:00:00 2001 From: Eitan Adler Date: Tue, 19 Dec 2017 03:38:06 +0000 Subject: [PATCH 008/115] arc lint: ignore /tests/ in chmod shell scripts in scripts don't need to be chmod +x to work. In fact most are not. Of the tests I found from a simple search: 65 are chmod +x 84 are chmod -x simply disable the check for test shell scripts. Recommit requested by: cem, rgrimes --- .arclint | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.arclint b/.arclint index 31bda09b1a8..5078cba32c3 100644 --- a/.arclint +++ b/.arclint @@ -9,7 +9,8 @@ "type": "spelling" }, "chmod": { - "type": "chmod" + "type": "chmod", + "exclude": "(/tests/)" }, "merge-conflict": { "type": "merge-conflict" From 0ff3f28b2d3886b23c9c228add0fb2d540c3fbe4 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 19 Dec 2017 04:05:43 +0000 Subject: [PATCH 009/115] Simplify things a little. The RETURN macro isn't required. It's only used once, inside an #ifdef where it would be defined to be return. 
Sponsored by: Netflix --- stand/common/interp.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/stand/common/interp.c b/stand/common/interp.c index f4117b9b3e5..5cf4124660d 100644 --- a/stand/common/interp.c +++ b/stand/common/interp.c @@ -39,11 +39,7 @@ __FBSDID("$FreeBSD$"); #ifdef BOOT_FORTH #include "ficl.h" -#define RETURN(x) stackPushINT(bf_vm->pStack,!x); return(x) - extern FICL_VM *bf_vm; -#else -#define RETURN(x) return(x) #endif #define MAXARGS 20 /* maximum number of arguments allowed */ @@ -51,12 +47,10 @@ extern FICL_VM *bf_vm; static void prompt(void); #ifndef BOOT_FORTH -static int perform(int argc, char *argv[]); - /* * Perform the command */ -int +static int perform(int argc, char *argv[]) { int result; @@ -82,7 +76,7 @@ perform(int argc, char *argv[]) } else { command_errmsg = "unknown command"; } - RETURN(result); + return(result); } #endif /* ! BOOT_FORTH */ From 6bc860372dd7b2be0227ca213af631bdc51c2339 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 19 Dec 2017 04:05:55 +0000 Subject: [PATCH 010/115] Interact is always called with NULL. Simplify code a little by removing this argument, and expanding when rc is NULL. This effectively completes the back out of custom scripts for tftp booted loaders from r269153 that was started in r292344 with the new path tricks that obsoleted it. 
Submitted by: Netflix --- stand/common/bootstrap.h | 4 ++-- stand/common/interp.c | 11 ++++------- stand/common/interp_forth.c | 13 ++++--------- stand/efi/loader/main.c | 2 +- stand/i386/loader/main.c | 2 +- stand/mips/beri/loader/main.c | 2 +- stand/ofw/common/main.c | 2 +- stand/powerpc/kboot/main.c | 2 +- stand/powerpc/ps3/main.c | 2 +- stand/sparc64/loader/main.c | 2 +- stand/uboot/common/main.c | 2 +- stand/userboot/userboot/main.c | 2 +- 12 files changed, 19 insertions(+), 27 deletions(-) diff --git a/stand/common/bootstrap.h b/stand/common/bootstrap.h index 5d6a63db2b1..a570720b425 100644 --- a/stand/common/bootstrap.h +++ b/stand/common/bootstrap.h @@ -45,7 +45,7 @@ extern char command_errbuf[COMMAND_ERRBUFSZ]; #define CMD_FATAL 4 /* interp.c */ -void interact(const char *rc); +void interact(void); int include(const char *filename); /* interp_backslash.c */ @@ -55,7 +55,7 @@ char *backslash(const char *str); int parse(int *argc, char ***argv, const char *str); /* interp_forth.c */ -void bf_init(const char *rc); +void bf_init(void); int bf_run(char *line); /* boot.c */ diff --git a/stand/common/interp.c b/stand/common/interp.c index 5cf4124660d..6b1b8bbf0db 100644 --- a/stand/common/interp.c +++ b/stand/common/interp.c @@ -84,7 +84,7 @@ perform(int argc, char *argv[]) * Interactive mode */ void -interact(const char *rc) +interact(void) { static char input[256]; /* big enough? */ #ifndef BOOT_FORTH @@ -93,14 +93,11 @@ interact(const char *rc) #endif #ifdef BOOT_FORTH - bf_init((rc) ? "" : NULL); + bf_init(); #endif - if (rc == NULL) { - /* Read our default configuration. */ - include("/boot/loader.rc"); - } else if (*rc != '\0') - include(rc); + /* Read our default configuration. 
*/ + include("/boot/loader.rc"); printf("\n"); diff --git a/stand/common/interp_forth.c b/stand/common/interp_forth.c index a3b77769d38..d617ce85842 100644 --- a/stand/common/interp_forth.c +++ b/stand/common/interp_forth.c @@ -250,7 +250,7 @@ bf_command(FICL_VM *vm) * Initialise the Forth interpreter, create all our commands as words. */ void -bf_init(const char *rc) +bf_init(void) { struct bootblk_command **cmdp; char create_buf[41]; /* 31 characters-long builtins */ @@ -280,14 +280,9 @@ bf_init(const char *rc) ficlSetEnv(bf_sys, "loader_version", bootprog_rev); /* try to load and run init file if present */ - if (rc == NULL) - rc = "/boot/boot.4th"; - if (*rc != '\0') { - fd = open(rc, O_RDONLY); - if (fd != -1) { - (void)ficlExecFD(bf_vm, fd); - close(fd); - } + if ((fd = open("/boot/boot.4th", O_RDONLY)) != -1) { + (void)ficlExecFD(bf_vm, fd); + close(fd); } } diff --git a/stand/efi/loader/main.c b/stand/efi/loader/main.c index f367f292d38..516c47e3770 100644 --- a/stand/efi/loader/main.c +++ b/stand/efi/loader/main.c @@ -501,7 +501,7 @@ main(int argc, CHAR16 *argv[]) #endif } - interact(NULL); /* doesn't return */ + interact(); /* doesn't return */ return (EFI_SUCCESS); /* keep compiler happy */ } diff --git a/stand/i386/loader/main.c b/stand/i386/loader/main.c index 81bc2ff55dd..543dae1af71 100644 --- a/stand/i386/loader/main.c +++ b/stand/i386/loader/main.c @@ -232,7 +232,7 @@ main(void) bios_getsmap(); - interact(NULL); + interact(); /* if we ever get here, it is an error */ return (1); diff --git a/stand/mips/beri/loader/main.c b/stand/mips/beri/loader/main.c index 2d201d8011e..71b69b78157 100644 --- a/stand/mips/beri/loader/main.c +++ b/stand/mips/beri/loader/main.c @@ -149,7 +149,7 @@ main(int argc, char *argv[], char *envv[], struct bootinfo *bootinfop) printf("bootpath=\"%s\"\n", bootpath); #endif - interact(NULL); + interact(); return (0); } diff --git a/stand/ofw/common/main.c b/stand/ofw/common/main.c index 3c0bbdf97a2..128542c922f 100644 --- 
a/stand/ofw/common/main.c +++ b/stand/ofw/common/main.c @@ -157,7 +157,7 @@ main(int (*openfirm)(void *)) archsw.arch_readin = ofw_readin; archsw.arch_autoload = ofw_autoload; - interact(NULL); /* doesn't return */ + interact(); /* doesn't return */ OF_exit(); diff --git a/stand/powerpc/kboot/main.c b/stand/powerpc/kboot/main.c index 7a24c163d5c..e72a7482046 100644 --- a/stand/powerpc/kboot/main.c +++ b/stand/powerpc/kboot/main.c @@ -122,7 +122,7 @@ main(int argc, const char **argv) setenv("loaddev", bootdev, 1); setenv("LINES", "24", 1); - interact(NULL); /* doesn't return */ + interact(); /* doesn't return */ return (0); } diff --git a/stand/powerpc/ps3/main.c b/stand/powerpc/ps3/main.c index be8708aadef..db9ea50959f 100644 --- a/stand/powerpc/ps3/main.c +++ b/stand/powerpc/ps3/main.c @@ -140,7 +140,7 @@ main(void) setenv("LINES", "24", 1); setenv("hw.platform", "ps3", 1); - interact(NULL); /* doesn't return */ + interact(); /* doesn't return */ return (0); } diff --git a/stand/sparc64/loader/main.c b/stand/sparc64/loader/main.c index d3c17c5ea6c..140885d50e8 100644 --- a/stand/sparc64/loader/main.c +++ b/stand/sparc64/loader/main.c @@ -902,7 +902,7 @@ main(int (*openfirm)(void *)) printf("bootpath=\"%s\"\n", bootpath); /* Give control to the machine independent loader code. 
*/ - interact(NULL); + interact(); return (1); } diff --git a/stand/uboot/common/main.c b/stand/uboot/common/main.c index c4efb1f69bd..5540059797c 100644 --- a/stand/uboot/common/main.c +++ b/stand/uboot/common/main.c @@ -500,7 +500,7 @@ main(int argc, char **argv) archsw.arch_readin = uboot_readin; archsw.arch_autoload = uboot_autoload; - interact(NULL); /* doesn't return */ + interact(); /* doesn't return */ return (0); } diff --git a/stand/userboot/userboot/main.c b/stand/userboot/userboot/main.c index 7f59eb7c3bd..95bff1745a0 100644 --- a/stand/userboot/userboot/main.c +++ b/stand/userboot/userboot/main.c @@ -142,7 +142,7 @@ loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) if (setjmp(jb)) return; - interact(NULL); /* doesn't return */ + interact(); /* doesn't return */ exit(0); } From ca481bffc2c08120b920a2631f06fde1ece3a403 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 19 Dec 2017 04:06:02 +0000 Subject: [PATCH 011/115] Hoist btx include stuff to i386/Makefile.inc Sponsored by: Netflix --- stand/i386/Makefile.inc | 2 ++ stand/i386/boot2/Makefile | 1 - stand/i386/gptboot/Makefile | 1 - stand/i386/libfirewire/Makefile | 1 - stand/i386/libi386/Makefile | 1 - stand/i386/loader/Makefile | 3 --- stand/i386/zfsboot/Makefile | 1 - 7 files changed, 2 insertions(+), 8 deletions(-) diff --git a/stand/i386/Makefile.inc b/stand/i386/Makefile.inc index 77dbda54f5a..a3140190a13 100644 --- a/stand/i386/Makefile.inc +++ b/stand/i386/Makefile.inc @@ -14,6 +14,8 @@ BTXCRT= ${BTXDIR}/lib/crt0.o BTXSRC= ${BOOTSRC}/i386/btx BTXLIB= ${BTXSRC}/lib +CFLAGS+= -I${BTXLIB} + # compact binary with no padding between text, data, bss LDSCRIPT= ${BOOTSRC}/i386/boot.ldscript # LDFLAGS_BIN=-e start -Ttext ${ORG} -Wl,-T,${LDSCRIPT},-S,--oformat,binary diff --git a/stand/i386/boot2/Makefile b/stand/i386/boot2/Makefile index c3146b3e906..53bfd9e478b 100644 --- a/stand/i386/boot2/Makefile +++ b/stand/i386/boot2/Makefile @@ -31,7 +31,6 @@ 
CFLAGS+=-fomit-frame-pointer \ -DSIOFMT=${B2SIOFMT} \ -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ -I${LDRSRC} \ - -I${BTXLIB} \ -Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \ -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \ -Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings \ diff --git a/stand/i386/gptboot/Makefile b/stand/i386/gptboot/Makefile index 49643e7bbec..171a2ed55e8 100644 --- a/stand/i386/gptboot/Makefile +++ b/stand/i386/gptboot/Makefile @@ -33,7 +33,6 @@ CFLAGS+=-DBOOTPROG=\"gptboot\" \ -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ -I${LDRSRC} \ -I${BOOTSRC}/i386/common \ - -I${BTXLIB} \ -I${BOOTSRC}/i386/boot2 \ -Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \ -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \ diff --git a/stand/i386/libfirewire/Makefile b/stand/i386/libfirewire/Makefile index d3f88c448b6..f6e594f83d2 100644 --- a/stand/i386/libfirewire/Makefile +++ b/stand/i386/libfirewire/Makefile @@ -12,7 +12,6 @@ SRCS+= dcons.c fwcrom.c CFLAGS+= -D_BOOT CFLAGS+= -I${LDRSRC} -CFLAGS+= -I${BTXLIB} CFLAGS+= -I${BOOTSRC}/i386/libi386 CFLAGS+= -Wformat -Wall diff --git a/stand/i386/libi386/Makefile b/stand/i386/libi386/Makefile index f058521a3a2..f54052cd89d 100644 --- a/stand/i386/libi386/Makefile +++ b/stand/i386/libi386/Makefile @@ -47,7 +47,6 @@ CFLAGS+= -Dalloca=__builtin_alloca CFLAGS+= -I${BOOTSRC}/ficl -I${BOOTSRC}/ficl/i386 \ -I${LDRSRC} -I${BOOTSRC}/i386/common \ - -I${BTXLIB} \ -I${SYSDIR}/contrib/dev/acpica/include # Handle FreeBSD specific %b and %D printf format specifiers diff --git a/stand/i386/loader/Makefile b/stand/i386/loader/Makefile index 7733c58684c..ca0d822d890 100644 --- a/stand/i386/loader/Makefile +++ b/stand/i386/loader/Makefile @@ -56,9 +56,6 @@ LDFLAGS+= -static -Ttext 0x0 LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a CFLAGS+= -I${BOOTSRC}/i386 -# BTX components -CFLAGS+= -I${BTXLIB} - # Debug me! 
#CFLAGS+= -g #LDFLAGS+= -g diff --git a/stand/i386/zfsboot/Makefile b/stand/i386/zfsboot/Makefile index bd734172558..35185de6964 100644 --- a/stand/i386/zfsboot/Makefile +++ b/stand/i386/zfsboot/Makefile @@ -31,7 +31,6 @@ CFLAGS+=-DBOOTPROG=\"zfsboot\" \ -I${ZFSSRC} \ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ - -I${BTXLIB} \ -I${BOOTSRC}/i386/boot2 \ -Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \ -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \ From fc1340fb40b0866d2f04ce08f8c600877a973f41 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 19 Dec 2017 04:06:07 +0000 Subject: [PATCH 012/115] No need to use relative paths like this here. Sponsored by: Netflix --- stand/i386/libi386/multiboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stand/i386/libi386/multiboot.c b/stand/i386/libi386/multiboot.c index 9aac64010a1..252a1f53771 100644 --- a/stand/i386/libi386/multiboot.c +++ b/stand/i386/libi386/multiboot.c @@ -49,8 +49,8 @@ __FBSDID("$FreeBSD$"); #include "bootstrap.h" #include "multiboot.h" -#include "../i386/libi386/libi386.h" -#include "../i386/btx/lib/btxv86.h" +#include "libi386.h" +#include <btxv86.h> #define MULTIBOOT_SUPPORTED_FLAGS \ (MULTIBOOT_PAGE_ALIGN|MULTIBOOT_MEMORY_INFO) From 5cf3cd108f77fec0c2505b00fb58efae71c4fc42 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Tue, 19 Dec 2017 04:13:22 +0000 Subject: [PATCH 013/115] When doing a dump, the scheduler is normally not running, so this change worked to capture dumps for me. However, the test for SCHEDULER_STOPPED() isn't right. We can also call the dump routine from ddb, in which case the scheduler is still running. This leads to an assertion panic that we're sleeping when we shouldn't. Instead, use the proper test for dumping or not. This brings us in line with other places that do special things while we're doing polled I/O like this. 
Noticed by: pho@ Differential Revision: https://reviews.freebsd.org/D13531 --- sys/cam/cam_periph.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c index c43088d1cd6..3dacabd83ca 100644 --- a/sys/cam/cam_periph.c +++ b/sys/cam/cam_periph.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1158,7 +1159,7 @@ cam_periph_runccb(union ccb *ccb, struct bintime *starttime; struct bintime ltime; int error; - bool sched_stopped; + bool must_poll; struct mtx *periph_mtx; struct cam_periph *periph; uint32_t timeout = 1; @@ -1182,7 +1183,13 @@ cam_periph_runccb(union ccb *ccb, devstat_start_transaction(ds, starttime); } - sched_stopped = SCHEDULER_STOPPED(); + /* + * We must poll the I/O while we're dumping. The scheduler is normally + * stopped for dumping, except when we call doadump from ddb. While the + * scheduler is running in this case, we still need to poll the I/O to + * avoid sleeping waiting for the ccb to complete. + */ + must_poll = dumping; ccb->ccb_h.cbfcnp = cam_periph_done; periph = xpt_path_periph(ccb->ccb_h.path); periph_mtx = cam_periph_mtx(periph); @@ -1193,7 +1200,7 @@ cam_periph_runccb(union ccb *ccb, * cam_periph_error can reschedule the ccb by calling xpt_action and returning * ERESTART, so we have to effect the polling in the do loop below. 
*/ - if (sched_stopped) { + if (must_poll) { mtx_unlock(periph_mtx); timeout = xpt_poll_setup(ccb); } @@ -1204,11 +1211,11 @@ cam_periph_runccb(union ccb *ccb, } else { xpt_action(ccb); do { - if (!sched_stopped) - cam_periph_ccbwait(ccb); - else { + if (must_poll) { xpt_pollwait(ccb, timeout); timeout = ccb->ccb_h.timeout * 10; + } else { + cam_periph_ccbwait(ccb); } if ((ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) error = 0; @@ -1220,7 +1227,7 @@ cam_periph_runccb(union ccb *ccb, } while (error == ERESTART); } - if (sched_stopped) + if (must_poll) mtx_lock(periph_mtx); if ((ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) { From 30d4f9e8883099cbdbcb732b68372b7c2e0ec979 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 19 Dec 2017 09:59:20 +0000 Subject: [PATCH 014/115] Add atomic_load(9) and atomic_store(9) operations. They provide relaxed-ordered atomic access semantics. Due to the FreeBSD memory model, the operations are syntactic wrappers around the volatile accesses. The volatile qualifier is used to ensure that the access is not optimized out and in turn depends on the volatile semantics as implemented by supported compilers. The motivation for adding the operation is to help people coming from other systems or knowing the C11/C++ standards where atomics have special type and require use of the special access operations. It is still the case that FreeBSD requires plain loads and stores of aligned integer types to be atomic. 
Suggested by: jhb Reviewed by: alc, jhb Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D13534 --- sys/amd64/include/atomic.h | 2 + sys/arm/include/atomic.h | 2 + sys/arm64/include/atomic.h | 2 + sys/i386/include/atomic.h | 2 + sys/mips/include/atomic.h | 2 + sys/powerpc/include/atomic.h | 2 + sys/riscv/include/atomic.h | 2 + sys/sparc64/include/atomic.h | 2 + sys/sys/atomic_common.h | 73 ++++++++++++++++++++++++++++++++++++ 9 files changed, 89 insertions(+) create mode 100644 sys/sys/atomic_common.h diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index 84c02718073..3847b09cff4 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -57,6 +57,8 @@ #define wmb() __asm __volatile("sfence;" : : : "memory") #define rmb() __asm __volatile("lfence;" : : : "memory") +#include <sys/atomic_common.h> + /* * Various simple operations on memory, each of which is atomic in the * presence of interrupts and multiple processors. 
diff --git a/sys/arm/include/atomic.h b/sys/arm/include/atomic.h index 55273296605..3c7b145813a 100644 --- a/sys/arm/include/atomic.h +++ b/sys/arm/include/atomic.h @@ -41,6 +41,8 @@ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ +#include <sys/atomic_common.h> + #include #ifndef _KERNEL diff --git a/sys/arm64/include/atomic.h b/sys/arm64/include/atomic.h index 08bb1036414..a870f40a23c 100644 --- a/sys/arm64/include/atomic.h +++ b/sys/arm64/include/atomic.h @@ -29,6 +29,8 @@ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ +#include <sys/atomic_common.h> + #define isb() __asm __volatile("isb" : : : "memory") /* diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h index dc697f7cecd..270b5eaa963 100644 --- a/sys/i386/include/atomic.h +++ b/sys/i386/include/atomic.h @@ -34,6 +34,8 @@ #error this file needs sys/cdefs.h as a prerequisite #endif +#include <sys/atomic_common.h> + #ifdef _KERNEL #include #include diff --git a/sys/mips/include/atomic.h b/sys/mips/include/atomic.h index 22050dad88a..668d311d24a 100644 --- a/sys/mips/include/atomic.h +++ b/sys/mips/include/atomic.h @@ -36,6 +36,8 @@ #error this file needs sys/cdefs.h as a prerequisite #endif +#include <sys/atomic_common.h> + /* * Note: All the 64-bit atomic operations are only atomic when running * in 64-bit mode. It is assumed that code compiled for n32 and n64 diff --git a/sys/powerpc/include/atomic.h b/sys/powerpc/include/atomic.h index acd78579a63..1b3c084db19 100644 --- a/sys/powerpc/include/atomic.h +++ b/sys/powerpc/include/atomic.h @@ -38,6 +38,8 @@ #error this file needs sys/cdefs.h as a prerequisite #endif +#include <sys/atomic_common.h> + /* * The __ATOMIC_REL/ACQ() macros provide memory barriers only in conjunction * with the atomic lXarx/stXcx. sequences below. 
They are not exposed outside diff --git a/sys/riscv/include/atomic.h b/sys/riscv/include/atomic.h index f82d0dd4fee..ec61691f09d 100644 --- a/sys/riscv/include/atomic.h +++ b/sys/riscv/include/atomic.h @@ -37,6 +37,8 @@ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ +#include <sys/atomic_common.h> + #define fence() __asm __volatile("fence" ::: "memory"); #define mb() fence() #define rmb() fence() diff --git a/sys/sparc64/include/atomic.h b/sys/sparc64/include/atomic.h index be499096303..e34853b5399 100644 --- a/sys/sparc64/include/atomic.h +++ b/sys/sparc64/include/atomic.h @@ -39,6 +39,8 @@ #define wmb() mb() #define rmb() mb() +#include <sys/atomic_common.h> + /* Userland needs different ASI's. */ #ifdef _KERNEL #define __ASI_ATOMIC ASI_N diff --git a/sys/sys/atomic_common.h b/sys/sys/atomic_common.h new file mode 100644 index 00000000000..9aa30fa24a2 --- /dev/null +++ b/sys/sys/atomic_common.h @@ -0,0 +1,73 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _SYS_ATOMIC_COMMON_H_ +#define _SYS_ATOMIC_COMMON_H_ + +#ifndef _MACHINE_ATOMIC_H_ +#error do not include this header, use machine/atomic.h +#endif + +#define atomic_load_char(p) (*(volatile u_char *)(p)) +#define atomic_load_short(p) (*(volatile u_short *)(p)) +#define atomic_load_int(p) (*(volatile u_int *)(p)) +#define atomic_load_long(p) (*(volatile u_long *)(p)) +#define atomic_load_ptr(p) (*(volatile uintptr_t*)(p)) +#define atomic_load_8(p) (*(volatile uint8_t *)(p)) +#define atomic_load_16(p) (*(volatile uint16_t *)(p)) +#define atomic_load_32(p) (*(volatile uint32_t *)(p)) +#ifdef _LP64 +#define atomic_load_64(p) (*(volatile uint64_t *)(p)) +#endif + +#define atomic_store_char(p, v) \ + (*(volatile u_char *)(p) = (u_char)(v)) +#define atomic_store_short(p, v) \ + (*(volatile u_short *)(p) = (u_short)(v)) +#define atomic_store_int(p, v) \ + (*(volatile u_int *)(p) = (u_int)(v)) +#define atomic_store_long(p, v) \ + (*(volatile u_long *)(p) = (u_long)(v)) +#define atomic_store_ptr(p, v) \ + (*(uintptr_t *)(p) = (uintptr_t)(v)) +#define atomic_store_8(p, v) \ + (*(volatile uint8_t *)(p) = (uint8_t)(v)) +#define atomic_store_16(p, v) \ + (*(volatile uint16_t *)(p) = (uint16_t)(v)) +#define atomic_store_32(p, v) \ + (*(volatile uint32_t *)(p) = (uint32_t)(v)) +#ifdef _LP64 +#define atomic_store_64(p, v) \ + (*(volatile uint64_t *)(p) = (uint64_t)(v)) +#endif + +#endif From 
6f697994fd36703d412a100c6e1b626fbf194cd4 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 19 Dec 2017 10:05:45 +0000 Subject: [PATCH 015/115] Use atomic_load(9) to read ppsinfo sequence numbers. In this case volatile qualifiers ensure that a compiler does not optimize the accesses out. Reviewed by: alc, jhb Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D13534 --- sys/kern/kern_tc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index 377a8b55359..d291bc78fad 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -1601,10 +1601,10 @@ pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps) tv.tv_usec = fapi->timeout.tv_nsec / 1000; timo = tvtohz(&tv); } - aseq = pps->ppsinfo.assert_sequence; - cseq = pps->ppsinfo.clear_sequence; - while (aseq == pps->ppsinfo.assert_sequence && - cseq == pps->ppsinfo.clear_sequence) { + aseq = atomic_load_int(&pps->ppsinfo.assert_sequence); + cseq = atomic_load_int(&pps->ppsinfo.clear_sequence); + while (aseq == atomic_load_int(&pps->ppsinfo.assert_sequence) && + cseq == atomic_load_int(&pps->ppsinfo.clear_sequence)) { if (abi_aware(pps, 1) && pps->driver_mtx != NULL) { if (pps->flags & PPSFLAG_MTX_SPIN) { err = msleep_spin(pps, pps->driver_mtx, From 200f8117ba72b9c4e93dd3491f322f0012634309 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 19 Dec 2017 10:06:55 +0000 Subject: [PATCH 016/115] Perform all accesses to uma_reclaim_needed using atomic(9) KPI. 
Reviewed by: alc, jhb Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D13534 --- sys/vm/uma_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index ee46fcb08e5..81c953a4838 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -3170,14 +3170,14 @@ uma_reclaim_worker(void *arg __unused) for (;;) { sx_xlock(&uma_drain_lock); - while (uma_reclaim_needed == 0) + while (atomic_load_int(&uma_reclaim_needed) == 0) sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl", hz); sx_xunlock(&uma_drain_lock); EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM); sx_xlock(&uma_drain_lock); uma_reclaim_locked(true); - uma_reclaim_needed = 0; + atomic_store_int(&uma_reclaim_needed, 0); sx_xunlock(&uma_drain_lock); /* Don't fire more than once per-second. */ pause("umarclslp", hz); From e44f4f3547612b57227c366bd57b02f0be7b5001 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 19 Dec 2017 14:11:41 +0000 Subject: [PATCH 017/115] mlx5en: Avoid SFENCE on x86 The IA32 memory model guarantees that all writes are seen in the program order. Also, any access to the uncacheable memory flushes the store buffers. As a consequence, the SFENCE instruction is (almost) never needed, in particular, it is not needed to ensure the correct order of updates as seen by a PCIe device. Use atomic_thread_fence_rel() instead of wmb() to only emit compiler barriers on x86 there. Other architectures get the right barrier instruction as well. 
Reviewed by: hselasky Sponsored by: Mellanox Technologies MFC after: 1 week --- sys/dev/mlx5/mlx5_en/mlx5_en_rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c index 77f674af35f..fb14be43b32 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -90,7 +90,7 @@ mlx5e_post_rx_wqes(struct mlx5e_rq *rq) } /* ensure wqes are visible to device before updating doorbell record */ - wmb(); + atomic_thread_fence_rel(); mlx5_wq_ll_update_db_record(&rq->wq); } @@ -436,7 +436,7 @@ wq_ll_pop: mlx5_cqwq_update_db_record(&rq->cq.wq); /* ensure cq space is freed before enabling more cqes */ - wmb(); + atomic_thread_fence_rel(); return (i); } From 7cc0ad62e3d947fc0c33ff401efed5a1c6e51f94 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Tue, 19 Dec 2017 15:50:46 +0000 Subject: [PATCH 018/115] Make __startkernel line up with KERNBASE, so that the math to compute the applied relocation offset in link_elf.c works as intended. We may want to revisit how that works in future, for example by having elf_reloc_self() actually store the numbers it is using rather than computing them later, but this fixes symbol lookup after r326203. Reported by: andreast@ Pointy hat to: me --- sys/conf/ldscript.powerpc64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/conf/ldscript.powerpc64 b/sys/conf/ldscript.powerpc64 index 250f54ec209..82836699456 100644 --- a/sys/conf/ldscript.powerpc64 +++ b/sys/conf/ldscript.powerpc64 @@ -11,7 +11,7 @@ SECTIONS /* Read-only sections, merged into text segment: */ . = kernbase; - PROVIDE (begin = . 
- SIZEOF_HEADERS); + PROVIDE (begin = .); .text : { From d6716aa2af0b8071939779e688092cfceb638b81 Mon Sep 17 00:00:00 2001 From: Nathan Whitehorn Date: Tue, 19 Dec 2017 16:45:40 +0000 Subject: [PATCH 019/115] The highest-order bit of the bootloader cookie is 1, with the result that the 32-bit cookie can be sign-extended on its way out of the loader and through Open Firmware. If sign-extended, the in-kernel check of its value would fail on 64-bit systems, resulting in a mountroot prompt. Solve this by telling the kernel to ignore the high-order bits. PR: kern/224437 Submitted by: Gustavo Romero --- sys/powerpc/booke/booke_machdep.c | 2 +- sys/powerpc/powerpc/machdep.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/powerpc/booke/booke_machdep.c b/sys/powerpc/booke/booke_machdep.c index 5512cc491e6..418d4d7327d 100644 --- a/sys/powerpc/booke/booke_machdep.c +++ b/sys/powerpc/booke/booke_machdep.c @@ -201,7 +201,7 @@ extern void *int_performance_counter; mtspr(ivor, (uintptr_t)(&handler) & 0xffffUL); uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t, vm_offset_t, void *mdp, - vm_offset_t mdp_cookie); + uint32_t mdp_cookie); void booke_cpu_init(void); void diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c index 85db11ba1d5..1f594306683 100644 --- a/sys/powerpc/powerpc/machdep.c +++ b/sys/powerpc/powerpc/machdep.c @@ -155,7 +155,7 @@ SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size, CTLFLAG_RD, &cacheline_size, 0, ""); uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *, - vm_offset_t); + uint32_t); long Maxmem = 0; long realmem = 0; @@ -234,7 +234,7 @@ void booke_cpu_init(void); uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, - vm_offset_t mdp_cookie) + uint32_t mdp_cookie) { struct pcpu *pc; struct cpuref bsp; From 22fd1b5dc64492d62a5d194cb0a3c25e74265eb6 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 19 Dec 2017 17:07:50 +0000 Subject: [PATCH 020/115] 
Document the semantics of atomic_thread_fence operations. Add atomic_load_ and atomic_store_, and explain why they exist. Define the synchronizes-with relationship and its effects. Reorder and revise some of the existing text. For example, more precisely describe when ordinary accesses are atomic. Reviewed by: jhb, kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D13522 --- share/man/man9/atomic.9 | 176 ++++++++++++++++++++++++++-------------- 1 file changed, 116 insertions(+), 60 deletions(-) diff --git a/share/man/man9/atomic.9 b/share/man/man9/atomic.9 index e57ce5e7bb1..3eff2e11c6d 100644 --- a/share/man/man9/atomic.9 +++ b/share/man/man9/atomic.9 @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 23, 2017 +.Dd December 19, 2017 .Dt ATOMIC 9 .Os .Sh NAME @@ -36,7 +36,8 @@ .Nm atomic_readandclear , .Nm atomic_set , .Nm atomic_subtract , -.Nm atomic_store +.Nm atomic_store , +.Nm atomic_thread_fence .Nd atomic operations .Sh SYNOPSIS .In sys/types.h @@ -60,7 +61,7 @@ .Ft .Fn atomic_fetchadd_ "volatile *p" " v" .Ft -.Fn atomic_load_acq_ "volatile *p" +.Fn atomic_load_[acq_] "volatile *p" .Ft .Fn atomic_readandclear_ "volatile *p" .Ft void @@ -68,19 +69,33 @@ .Ft void .Fn atomic_subtract_[acq_|rel_] "volatile *p" " v" .Ft void -.Fn atomic_store_rel_ "volatile *p" " v" +.Fn atomic_store_[rel_] "volatile *p" " v" .Ft .Fn atomic_swap_ "volatile *p" " v" .Ft int .Fn atomic_testandclear_ "volatile *p" "u_int v" .Ft int .Fn atomic_testandset_ "volatile *p" "u_int v" +.Ft void +.Fn atomic_thread_fence_[acq|acq_rel|rel|seq_cst] "void" .Sh DESCRIPTION -All of these operations are performed atomically across multiple -threads and in the presence of interrupts, meaning that they are -performed in an indivisible manner from the perspective of concurrently +Atomic operations are commonly used to implement reference counts and as +building blocks for synchronization primitives, such as mutexes. 
+.Pp +All of these operations are performed +.Em atomically +across multiple threads and in the presence of interrupts, meaning that they +are performed in an indivisible manner from the perspective of concurrently running threads and interrupt handlers. .Pp +On all architectures supported by +.Fx , +ordinary loads and stores of integers in cache-coherent memory are +inherently atomic if the integer is naturally aligned and its size does not +exceed the processor's word size. +However, such loads and stores may be elided from the program by +the compiler, whereas atomic operations are always performed. +.Pp When atomic operations are performed on cache-coherent memory, all operations on the same location are totally ordered. .Pp @@ -93,29 +108,16 @@ interrupt handler will observe a .Em torn write , or partial modification of the location. .Pp -On all architectures supported by -.Fx , -ordinary loads and stores of naturally aligned integer types -are atomic, as executed by the processor. -.Pp -Atomic operations can be used to implement reference counts or as -building blocks for synchronization primitives such as mutexes. -.Pp -The semantics of -.Fx Ns 's -atomic operations are almost identical to those of the similarly named -C11 operations. -The one important difference is that the C11 standard does not -require ordinary loads and stores to ever be atomic. -This is is why the -.Fn atomic_load_explicit memory_order_relaxed -operation exists in the C11 standard, but is not provided by -.In machine/atomic.h . +Except as noted below, the semantics of these operations are almost +identical to the semantics of similarly named C11 atomic operations. .Ss Types -Each atomic operation operates on a specific +Most atomic operations act upon a specific .Fa type . -The type to use is indicated in the function name. -The available types that can be used are: +That type is indicated in the function name. 
+In contrast to C11 atomic operations, +.Fx Ns 's +atomic operations are performed on ordinary integer types. +The available types are: .Pp .Bl -tag -offset indent -width short -compact .It Li int @@ -147,8 +149,7 @@ unsigned 8-bit integer unsigned 16-bit integer .El .Pp -These must not be used in MI code because the instructions to implement them -efficiently might not be available. +These types must not be used in machine-independent code. .Ss Acquire and Release Operations By default, a thread's accesses to different memory locations might not be performed in @@ -167,52 +168,64 @@ Moreover, in some cases, such as the implementation of synchronization between threads, arbitrary reordering might result in the incorrect execution of the program. To constrain the reordering that both the compiler and processor might perform -on a thread's accesses, the thread should use atomic operations with +on a thread's accesses, a programmer can use atomic operations with .Em acquire and .Em release semantics. .Pp -Most of the atomic operations on memory have three variants. +Atomic operations on memory have up to three variants. The first variant performs the operation without imposing any ordering constraints on memory accesses to other locations. The second variant has acquire semantics, and the third variant has release semantics. -In effect, operations with acquire and release semantics establish one-way -barriers to reordering. .Pp -When an atomic operation has acquire semantics, the effects of the operation -must have completed before any subsequent load or store (by program order) is +When an atomic operation has acquire semantics, the operation must have +completed before any subsequent load or store (by program order) is performed. Conversely, acquire semantics do not require that prior loads or stores have completed before the atomic operation is performed. +An atomic operation can only have acquire semantics if it performs a load +from memory. 
To denote acquire semantics, the suffix .Dq Li _acq is inserted into the function name immediately prior to the .Dq Li _ Ns Aq Fa type suffix. -For example, to subtract two integers ensuring that subsequent loads and -stores happen after the subtraction is performed, use +For example, to subtract two integers ensuring that the subtraction is +completed before any subsequent loads and stores are performed, use .Fn atomic_subtract_acq_int . .Pp -When an atomic operation has release semantics, the effects of all prior -loads or stores (by program order) must have completed before the operation -is performed. -Conversely, release semantics do not require that the effects of the -atomic operation must have completed before any subsequent load or store is -performed. +When an atomic operation has release semantics, all prior loads or stores +(by program order) must have completed before the operation is performed. +Conversely, release semantics do not require that the atomic operation must +have completed before any subsequent load or store is performed. +An atomic operation can only have release semantics if it performs a store +to memory. To denote release semantics, the suffix .Dq Li _rel is inserted into the function name immediately prior to the .Dq Li _ Ns Aq Fa type suffix. For example, to add two long integers ensuring that all prior loads and -stores happen before the addition, use +stores are completed before the addition is performed, use .Fn atomic_add_rel_long . .Pp -The one-way barriers provided by acquire and release operations allow the -implementations of common synchronization primitives to express their -ordering requirements without also imposing unnecessary ordering. 
+When a release operation by one thread +.Em synchronizes with +an acquire operation by another thread, usually meaning that the acquire +operation reads the value written by the release operation, then the effects +of all prior stores by the releasing thread must become visible to +subsequent loads by the acquiring thread. +Moreover, the effects of all stores (by other threads) that were visible to +the releasing thread must also become visible to the acquiring thread. +These rules only apply to the synchronizing threads. +Other threads might observe these stores in a different order. +.Pp +In effect, atomic operations with acquire and release semantics establish +one-way barriers to reordering that enable the implementations of +synchronization primitives to express their ordering requirements without +also imposing unnecessary ordering. For example, for a critical section guarded by a mutex, an acquire operation when the mutex is locked and a release operation when the mutex is unlocked will prevent any loads or stores from moving outside of the critical @@ -220,6 +233,61 @@ section. However, they will not prevent the compiler or processor from moving loads or stores into the critical section, which does not violate the semantics of a mutex. +.Ss Thread Fence Operations +Alternatively, a programmer can use atomic thread fence operations to +constrain the reordering of accesses. +In contrast to other atomic operations, fences do not, themselves, access +memory. +.Pp +When a fence has acquire semantics, all prior loads (by program order) must +have completed before any subsequent load or store is performed. +Thus, an acquire fence is a two-way barrier for load operations. +To denote acquire semantics, the suffix +.Dq Li _acq +is appended to the function name, for example, +.Fn atomic_thread_fence_acq . +.Pp +When a fence has release semantics, all prior loads or stores (by program +order) must have completed before any subsequent store operation is +performed. 
+Thus, a release fence is a two-way barrier for store operations. +To denote release semantics, the suffix +.Dq Li _rel +is appended to the function name, for example, +.Fn atomic_thread_fence_rel . +.Pp +Although +.Fn atomic_thread_fence_acq_rel +implements both acquire and release semantics, it is not a full barrier. +For example, a store prior to the fence (in program order) may be completed +after a load subsequent to the fence. +In contrast, +.Fn atomic_thread_fence_seq_cst +implements a full barrier. +Neither loads nor stores may cross this barrier in either direction. +.Pp +In C11, a release fence by one thread synchronizes with an acquire fence by +another thread when an atomic load that is prior to the acquire fence (by +program order) reads the value written by an atomic store that is subsequent +to the release fence. +In contrast, in FreeBSD, because of the atomicity of ordinary, naturally +aligned loads and stores, fences can also be synchronized by ordinary loads +and stores. +This simplifies the implementation and use of some synchronization +primitives in +.Fx . +.Pp +Since neither a compiler nor a processor can foresee which (atomic) load +will read the value written by an (atomic) store, the ordering constraints +imposed by fences must be more restrictive than acquire loads and release +stores. +Essentially, this is why fences are two-way barriers. +.Pp +Although fences impose more restrictive ordering than acquire loads and +release stores, by separating access from ordering, they can sometimes +facilitate more efficient implementations of synchronization primitives. +For example, they can be used to avoid executing a memory barrier until a +memory access shows that some condition is satisfied. .Ss Multiple Processors In multiprocessor systems, the atomicity of the atomic operations on memory depends on support for cache coherence in the underlying architecture. @@ -326,12 +394,6 @@ and do not have any variants with memory barriers at this time. 
.Bd -literal -compact return (*p); .Ed -.El -.Pp -The -.Fn atomic_load -functions are only provided with acquire memory barriers. -.Bl -hang .It Fn atomic_readandclear p .Bd -literal -compact tmp = *p; @@ -363,12 +425,6 @@ and do not have any variants with memory barriers at this time. .Bd -literal -compact *p = v; .Ed -.El -.Pp -The -.Fn atomic_store -functions are only provided with release memory barriers. -.Bl -hang .It Fn atomic_swap p v .Bd -literal -compact tmp = *p; From 9abe2e7e981a343898027c29ea5f9446eb1a6278 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Tue, 19 Dec 2017 17:13:04 +0000 Subject: [PATCH 021/115] Avoid using bioq_* in gmirror. gmirror does not perform any sorting of I/O requests, so the bioq API doesn't provide any advantages over plain TAILQs. The API also does not provide operations needed by an upcoming change. No functional change intended. The diff shrinks the geom_mirror.ko text and the gmirror softc slightly. Tested by: pho (part of a larger patch) MFC after: 1 week Sponsored by: Dell EMC Isilon --- sys/geom/mirror/g_mirror.c | 92 +++++++++++++++++++++----------------- sys/geom/mirror/g_mirror.h | 15 +++---- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c index a3df6f5a2a9..94c8abe8c38 100644 --- a/sys/geom/mirror/g_mirror.c +++ b/sys/geom/mirror/g_mirror.c @@ -307,7 +307,7 @@ g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp) u_int nreqs = 0; mtx_lock(&sc->sc_queue_mtx); - TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { + TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) { if (bp->bio_from == cp) nreqs++; } @@ -920,7 +920,7 @@ g_mirror_done(struct bio *bp) sc = bp->bio_from->geom->softc; bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR; mtx_lock(&sc->sc_queue_mtx); - bioq_insert_tail(&sc->sc_queue, bp); + TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } @@ -965,7 +965,7 @@ g_mirror_regular_request(struct 
bio *bp) pbp->bio_completed = pbp->bio_length; if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) { - bioq_remove(&sc->sc_inflight, pbp); + TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue); /* Release delayed sync requests if possible. */ g_mirror_sync_release(sc); } @@ -1020,7 +1020,7 @@ g_mirror_regular_request(struct bio *bp) else { pbp->bio_error = 0; mtx_lock(&sc->sc_queue_mtx); - bioq_insert_tail(&sc->sc_queue, pbp); + TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); wakeup(sc); @@ -1040,7 +1040,7 @@ g_mirror_regular_request(struct bio *bp) pbp->bio_error = 0; pbp->bio_completed = pbp->bio_length; } - bioq_remove(&sc->sc_inflight, pbp); + TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue); /* Release delayed sync requests if possible. */ g_mirror_sync_release(sc); g_io_deliver(pbp, pbp->bio_error); @@ -1060,7 +1060,7 @@ g_mirror_sync_done(struct bio *bp) sc = bp->bio_from->geom->softc; bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC; mtx_lock(&sc->sc_queue_mtx); - bioq_insert_tail(&sc->sc_queue, bp); + TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); wakeup(sc); } @@ -1117,30 +1117,33 @@ g_mirror_kernel_dump(struct bio *bp) static void g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp) { - struct bio_queue_head queue; + struct bio_queue queue; struct g_mirror_disk *disk; struct g_consumer *cp; struct bio *cbp; - bioq_init(&queue); + TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - while ((cbp = bioq_takefirst(&queue)) != NULL) + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); + } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } - bioq_insert_tail(&queue, cbp); + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = 
g_mirror_flush_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; } - while ((cbp = bioq_takefirst(&queue)) != NULL) { + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); G_MIRROR_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -1194,7 +1197,7 @@ g_mirror_start(struct bio *bp) g_io_deliver(bp, bp->bio_to->error); return; } - bioq_insert_tail(&sc->sc_queue, bp); + TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); wakeup(sc); @@ -1246,7 +1249,7 @@ g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp) return (false); sstart = sbp->bio_offset; send = sbp->bio_offset + sbp->bio_length; - TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) { + TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) { rstart = bp->bio_offset; rend = bp->bio_offset + bp->bio_length; if (rend > sstart && rstart < send) @@ -1263,7 +1266,7 @@ g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp) { G_MIRROR_LOGREQ(2, bp, "Delaying request."); - bioq_insert_head(&sc->sc_regular_delayed, bp); + TAILQ_INSERT_HEAD(&sc->sc_regular_delayed, bp, bio_queue); } /* @@ -1274,7 +1277,7 @@ g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp) { G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request."); - bioq_insert_tail(&sc->sc_sync_delayed, bp); + TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue); } /* @@ -1286,13 +1289,13 @@ g_mirror_regular_release(struct g_mirror_softc *sc) { struct bio *bp, *bp2; - TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) { + TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed, bio_queue, bp2) { if (g_mirror_sync_collision(sc, bp)) continue; - bioq_remove(&sc->sc_regular_delayed, bp); + TAILQ_REMOVE(&sc->sc_regular_delayed, bp, bio_queue); G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp); mtx_lock(&sc->sc_queue_mtx); - 
bioq_insert_head(&sc->sc_queue, bp); + TAILQ_INSERT_HEAD(&sc->sc_queue, bp, bio_queue); mtx_unlock(&sc->sc_queue_mtx); } } @@ -1306,10 +1309,10 @@ g_mirror_sync_release(struct g_mirror_softc *sc) { struct bio *bp, *bp2; - TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) { + TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) { if (g_mirror_regular_collision(sc, bp)) continue; - bioq_remove(&sc->sc_sync_delayed, bp); + TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue); G_MIRROR_LOGREQ(2, bp, "Releasing delayed synchronization request."); g_io_request(bp, bp->bio_from); @@ -1615,7 +1618,7 @@ g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp) static void g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) { - struct bio_queue_head queue; + struct bio_queue queue; struct g_mirror_disk *disk; struct g_consumer *cp; struct bio *cbp; @@ -1639,20 +1642,22 @@ g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) left = bp->bio_length; offset = bp->bio_offset; data = bp->bio_data; - bioq_init(&queue); + TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - while ((cbp = bioq_takefirst(&queue)) != NULL) + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); + } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } - bioq_insert_tail(&queue, cbp); + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = g_mirror_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; @@ -1665,7 +1670,8 @@ g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) offset += cbp->bio_length; data += cbp->bio_length; } - while ((cbp = bioq_takefirst(&queue)) != NULL) { + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); G_MIRROR_LOGREQ(3, cbp, "Sending request."); disk 
= cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -1704,9 +1710,9 @@ g_mirror_register_request(struct bio *bp) case BIO_WRITE: case BIO_DELETE: { + struct bio_queue queue; struct g_mirror_disk *disk; struct g_mirror_disk_sync *sync; - struct bio_queue_head queue; struct g_consumer *cp; struct bio *cbp; @@ -1736,7 +1742,7 @@ g_mirror_register_request(struct bio *bp) * Allocate all bios before sending any request, so we can * return ENOMEM in nice and clean way. */ - bioq_init(&queue); + TAILQ_INIT(&queue); LIST_FOREACH(disk, &sc->sc_disks, d_next) { sync = &disk->d_sync; switch (disk->d_state) { @@ -1754,14 +1760,16 @@ g_mirror_register_request(struct bio *bp) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - while ((cbp = bioq_takefirst(&queue)) != NULL) + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); g_destroy_bio(cbp); + } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } - bioq_insert_tail(&queue, cbp); + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); cbp->bio_done = g_mirror_done; cp = disk->d_consumer; cbp->bio_caller1 = cp; @@ -1770,12 +1778,13 @@ g_mirror_register_request(struct bio *bp) ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); } - if (bioq_first(&queue) == NULL) { + if (TAILQ_EMPTY(&queue)) { g_io_deliver(bp, EOPNOTSUPP); return; } - while ((cbp = bioq_takefirst(&queue)) != NULL) { + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); + TAILQ_REMOVE(&queue, cbp, bio_queue); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; cp->index++; @@ -1786,7 +1795,7 @@ g_mirror_register_request(struct bio *bp) * Put request onto inflight queue, so we can check if new * synchronization requests don't collide with it. 
*/ - bioq_insert_tail(&sc->sc_inflight, bp); + TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue); return; } default: @@ -1929,8 +1938,10 @@ g_mirror_worker(void *arg) */ /* Get first request from the queue. */ mtx_lock(&sc->sc_queue_mtx); - bp = bioq_takefirst(&sc->sc_queue); - if (bp == NULL) { + bp = TAILQ_FIRST(&sc->sc_queue); + if (bp != NULL) + TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); + else { if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { mtx_unlock(&sc->sc_queue_mtx); @@ -1940,7 +1951,7 @@ g_mirror_worker(void *arg) kproc_exit(0); } mtx_lock(&sc->sc_queue_mtx); - if (bioq_first(&sc->sc_queue) != NULL) { + if (!TAILQ_EMPTY(&sc->sc_queue)) { mtx_unlock(&sc->sc_queue_mtx); continue; } @@ -2190,7 +2201,8 @@ g_mirror_destroy_provider(struct g_mirror_softc *sc) g_topology_lock(); g_error_provider(sc->sc_provider, ENXIO); mtx_lock(&sc->sc_queue_mtx); - while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) { + while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) { + TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); /* * Abort any pending I/O that wasn't generated by us. 
* Synchronization requests and requests destined for individual @@ -3009,11 +3021,11 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, sc->sc_writes = 0; sc->sc_refcnt = 1; sx_init(&sc->sc_lock, "gmirror:lock"); - bioq_init(&sc->sc_queue); + TAILQ_INIT(&sc->sc_queue); mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF); - bioq_init(&sc->sc_regular_delayed); - bioq_init(&sc->sc_inflight); - bioq_init(&sc->sc_sync_delayed); + TAILQ_INIT(&sc->sc_regular_delayed); + TAILQ_INIT(&sc->sc_inflight); + TAILQ_INIT(&sc->sc_sync_delayed); LIST_INIT(&sc->sc_disks); TAILQ_INIT(&sc->sc_events); mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); diff --git a/sys/geom/mirror/g_mirror.h b/sys/geom/mirror/g_mirror.h index 84b31100de5..1db45bd274f 100644 --- a/sys/geom/mirror/g_mirror.h +++ b/sys/geom/mirror/g_mirror.h @@ -193,17 +193,14 @@ struct g_mirror_softc { uint32_t sc_id; /* Mirror unique ID. */ struct sx sc_lock; - struct bio_queue_head sc_queue; + struct bio_queue sc_queue; struct mtx sc_queue_mtx; struct proc *sc_worker; - struct bio_queue_head sc_regular_delayed; /* Delayed I/O requests due - collision with sync - requests. */ - struct bio_queue_head sc_inflight; /* In-flight regular write - requests. */ - struct bio_queue_head sc_sync_delayed; /* Delayed sync requests due - collision with regular - requests. */ + struct bio_queue sc_inflight; /* In-flight regular write requests. */ + struct bio_queue sc_regular_delayed; /* Delayed I/O requests due to + collision with sync requests. */ + struct bio_queue sc_sync_delayed; /* Delayed sync requests due to + collision with regular requests. */ LIST_HEAD(, g_mirror_disk) sc_disks; u_int sc_ndisks; /* Number of disks. 
*/ From 96fc97c81fcb613a3098d5dbf944bdb6567b9e35 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Tue, 19 Dec 2017 17:59:00 +0000 Subject: [PATCH 022/115] Update Matthew Macy contact info Email address has changed, uses consistent name (Matthew, not Matt) Reported by: Matthew Macy Differential Revision: https://reviews.freebsd.org/D13537 --- share/man/man4/em.4 | 2 +- sys/compat/linuxkpi/common/src/linux_page.c | 2 +- sys/compat/linuxkpi/common/src/linux_rcu.c | 2 +- sys/dev/e1000/em_txrx.c | 2 +- sys/dev/e1000/if_em.c | 2 +- sys/dev/e1000/if_em.h | 2 +- sys/dev/e1000/igb_txrx.c | 2 +- sys/net/ifdi_if.m | 2 +- sys/net/iflib.c | 2 +- sys/net/iflib.h | 2 +- sys/sys/gtaskqueue.h | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/share/man/man4/em.4 b/share/man/man4/em.4 index 2966b26ee1d..9a9249fdcaf 100644 --- a/share/man/man4/em.4 +++ b/share/man/man4/em.4 @@ -319,6 +319,6 @@ The driver was originally written by .An Intel Corporation Aq Mt freebsd@intel.com . It was merged with igb driver and converted to the iflib framework by -.An Matthew Macy Aq Mt mmacy@nextbsd.org +.An Matthew Macy Aq Mt mmacy@mattmacy.io and .An Sean Bruno Aq Mt sbruno@FreeBSD.org . diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index 2c474b86a82..b7d79da46ce 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -1,6 +1,6 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. - * Copyright (c) 2016 Matt Macy (mmacy@nextbsd.org) + * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io) * Copyright (c) 2017 Mellanox Technologies, Ltd. * All rights reserved. 
* diff --git a/sys/compat/linuxkpi/common/src/linux_rcu.c b/sys/compat/linuxkpi/common/src/linux_rcu.c index 29944090af8..0ece0342aad 100644 --- a/sys/compat/linuxkpi/common/src/linux_rcu.c +++ b/sys/compat/linuxkpi/common/src/linux_rcu.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016 Matt Macy (mmacy@nextbsd.org) + * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io) * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/e1000/em_txrx.c b/sys/dev/e1000/em_txrx.c index 22e983b370a..f81a6b39f0b 100644 --- a/sys/dev/e1000/em_txrx.c +++ b/sys/dev/e1000/em_txrx.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016-2017 Matt Macy + * Copyright (c) 2016-2017 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index f17e4369106..f8e6ed00f8c 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2016 Matt Macy + * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index 2435d5674d1..9f789930b53 100644 --- a/sys/dev/e1000/if_em.h +++ b/sys/dev/e1000/if_em.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2016 Matt Macy + * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/e1000/igb_txrx.c b/sys/dev/e1000/igb_txrx.c index 80010369d29..8a56c64b6c1 100644 --- a/sys/dev/e1000/igb_txrx.c +++ b/sys/dev/e1000/igb_txrx.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016 Matt Macy + * Copyright (c) 2016 Matthew Macy * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/sys/net/ifdi_if.m b/sys/net/ifdi_if.m index 81c9ba1fe44..e28ec45444c 100644 --- a/sys/net/ifdi_if.m +++ b/sys/net/ifdi_if.m @@ -1,5 +1,5 @@ # -# Copyright (c) 2014, Matthew Macy (kmacy@freebsd.org) +# Copyright (c) 2014, Matthew Macy (mmacy@mattmacy.io) # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 021ff2ae71b..d476edddc42 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2017, Matthew Macy + * Copyright (c) 2014-2017, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/net/iflib.h b/sys/net/iflib.h index 14181b90489..d1d707beb72 100644 --- a/sys/net/iflib.h +++ b/sys/net/iflib.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2017, Matthew Macy (mmacy@nextbsd.org) + * Copyright (c) 2014-2017, Matthew Macy (mmacy@mattmacy.io) * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/sys/gtaskqueue.h b/sys/sys/gtaskqueue.h index 61f66f3e899..41094603b41 100644 --- a/sys/sys/gtaskqueue.h +++ b/sys/sys/gtaskqueue.h @@ -2,7 +2,7 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2014 Jeffrey Roberson - * Copyright (c) 2016 Matthew Macy + * Copyright (c) 2016 Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without From 78fcf2de930af71c827151dac2dbd72e976e7c7d Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Tue, 19 Dec 2017 18:12:18 +0000 Subject: [PATCH 023/115] Add byte swapping in bnxt_cfg_async_cr() request The firmware is always in little endian, use htole*() for all request fields larger than one byte. 
Submitted by: Bhargava Chenna Marreddy Sponsored by: Broadcom Limited --- sys/dev/bnxt/bnxt_hwrm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/dev/bnxt/bnxt_hwrm.c b/sys/dev/bnxt/bnxt_hwrm.c index e25d2cb0213..45530e20010 100644 --- a/sys/dev/bnxt/bnxt_hwrm.c +++ b/sys/dev/bnxt/bnxt_hwrm.c @@ -945,9 +945,9 @@ bnxt_cfg_async_cr(struct bnxt_softc *softc) bnxt_hwrm_cmd_hdr_init(softc, &req, HWRM_FUNC_CFG); - req.fid = 0xffff; + req.fid = htole16(0xffff); req.enables = htole32(HWRM_FUNC_CFG_INPUT_ENABLES_ASYNC_EVENT_CR); - req.async_event_cr = softc->def_cp_ring.ring.phys_id; + req.async_event_cr = htole16(softc->def_cp_ring.ring.phys_id); rc = hwrm_send_message(softc, &req, sizeof(req)); } @@ -957,7 +957,7 @@ bnxt_cfg_async_cr(struct bnxt_softc *softc) bnxt_hwrm_cmd_hdr_init(softc, &req, HWRM_FUNC_VF_CFG); req.enables = htole32(HWRM_FUNC_VF_CFG_INPUT_ENABLES_ASYNC_EVENT_CR); - req.async_event_cr = softc->def_cp_ring.ring.phys_id; + req.async_event_cr = htole16(softc->def_cp_ring.ring.phys_id); rc = hwrm_send_message(softc, &req, sizeof(req)); } From dd688800e1ce85d375190252a245d9b83695575e Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 18:20:38 +0000 Subject: [PATCH 024/115] Add a custom VOP_PATHCONF method for fdescfs. The method handles NAME_MAX and LINK_MAX explicitly. For all other pathconf variables, the method passes the request down to the underlying file descriptor. This requires splitting a kern_fpathconf() syscallsubr routine out of sys_fpathconf(). Also, to avoid lock order reversals with vnode locks, the fdescfs vnode is unlocked around the call to kern_fpathconf(), but with the usecount of the vnode bumped. 
MFC after: 1 month Sponsored by: Chelsio Communications --- sys/fs/fdescfs/fdesc_vnops.c | 32 +++++++++++++++++++++++++++++++- sys/kern/kern_descrip.c | 15 +++++++++++---- sys/sys/syscallsubr.h | 1 + 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c index c6d0dfbfb06..59ea8d92bfb 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -55,6 +55,8 @@ #include #include #include +#include +#include #include #include @@ -70,6 +72,7 @@ struct mtx fdesc_hashmtx; static vop_getattr_t fdesc_getattr; static vop_lookup_t fdesc_lookup; static vop_open_t fdesc_open; +static vop_pathconf_t fdesc_pathconf; static vop_readdir_t fdesc_readdir; static vop_readlink_t fdesc_readlink; static vop_reclaim_t fdesc_reclaim; @@ -82,7 +85,7 @@ static struct vop_vector fdesc_vnodeops = { .vop_getattr = fdesc_getattr, .vop_lookup = fdesc_lookup, .vop_open = fdesc_open, - .vop_pathconf = vop_stdpathconf, + .vop_pathconf = fdesc_pathconf, .vop_readdir = fdesc_readdir, .vop_readlink = fdesc_readlink, .vop_reclaim = fdesc_reclaim, @@ -395,6 +398,33 @@ fdesc_open(struct vop_open_args *ap) return (ENODEV); } +static int +fdesc_pathconf(struct vop_pathconf_args *ap) +{ + struct vnode *vp = ap->a_vp; + int error; + + switch (ap->a_name) { + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + return (0); + case _PC_LINK_MAX: + if (VTOFDESC(vp)->fd_type == Froot) + *ap->a_retval = 2; + else + *ap->a_retval = 1; + return (0); + default: + vref(vp); + VOP_UNLOCK(vp, 0); + error = kern_fpathconf(curthread, VTOFDESC(vp)->fd_fd, + ap->a_name); + vn_lock(vp, LK_SHARED | LK_RETRY); + vunref(vp); + return (error); + } +} + static int fdesc_getattr(struct vop_getattr_args *ap) { diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 67253fa4050..c1100b7c8fd 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -1417,27 +1417,34 @@ struct fpathconf_args { /* ARGSUSED */ int 
sys_fpathconf(struct thread *td, struct fpathconf_args *uap) +{ + + return (kern_fpathconf(td, uap->fd, uap->name)); +} + +int +kern_fpathconf(struct thread *td, int fd, int name) { struct file *fp; struct vnode *vp; cap_rights_t rights; int error; - error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FPATHCONF), &fp); + error = fget(td, fd, cap_rights_init(&rights, CAP_FPATHCONF), &fp); if (error != 0) return (error); - if (uap->name == _PC_ASYNC_IO) { + if (name == _PC_ASYNC_IO) { td->td_retval[0] = _POSIX_ASYNCHRONOUS_IO; goto out; } vp = fp->f_vnode; if (vp != NULL) { vn_lock(vp, LK_SHARED | LK_RETRY); - error = VOP_PATHCONF(vp, uap->name, td->td_retval); + error = VOP_PATHCONF(vp, name, td->td_retval); VOP_UNLOCK(vp, 0); } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { - if (uap->name != _PC_PIPE_BUF) { + if (name != _PC_PIPE_BUF) { error = EINVAL; } else { td->td_retval[0] = PIPE_BUF; diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index c97b27f1193..d95b2dd7d4b 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -111,6 +111,7 @@ int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg); int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg); int kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf); int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf); +int kern_fpathconf(struct thread *td, int fd, int name); int kern_fstat(struct thread *td, int fd, struct stat *sbp); int kern_fstatfs(struct thread *td, int fd, struct statfs *buf); int kern_fsync(struct thread *td, int fd, bool fullsync); From 697a86b6bf7ea354ae023e79ad12ee35d1faa916 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 19:07:24 +0000 Subject: [PATCH 025/115] Adjust ZFS' link count handling for ino64. - Define a ZFS_LINK_MAX as the ZFS version of LINK_MAX which is set to UINT64_MAX to match the on-disk format. 
- Enable the currently #if 0'd code to check for link overflows and return EMLINK. - Don't clamp the link count reported in stat() to LINK_MAX as that is still the 16-bit limit, but report the full link counts. Also, avoid possibly overflowing the reported link count to 0 when adjusting the link count to account for ".snapshot". - Update the LINK_MAX reported by pathconf() to report ZFS_LINK_MAX rather than LINK_MAX (but clamped to LONG_MAX for 32-bit systems). Reviewed by: avg (earlier version) Sponsored by: Chelsio Communications --- .../opensolaris/uts/common/fs/zfs/sys/zfs_znode.h | 1 + .../contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c | 8 ++++---- .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 12 +++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h index 6465c50e280..fd5f011af48 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h @@ -201,6 +201,7 @@ typedef struct znode { boolean_t z_is_sa; /* are we native sa? 
*/ } znode_t; +#define ZFS_LINK_MAX UINT64_MAX /* * Range locking rules diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c index 25d4a9bd65e..e0d3c889091 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c @@ -527,10 +527,10 @@ zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); -#if 0 +#ifdef __FreeBSD__ if (zp_is_dir) { error = 0; - if (dzp->z_links >= LINK_MAX) + if (dzp->z_links >= ZFS_LINK_MAX) error = SET_ERROR(EMLINK); return (error); } @@ -540,8 +540,8 @@ zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, ASSERT(!(flag & (ZNEW | ZEXISTS))); return (SET_ERROR(ENOENT)); } -#if 0 - if (zp->z_links >= LINK_MAX) { +#ifdef __FreeBSD__ + if (zp->z_links >= ZFS_LINK_MAX - zp_is_dir) { return (SET_ERROR(EMLINK)); } #endif diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index cf4ec5c3408..cb6ba997988 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -2643,7 +2643,6 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, int error = 0; uint32_t blksize; u_longlong_t nblocks; - uint64_t links; uint64_t mtime[2], ctime[2], crtime[2], rdev; xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ xoptattr_t *xoap = NULL; @@ -2695,11 +2694,10 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, vn_fsid(vp, vap); #endif vap->va_nodeid = zp->z_id; - if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) - links = zp->z_links + 1; - else - links = zp->z_links; - vap->va_nlink = MIN(links, LINK_MAX); /* nlink_t limit! 
*/ + vap->va_nlink = zp->z_links; + if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) && + zp->z_links < ZFS_LINK_MAX) + vap->va_nlink++; vap->va_size = zp->z_size; #ifdef illumos vap->va_rdev = vp->v_rdev; @@ -4404,7 +4402,7 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, switch (cmd) { case _PC_LINK_MAX: - *valp = INT_MAX; + *valp = MIN(LONG_MAX, ZFS_LINK_MAX); return (0); case _PC_FILESIZEBITS: From 746c92e04e6df9290938df9551b09c1e6f24466d Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 19:09:06 +0000 Subject: [PATCH 026/115] Add a custom VOP_PATHCONF method for fuse. This method handles _PC_FILESIZEBITS, _PC_SYMLINK_MAX, and _PC_NO_TRUNC. For other values it defers to vop_stdpathconf(). MFC after: 1 month Sponsored by: Chelsio Communications --- sys/fs/fuse/fuse_vnops.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index dd78ac8d055..1c10ca93984 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -126,6 +126,7 @@ static vop_lookup_t fuse_vnop_lookup; static vop_mkdir_t fuse_vnop_mkdir; static vop_mknod_t fuse_vnop_mknod; static vop_open_t fuse_vnop_open; +static vop_pathconf_t fuse_vnop_pathconf; static vop_read_t fuse_vnop_read; static vop_readdir_t fuse_vnop_readdir; static vop_readlink_t fuse_vnop_readlink; @@ -158,7 +159,7 @@ struct vop_vector fuse_vnops = { .vop_mkdir = fuse_vnop_mkdir, .vop_mknod = fuse_vnop_mknod, .vop_open = fuse_vnop_open, - .vop_pathconf = vop_stdpathconf, + .vop_pathconf = fuse_vnop_pathconf, .vop_read = fuse_vnop_read, .vop_readdir = fuse_vnop_readdir, .vop_readlink = fuse_vnop_readlink, @@ -1175,6 +1176,25 @@ fuse_vnop_open(struct vop_open_args *ap) return error; } +static int +fuse_vnop_pathconf(struct vop_pathconf_args *ap) +{ + + switch (ap->a_name) { + case _PC_FILESIZEBITS: + *ap->a_retval = 64; + return (0); + case _PC_SYMLINK_MAX: + *ap->a_retval = MAXPATHLEN; + return (0); + 
case _PC_NO_TRUNC: + *ap->a_retval = 1; + return (0); + default: + return (vop_stdpathconf(ap)); + } +} + /* struct vnop_read_args { struct vnode *a_vp; From 853b3a8ae84735cb0b669837b113446bc168e2d6 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 19:10:00 +0000 Subject: [PATCH 027/115] Support _PC_FILESIZEBITS in msdosfs' VOP_PATHCONF(). MFC after: 1 month Sponsored by: Chelsio Communications --- sys/fs/msdosfs/msdosfs_vnops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 7926a809a71..5d6a69d817d 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1876,6 +1876,9 @@ msdosfs_pathconf(struct vop_pathconf_args *ap) struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp; switch (ap->a_name) { + case _PC_FILESIZEBITS: + *ap->a_retval = 32; + return (0); case _PC_LINK_MAX: *ap->a_retval = 1; return (0); From 4a627952604a89c352586a71a7e4d281ed3da3a3 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 19:14:01 +0000 Subject: [PATCH 028/115] Handle _PC_FILESIZEBITS and _PC_NO_TRUNC for smbfs' VOP_PATHCONF(). MFC after: 1 month Sponsored by: Chelsio Communications --- sys/fs/smbfs/smbfs_vnops.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c index 3c22fa9bf19..b3153baa490 100644 --- a/sys/fs/smbfs/smbfs_vnops.c +++ b/sys/fs/smbfs/smbfs_vnops.c @@ -901,6 +901,12 @@ smbfs_pathconf (ap) switch (ap->a_name) { case _PC_LINK_MAX: *retval = 0; + case _PC_FILESIZEBITS: + if (vcp->vc_sopt.sv_caps & (SMB_CAP_LARGE_READX | + SMB_CAP_LARGE_WRITEX)) + *retval = 64; + else + *retval = 32; break; case _PC_NAME_MAX: *retval = (vcp->vc_hflags2 & SMB_FLAGS2_KNOWS_LONG_NAMES) ? 255 : 12; @@ -908,6 +914,9 @@ smbfs_pathconf (ap) case _PC_PATH_MAX: *retval = 800; /* XXX: a correct one ? 
*/ break; + case _PC_NO_TRUNC: + *retval = 1; + break; default: error = vop_stdpathconf(ap); } From a0a073b16d0ca366a263137fb9dda4950fd75720 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 19:18:48 +0000 Subject: [PATCH 029/115] Update NFS to handle larger link counts post ino64. - Define a NFS_LINK_MAX as UINT32_MAX to match the wire protocol. - Use NFS_LINK_MAX instead of LINK_MAX as the fallback value reported for a PATHCONF RPC by the NFS server. - Use NFS_LINK_MAX instead of LINK_MAX as the default value reported by the NFS client pathconf() if not overridden by the NFS server. - When reading the link count out of an RPC reply, read the full 32 bits instead of the lower 16 bits. Reviewed by: rmacklem (earlier version) Sponsored by: Chelsio Communications --- sys/fs/nfs/nfs_commonport.c | 2 +- sys/fs/nfs/nfs_commonsubs.c | 4 ++-- sys/fs/nfs/nfsproto.h | 2 ++ sys/fs/nfsclient/nfs_clcomsubs.c | 2 +- sys/fs/nfsclient/nfs_clvnops.c | 4 ++-- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index e36bfed8cc8..ece5626d83c 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -331,7 +331,7 @@ nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf, */ switch (flag) { case _PC_LINK_MAX: - *retf = LINK_MAX; + *retf = NFS_LINK_MAX; break; case _PC_NAME_MAX: *retf = NAME_MAX; diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 5e91a4942f5..dc441cecef0 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -883,7 +883,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, NFSV3_FSFHOMOGENEOUS | NFSV3_FSFCANSETTIME); } if (pc != NULL) { - pc->pc_linkmax = LINK_MAX; + pc->pc_linkmax = NFS_LINK_MAX; pc->pc_namemax = NAME_MAX; pc->pc_notrunc = 0; pc->pc_chownrestricted = 0; @@ -1320,7 +1320,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { 
if (!(*retcmpp)) { - if (fxdr_unsigned(int, *tl) != LINK_MAX) + if (fxdr_unsigned(int, *tl) != NFS_LINK_MAX) *retcmpp = NFSERR_NOTSAME; } } else if (pc != NULL) { diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index 90731ef770d..6c96081df9f 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -785,6 +785,8 @@ struct nfs_fattr { #define fa3_mtime fa_un.fa_nfsv3.nfsv3fa_mtime #define fa3_ctime fa_un.fa_nfsv3.nfsv3fa_ctime +#define NFS_LINK_MAX UINT32_MAX + struct nfsv2_sattr { u_int32_t sa_mode; u_int32_t sa_uid; diff --git a/sys/fs/nfsclient/nfs_clcomsubs.c b/sys/fs/nfsclient/nfs_clcomsubs.c index 94a253f83f1..6194343378b 100644 --- a/sys/fs/nfsclient/nfs_clcomsubs.c +++ b/sys/fs/nfsclient/nfs_clcomsubs.c @@ -433,7 +433,7 @@ nfsm_loadattr(struct nfsrv_descript *nd, struct nfsvattr *nap) nap->na_mode = fxdr_unsigned(u_short, fp->fa_mode); nap->na_rdev = makedev(fxdr_unsigned(u_char, fp->fa3_rdev.specdata1), fxdr_unsigned(u_char, fp->fa3_rdev.specdata2)); - nap->na_nlink = fxdr_unsigned(u_short, fp->fa_nlink); + nap->na_nlink = fxdr_unsigned(uint32_t, fp->fa_nlink); nap->na_uid = fxdr_unsigned(uid_t, fp->fa_uid); nap->na_gid = fxdr_unsigned(gid_t, fp->fa_gid); nap->na_size = fxdr_hyper(&fp->fa3_size); diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index d33162cd6e4..fd9ecb0d210 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -3450,7 +3450,7 @@ nfs_pathconf(struct vop_pathconf_args *ap) * For NFSv2 (or NFSv3 when not one of the above 4 a_names), * just fake them. 
*/ - pc.pc_linkmax = LINK_MAX; + pc.pc_linkmax = NFS_LINK_MAX; pc.pc_namemax = NFS_MAXNAMLEN; pc.pc_notrunc = 1; pc.pc_chownrestricted = 1; @@ -3460,7 +3460,7 @@ nfs_pathconf(struct vop_pathconf_args *ap) } switch (ap->a_name) { case _PC_LINK_MAX: - *ap->a_retval = pc.pc_linkmax; + *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); break; case _PC_NAME_MAX: *ap->a_retval = pc.pc_namemax; From 2dd51e16ca5b8a28d5d917c66ebb6998cf2c344d Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Tue, 19 Dec 2017 19:44:06 +0000 Subject: [PATCH 030/115] embed_mfs: support embedding mfs into loader The script originally supported embedding an mfs into ELF files or any other type of file, because it searched for magic strings to mark the beginning and end of the embeddable section. It was later modified to read the section offset and length via readelf, which made it work for ELF only. Restore the ability to update arbitrary file types by using the readelf technique for ELF, and the magic string technique for all others (including PE/COFF files like loader.efi). Submitted by: Zakary Nafziger MFC after: 1 month Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D12746 --- sys/tools/embed_mfs.sh | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/sys/tools/embed_mfs.sh b/sys/tools/embed_mfs.sh index fc6bacb470d..c4fea153d56 100644 --- a/sys/tools/embed_mfs.sh +++ b/sys/tools/embed_mfs.sh @@ -27,10 +27,10 @@ # # $FreeBSD$ # -# Embed the MFS image into the kernel body (expects space reserved via -# MD_ROOT_SIZE) +# Embed an MFS image into the kernel body or the loader body (expects space +# reserved via MD_ROOT_SIZE (kernel) or MD_IMAGE_SIZE (loader)) # -# $1: kernel filename +# $1: kernel or loader filename # $2: MFS image filename # @@ -47,16 +47,39 @@ mfs_size=`stat -f '%z' $2 2> /dev/null` # If we can't determine MFS image size - bail. 
[ -z ${mfs_size} ] && echo "Can't determine MFS image size" && exit 1 -sec_info=`elfdump -c $1 2> /dev/null | grep -A 5 -E "sh_name: oldmfs$"` -# If we can't find the mfs section within the given kernel - bail. -[ -z "${sec_info}" ] && echo "Can't locate mfs section within $1" && exit 1 +err_no_mfs="Can't locate mfs section within " -sec_size=`echo "${sec_info}" | awk '/sh_size/ {print $2}' 2> /dev/null` -sec_start=`echo "${sec_info}" | awk '/sh_offset/ {print $2}' 2> /dev/null` +if [ `file -b $1 | grep -q '^ELF ..-bit .SB executable'` ]; then + + sec_info=`elfdump -c $1 2> /dev/null | grep -A 5 -E "sh_name: oldmfs$"` + # If we can't find the mfs section within the given kernel - bail. + [ -z "${sec_info}" ] && echo "${err_no_mfs} $1" && exit 1 + + sec_size=`echo "${sec_info}" | awk '/sh_size/ {print $2}' 2>/dev/null` + sec_start=`echo "${sec_info}" | \ + awk '/sh_offset/ {print $2}' 2>/dev/null` + +else + + #try to find start byte of MFS start flag otherwise - bail. + sec_start=`strings -at d $1 | grep "MFS Filesystem goes here"` || \ + { echo "${err_no_mfs} $1"; exit 1; } + sec_start=`echo ${sec_start} | awk '{print $1}'` + + #try to find start byte of MFS end flag otherwise - bail. + sec_end=`strings -at d $1 | \ + grep "MFS Filesystem had better STOP here"` || \ + { echo "${err_no_mfs} $1"; exit 1; } + sec_end=`echo ${sec_end} | awk '{print $1}'` + + #calculate MFS section size + sec_size=`expr ${sec_end} - ${sec_start}` + +fi # If the mfs section size is smaller than the mfs image - bail. 
[ ${sec_size} -lt ${mfs_size} ] && echo "MFS image too large" && exit 1 # Dump the mfs image into the mfs section dd if=$2 ibs=8192 of=$1 obs=${sec_start} oseek=1 conv=notrunc 2> /dev/null && \ - echo "MFS image embedded into kernel" && exit 0 + echo "MFS image embedded into $1" && exit 0 From 599afe53a8fa0bdef852e2464293801d7b61fe88 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 19:51:36 +0000 Subject: [PATCH 031/115] Move NAME_MAX, LINK_MAX, and CHOWN_RESTRICTED out of vop_stdpathconf(). Having all filesystems fall through to default values isn't always correct and these values can vary for different filesystem implementations. Most of these changes just use the existing default values with a few exceptions: - Don't report CHOWN_RESTRICTED for ZFS since it doesn't do the exact permissions check this claims for chown(). - Use NANDFS_NAME_LEN for NAME_MAX for nandfs. - Don't report a LINK_MAX of 0 on smbfs. Now fail with EINVAL to indicate hard links aren't supported. 
Requested by: bde (though perhaps not this exact implementation) Reviewed by: kib (earlier version) MFC after: 1 month Sponsored by: Chelsio Communications --- .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 17 +++++++++++++---- sys/fs/devfs/devfs_vnops.c | 9 +++++++++ sys/fs/ext2fs/ext2_vnops.c | 6 ++++++ sys/fs/fuse/fuse_vnops.c | 6 ++++++ sys/fs/msdosfs/msdosfs_vnops.c | 3 +++ sys/fs/nandfs/nandfs_vnops.c | 9 +++++++++ sys/fs/smbfs/smbfs_vnops.c | 2 -- sys/fs/tmpfs/tmpfs_vnops.c | 12 ++++++++++++ sys/kern/vfs_default.c | 9 --------- sys/ufs/ufs/ufs_vnops.c | 3 +++ 10 files changed, 61 insertions(+), 15 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index cb6ba997988..def820a2f13 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -5403,11 +5403,20 @@ zfs_freebsd_pathconf(ap) int error; error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->td_ucred, NULL); - if (error == 0) + if (error == 0) { *ap->a_retval = val; - else if (error == EOPNOTSUPP) - error = vop_stdpathconf(ap); - return (error); + return (error); + } + if (error != EOPNOTSUPP) + return (error); + + switch (ap->a_name) { + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + return (0); + default: + return (vop_stdpathconf(ap)); + } } static int diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index 6c4f90f387c..f291aa59b8f 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -1181,6 +1181,12 @@ devfs_pathconf(struct vop_pathconf_args *ap) { switch (ap->a_name) { + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + return (0); + case _PC_LINK_MAX: + *ap->a_retval = LINK_MAX; + return (0); case _PC_MAX_CANON: if (ap->a_vp->v_vflag & VV_ISTTY) { *ap->a_retval = MAX_CANON; @@ -1210,6 +1216,9 @@ devfs_pathconf(struct vop_pathconf_args *ap) *ap->a_retval = 0; #endif return (0); 
+ case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + return (0); default: return (vop_stdpathconf(ap)); } diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c index 2e15c35ffc9..83f0281a083 100644 --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -1633,6 +1633,12 @@ ext2_pathconf(struct vop_pathconf_args *ap) else *ap->a_retval = ext2_max_nlink(VTOI(ap->a_vp)); break; + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + break; + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 1c10ca93984..b86e83ec32d 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1184,6 +1184,12 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap) case _PC_FILESIZEBITS: *ap->a_retval = 64; return (0); + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + return (0); + case _PC_LINK_MAX: + *ap->a_retval = LINK_MAX; + return (0); case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; return (0); diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 5d6a69d817d..33db2354dd8 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1885,6 +1885,9 @@ msdosfs_pathconf(struct vop_pathconf_args *ap) case _PC_NAME_MAX: *ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? 
WIN_MAXLEN : 12; return (0); + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + return (0); case _PC_NO_TRUNC: *ap->a_retval = 0; return (0); diff --git a/sys/fs/nandfs/nandfs_vnops.c b/sys/fs/nandfs/nandfs_vnops.c index 03b36ab2af6..c0ff86e48cf 100644 --- a/sys/fs/nandfs/nandfs_vnops.c +++ b/sys/fs/nandfs/nandfs_vnops.c @@ -2238,6 +2238,15 @@ nandfs_pathconf(struct vop_pathconf_args *ap) error = 0; switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = LINK_MAX; + break; + case _PC_NAME_MAX: + *ap->a_retval = NANDFS_NAME_LEN; + break; + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c index b3153baa490..e6557888820 100644 --- a/sys/fs/smbfs/smbfs_vnops.c +++ b/sys/fs/smbfs/smbfs_vnops.c @@ -899,8 +899,6 @@ smbfs_pathconf (ap) int error = 0; switch (ap->a_name) { - case _PC_LINK_MAX: - *retval = 0; case _PC_FILESIZEBITS: if (vcp->vc_sopt.sv_caps & (SMB_CAP_LARGE_READX | SMB_CAP_LARGE_WRITEX)) diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index cef4685ac4a..1343ae8742f 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -1348,6 +1348,18 @@ tmpfs_pathconf(struct vop_pathconf_args *v) error = 0; switch (name) { + case _PC_LINK_MAX: + *retval = LINK_MAX; + break; + + case _PC_NAME_MAX: + *retval = NAME_MAX; + break; + + case _PC_CHOWN_RESTRICTED: + *retval = 1; + break; + case _PC_NO_TRUNC: *retval = 1; break; diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index ec37c38509e..7f263d36426 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -479,21 +479,12 @@ vop_stdpathconf(ap) case _PC_ASYNC_IO: *ap->a_retval = _POSIX_ASYNCHRONOUS_IO; return (0); - case _PC_NAME_MAX: - *ap->a_retval = NAME_MAX; - return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); - case _PC_LINK_MAX: - *ap->a_retval = LINK_MAX; - return (0); case _PC_PIPE_BUF: *ap->a_retval = 
PIPE_BUF; return (0); - case _PC_CHOWN_RESTRICTED: - *ap->a_retval = 1; - return (0); default: return (EINVAL); } diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 65e6e8a1a39..f7861fc4aa1 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -2450,6 +2450,9 @@ ufs_pathconf(ap) case _PC_NAME_MAX: *ap->a_retval = UFS_MAXNAMLEN; break; + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; From 418e621276d6562385374ff16c1a842e49ee8882 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 19:53:34 +0000 Subject: [PATCH 032/115] Handle _PC_FILESIZEBITS and _PC_SYMLINK_MAX for devfs' VOP_PATHCONF(). MFC after: 1 month Sponsored by: Chelsio Communications --- sys/fs/devfs/devfs_vnops.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index f291aa59b8f..d2f1fab85f5 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -1181,12 +1181,18 @@ devfs_pathconf(struct vop_pathconf_args *ap) { switch (ap->a_name) { + case _PC_FILESIZEBITS: + *ap->a_retval = 64; + return (0); case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); + case _PC_SYMLINK_MAX: + *ap->a_retval = MAXPATHLEN; + return (0); case _PC_MAX_CANON: if (ap->a_vp->v_vflag & VV_ISTTY) { *ap->a_retval = MAX_CANON; From a74da9fb83e2b0719fac71bcaf766337321024ab Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 19:57:55 +0000 Subject: [PATCH 033/115] Use FUSE_LINK_MAX for LINK_MAX in fuse' VOP_PATHCONF(). Should have included this in r326993. 
MFC after: 1 month Sponsored by: Chelsio Communications --- sys/fs/fuse/fuse_vnops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index b86e83ec32d..a0e4056584a 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1188,7 +1188,7 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap) *ap->a_retval = NAME_MAX; return (0); case _PC_LINK_MAX: - *ap->a_retval = LINK_MAX; + *ap->a_retval = FUSE_LINK_MAX; return (0); case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; From f6e25ec77ced9f683e9e89f66ad7a01e781644bc Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 20:07:57 +0000 Subject: [PATCH 034/115] Report INT_MAX for LINK_MAX for devfs' VOP_PATHCONF(). devfs uses ints for link counts internally and already reports the full link count via stat() post ino64. Sponsored by: Chelsio Communications --- sys/fs/devfs/devfs_vnops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index d2f1fab85f5..514a5914ab7 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -1188,7 +1188,7 @@ devfs_pathconf(struct vop_pathconf_args *ap) *ap->a_retval = NAME_MAX; return (0); case _PC_LINK_MAX: - *ap->a_retval = LINK_MAX; + *ap->a_retval = INT_MAX; return (0); case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; From c24008cc39340e20de07f6ad1030f836d1c1bdc7 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 20:17:07 +0000 Subject: [PATCH 035/115] Honor NANDFS_LINK_MAX for post-ino64. This uses NANDFS_LINK_MAX instead of LINK_MAX for link overflow checks and the value reported by pathconf() / fpathconf(). 
Sponsored by: Chelsio Communications --- sys/fs/nandfs/nandfs_vnops.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/fs/nandfs/nandfs_vnops.c b/sys/fs/nandfs/nandfs_vnops.c index c0ff86e48cf..a8223db19dd 100644 --- a/sys/fs/nandfs/nandfs_vnops.c +++ b/sys/fs/nandfs/nandfs_vnops.c @@ -1354,7 +1354,7 @@ nandfs_link(struct vop_link_args *ap) struct nandfs_inode *inode = &node->nn_inode; int error; - if (inode->i_links_count >= LINK_MAX) + if (inode->i_links_count >= NANDFS_LINK_MAX) return (EMLINK); if (inode->i_flags & (IMMUTABLE | APPEND)) @@ -1576,7 +1576,7 @@ abortit: fdnode = VTON(fdvp); fnode = VTON(fvp); - if (fnode->nn_inode.i_links_count >= LINK_MAX) { + if (fnode->nn_inode.i_links_count >= NANDFS_LINK_MAX) { VOP_UNLOCK(fvp, 0); error = EMLINK; goto abortit; @@ -1839,7 +1839,7 @@ nandfs_mkdir(struct vop_mkdir_args *ap) if (nandfs_fs_full(dir_node->nn_nandfsdev)) return (ENOSPC); - if (dir_inode->i_links_count >= LINK_MAX) + if (dir_inode->i_links_count >= NANDFS_LINK_MAX) return (EMLINK); error = nandfs_node_create(nmp, &node, mode); @@ -2239,7 +2239,7 @@ nandfs_pathconf(struct vop_pathconf_args *ap) error = 0; switch (ap->a_name) { case _PC_LINK_MAX: - *ap->a_retval = LINK_MAX; + *ap->a_retval = NANDFS_LINK_MAX; break; case _PC_NAME_MAX: *ap->a_retval = NANDFS_NAME_LEN; From 35b1a3abd3aad772f1d5384757c45f6294313845 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 20:19:07 +0000 Subject: [PATCH 036/115] Update tmpfs link count handling for ino64. Add a new TMPFS_LINK_MAX to use in place of LINK_MAX for link overflow checks and pathconf() reporting. Rather than storing a full 64-bit link count, just use a plain int and use INT_MAX as TMPFS_LINK_MAX. 
Discussed with: bde Reviewed by: kib (part of a larger patch) Sponsored by: Chelsio Communications --- sys/fs/tmpfs/tmpfs.h | 4 +++- sys/fs/tmpfs/tmpfs_subr.c | 4 ++-- sys/fs/tmpfs/tmpfs_vnops.c | 7 ++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h index 14afb62ec80..1df8842d973 100644 --- a/sys/fs/tmpfs/tmpfs.h +++ b/sys/fs/tmpfs/tmpfs.h @@ -188,8 +188,8 @@ struct tmpfs_node { uid_t tn_uid; /* (v) */ gid_t tn_gid; /* (v) */ mode_t tn_mode; /* (v) */ + int tn_links; /* (v) */ u_long tn_flags; /* (v) */ - nlink_t tn_links; /* (v) */ struct timespec tn_atime; /* (vi) */ struct timespec tn_mtime; /* (vi) */ struct timespec tn_ctime; /* (vi) */ @@ -297,6 +297,8 @@ LIST_HEAD(tmpfs_node_list, tmpfs_node); #define tn_reg tn_spec.tn_reg #define tn_fifo tn_spec.tn_fifo +#define TMPFS_LINK_MAX INT_MAX + #define TMPFS_NODE_LOCK(node) mtx_lock(&(node)->tn_interlock) #define TMPFS_NODE_UNLOCK(node) mtx_unlock(&(node)->tn_interlock) #define TMPFS_NODE_MTX(node) (&(node)->tn_interlock) diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index abd1eddf4fb..9ebcb7ae84a 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -739,8 +739,8 @@ tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, if (vap->va_type == VDIR) { /* Ensure that we do not overflow the maximum number of links * imposed by the system. 
*/ - MPASS(dnode->tn_links <= LINK_MAX); - if (dnode->tn_links == LINK_MAX) { + MPASS(dnode->tn_links <= TMPFS_LINK_MAX); + if (dnode->tn_links == TMPFS_LINK_MAX) { return (EMLINK); } diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index 1343ae8742f..15dc86610ea 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -618,8 +619,8 @@ tmpfs_link(struct vop_link_args *v) /* Ensure that we do not overflow the maximum number of links imposed * by the system. */ - MPASS(node->tn_links <= LINK_MAX); - if (node->tn_links == LINK_MAX) { + MPASS(node->tn_links <= TMPFS_LINK_MAX); + if (node->tn_links == TMPFS_LINK_MAX) { error = EMLINK; goto out; } @@ -1349,7 +1350,7 @@ tmpfs_pathconf(struct vop_pathconf_args *v) switch (name) { case _PC_LINK_MAX: - *retval = LINK_MAX; + *retval = TMPFS_LINK_MAX; break; case _PC_NAME_MAX: From 6a0dc418a62f8f840ef0a7a2d08bcc4ea6a895e1 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Tue, 19 Dec 2017 20:32:45 +0000 Subject: [PATCH 037/115] Don't populate NVRAM sysctls for VFs Only the PF allows NVRAM interaction on bnxt devices. 
Submitted by: Bhargava Chenna Marreddy Sponsored by: Broadcom Limited --- sys/dev/bnxt/bnxt_sysctl.c | 20 ++++++++++--------- sys/dev/bnxt/if_bnxt.c | 39 ++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/sys/dev/bnxt/bnxt_sysctl.c b/sys/dev/bnxt/bnxt_sysctl.c index 39d184e68aa..e006260d752 100644 --- a/sys/dev/bnxt/bnxt_sysctl.c +++ b/sys/dev/bnxt/bnxt_sysctl.c @@ -74,14 +74,16 @@ bnxt_init_sysctl_ctx(struct bnxt_softc *softc) return ENOMEM; } - sysctl_ctx_init(&softc->nvm_info->nvm_ctx); - ctx = device_get_sysctl_ctx(softc->dev); - softc->nvm_info->nvm_oid = SYSCTL_ADD_NODE(ctx, - SYSCTL_CHILDREN(device_get_sysctl_tree(softc->dev)), OID_AUTO, - "nvram", CTLFLAG_RD, 0, "nvram information"); - if (!softc->nvm_info->nvm_oid) { - sysctl_ctx_free(&softc->nvm_info->nvm_ctx); - return ENOMEM; + if (BNXT_PF(softc)) { + sysctl_ctx_init(&softc->nvm_info->nvm_ctx); + ctx = device_get_sysctl_ctx(softc->dev); + softc->nvm_info->nvm_oid = SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(softc->dev)), OID_AUTO, + "nvram", CTLFLAG_RD, 0, "nvram information"); + if (!softc->nvm_info->nvm_oid) { + sysctl_ctx_free(&softc->nvm_info->nvm_ctx); + return ENOMEM; + } } sysctl_ctx_init(&softc->hw_lro_ctx); @@ -127,7 +129,7 @@ bnxt_free_sysctl_ctx(struct bnxt_softc *softc) else softc->ver_info->ver_oid = NULL; } - if (softc->nvm_info->nvm_oid != NULL) { + if (BNXT_PF(softc) && softc->nvm_info->nvm_oid != NULL) { orc = sysctl_ctx_free(&softc->nvm_info->nvm_ctx); if (orc) rc = orc; diff --git a/sys/dev/bnxt/if_bnxt.c b/sys/dev/bnxt/if_bnxt.c index c51b6cc813d..dee60be8cff 100644 --- a/sys/dev/bnxt/if_bnxt.c +++ b/sys/dev/bnxt/if_bnxt.c @@ -715,18 +715,21 @@ bnxt_attach_pre(if_ctx_t ctx) } /* Get NVRAM info */ - softc->nvm_info = malloc(sizeof(struct bnxt_nvram_info), - M_DEVBUF, M_NOWAIT | M_ZERO); - if (softc->nvm_info == NULL) { - rc = ENOMEM; - device_printf(softc->dev, - "Unable to allocate space for NVRAM info\n"); - goto 
nvm_alloc_fail; + if (BNXT_PF(softc)) { + softc->nvm_info = malloc(sizeof(struct bnxt_nvram_info), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (softc->nvm_info == NULL) { + rc = ENOMEM; + device_printf(softc->dev, + "Unable to allocate space for NVRAM info\n"); + goto nvm_alloc_fail; + } + + rc = bnxt_hwrm_nvm_get_dev_info(softc, &softc->nvm_info->mfg_id, + &softc->nvm_info->device_id, &softc->nvm_info->sector_size, + &softc->nvm_info->size, &softc->nvm_info->reserved_size, + &softc->nvm_info->available_size); } - rc = bnxt_hwrm_nvm_get_dev_info(softc, &softc->nvm_info->mfg_id, - &softc->nvm_info->device_id, &softc->nvm_info->sector_size, - &softc->nvm_info->size, &softc->nvm_info->reserved_size, - &softc->nvm_info->available_size); /* Register the driver with the FW */ rc = bnxt_hwrm_func_drv_rgtr(softc); @@ -859,9 +862,11 @@ bnxt_attach_pre(if_ctx_t ctx) rc = bnxt_init_sysctl_ctx(softc); if (rc) goto init_sysctl_failed; - rc = bnxt_create_nvram_sysctls(softc->nvm_info); - if (rc) - goto failed; + if (BNXT_PF(softc)) { + rc = bnxt_create_nvram_sysctls(softc->nvm_info); + if (rc) + goto failed; + } arc4rand(softc->vnic_info.rss_hash_key, HW_HASH_KEY_SIZE, 0); softc->vnic_info.rss_hash_type = @@ -894,7 +899,8 @@ failed: init_sysctl_failed: bnxt_hwrm_func_drv_unrgtr(softc, false); drv_rgtr_fail: - free(softc->nvm_info, M_DEVBUF); + if (BNXT_PF(softc)) + free(softc->nvm_info, M_DEVBUF); nvm_alloc_fail: ver_fail: free(softc->ver_info, M_DEVBUF); @@ -963,7 +969,8 @@ bnxt_detach(if_ctx_t ctx) for (i = 0; i < softc->nrxqsets; i++) free(softc->rx_rings[i].tpa_start, M_DEVBUF); free(softc->ver_info, M_DEVBUF); - free(softc->nvm_info, M_DEVBUF); + if (BNXT_PF(softc)) + free(softc->nvm_info, M_DEVBUF); bnxt_hwrm_func_drv_unrgtr(softc, false); bnxt_free_hwrm_dma_mem(softc); From 980da9f2f0f04b15d6d4cb67494a42647f77f2e1 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Tue, 19 Dec 2017 21:07:30 +0000 Subject: [PATCH 038/115] Support short HWRM commands New Stratus bnxt devices require 
support for short HWRM commands for VFs to function. Enable their use, but only use them if it's both supported and required... prefer the long HWRM commands when possible. Submitted by: Bhargava Chenna Marreddy Sponsored by: Broadcom Limited Differential Revision: https://reviews.freebsd.org/D13269?id=36180 --- sys/dev/bnxt/bnxt.h | 2 ++ sys/dev/bnxt/bnxt_hwrm.c | 33 ++++++++++++++++++++++++++++++++- sys/dev/bnxt/bnxt_hwrm.h | 1 + sys/dev/bnxt/if_bnxt.c | 26 ++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/sys/dev/bnxt/bnxt.h b/sys/dev/bnxt/bnxt.h index 543cfdbc109..bd0691487cd 100644 --- a/sys/dev/bnxt/bnxt.h +++ b/sys/dev/bnxt/bnxt.h @@ -561,6 +561,7 @@ struct bnxt_softc { #define BNXT_FLAG_VF 0x0001 #define BNXT_FLAG_NPAR 0x0002 #define BNXT_FLAG_WOL_CAP 0x0004 +#define BNXT_FLAG_SHORT_CMD 0x0008 uint32_t flags; uint32_t total_msix; @@ -572,6 +573,7 @@ struct bnxt_softc { uint16_t hwrm_cmd_seq; uint32_t hwrm_cmd_timeo; /* milliseconds */ struct iflib_dma_info hwrm_cmd_resp; + struct iflib_dma_info hwrm_short_cmd_req_addr; /* Interrupt info for HWRM */ struct if_irq irq; struct mtx hwrm_lock; diff --git a/sys/dev/bnxt/bnxt_hwrm.c b/sys/dev/bnxt/bnxt_hwrm.c index 45530e20010..0ccfb02cc5e 100644 --- a/sys/dev/bnxt/bnxt_hwrm.c +++ b/sys/dev/bnxt/bnxt_hwrm.c @@ -122,12 +122,37 @@ _hwrm_send_message(struct bnxt_softc *softc, void *msg, uint32_t msg_len) uint16_t cp_ring_id; uint8_t *valid; uint16_t err; + uint16_t max_req_len = HWRM_MAX_REQ_LEN; + struct hwrm_short_input short_input = {0}; /* TODO: DMASYNC in here. 
*/ req->seq_id = htole16(softc->hwrm_cmd_seq++); memset(resp, 0, PAGE_SIZE); cp_ring_id = le16toh(req->cmpl_ring); + if (softc->flags & BNXT_FLAG_SHORT_CMD) { + void *short_cmd_req = softc->hwrm_short_cmd_req_addr.idi_vaddr; + + memcpy(short_cmd_req, req, msg_len); + memset((uint8_t *) short_cmd_req + msg_len, 0, softc->hwrm_max_req_len- + msg_len); + + short_input.req_type = req->req_type; + short_input.signature = + htole16(HWRM_SHORT_INPUT_SIGNATURE_SHORT_CMD); + short_input.size = htole16(msg_len); + short_input.req_addr = + htole64(softc->hwrm_short_cmd_req_addr.idi_paddr); + + data = (uint32_t *)&short_input; + msg_len = sizeof(short_input); + + /* Sync memory write before updating doorbell */ + wmb(); + + max_req_len = BNXT_HWRM_SHORT_REQ_LEN; + } + /* Write request msg to hwrm channel */ for (i = 0; i < msg_len; i += 4) { bus_space_write_4(softc->hwrm_bar.tag, @@ -137,7 +162,7 @@ _hwrm_send_message(struct bnxt_softc *softc, void *msg, uint32_t msg_len) } /* Clear to the end of the request buffer */ - for (i = msg_len; i < HWRM_MAX_REQ_LEN; i += 4) + for (i = msg_len; i < max_req_len; i += 4) bus_space_write_4(softc->hwrm_bar.tag, softc->hwrm_bar.handle, i, 0); @@ -248,6 +273,7 @@ bnxt_hwrm_ver_get(struct bnxt_softc *softc) int rc; const char nastr[] = ""; const char naver[] = ""; + uint32_t dev_caps_cfg; softc->hwrm_max_req_len = HWRM_MAX_REQ_LEN; softc->hwrm_cmd_timeo = 1000; @@ -323,6 +349,11 @@ bnxt_hwrm_ver_get(struct bnxt_softc *softc) if (resp->def_req_timeout) softc->hwrm_cmd_timeo = le16toh(resp->def_req_timeout); + dev_caps_cfg = le32toh(resp->dev_caps_cfg); + if ((dev_caps_cfg & HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_SHORT_CMD_SUPPORTED) && + (dev_caps_cfg & HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_SHORT_CMD_REQUIRED)) + softc->flags |= BNXT_FLAG_SHORT_CMD; + fail: BNXT_HWRM_UNLOCK(softc); return rc; diff --git a/sys/dev/bnxt/bnxt_hwrm.h b/sys/dev/bnxt/bnxt_hwrm.h index 28f3c08f608..610ac7ce2e1 100644 --- a/sys/dev/bnxt/bnxt_hwrm.h +++ 
b/sys/dev/bnxt/bnxt_hwrm.h @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #define BNXT_PAUSE_RX (HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX) #define BNXT_AUTO_PAUSE_AUTONEG_PAUSE \ (HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_AUTONEG_PAUSE) +#define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) /* HWRM Function Prototypes */ int bnxt_alloc_hwrm_dma_mem(struct bnxt_softc *softc); diff --git a/sys/dev/bnxt/if_bnxt.c b/sys/dev/bnxt/if_bnxt.c index dee60be8cff..6d2e5330fcc 100644 --- a/sys/dev/bnxt/if_bnxt.c +++ b/sys/dev/bnxt/if_bnxt.c @@ -643,6 +643,23 @@ cp_alloc_fail: return rc; } +static void bnxt_free_hwrm_short_cmd_req(struct bnxt_softc *softc) +{ + if (softc->hwrm_short_cmd_req_addr.idi_vaddr) + iflib_dma_free(&softc->hwrm_short_cmd_req_addr); + softc->hwrm_short_cmd_req_addr.idi_vaddr = NULL; +} + +static int bnxt_alloc_hwrm_short_cmd_req(struct bnxt_softc *softc) +{ + int rc; + + rc = iflib_dma_alloc(softc->ctx, softc->hwrm_max_req_len, + &softc->hwrm_short_cmd_req_addr, BUS_DMA_NOWAIT); + + return rc; +} + /* Device setup and teardown */ static int bnxt_attach_pre(if_ctx_t ctx) @@ -714,6 +731,12 @@ bnxt_attach_pre(if_ctx_t ctx) goto ver_fail; } + if (softc->flags & BNXT_FLAG_SHORT_CMD) { + rc = bnxt_alloc_hwrm_short_cmd_req(softc); + if (rc) + goto hwrm_short_cmd_alloc_fail; + } + /* Get NVRAM info */ if (BNXT_PF(softc)) { softc->nvm_info = malloc(sizeof(struct bnxt_nvram_info), @@ -902,6 +925,8 @@ drv_rgtr_fail: if (BNXT_PF(softc)) free(softc->nvm_info, M_DEVBUF); nvm_alloc_fail: + bnxt_free_hwrm_short_cmd_req(softc); +hwrm_short_cmd_alloc_fail: ver_fail: free(softc->ver_info, M_DEVBUF); ver_alloc_fail: @@ -974,6 +999,7 @@ bnxt_detach(if_ctx_t ctx) bnxt_hwrm_func_drv_unrgtr(softc, false); bnxt_free_hwrm_dma_mem(softc); + bnxt_free_hwrm_short_cmd_req(softc); BNXT_HWRM_LOCK_DESTROY(softc); pci_disable_busmaster(softc->dev); From a0b660301a1e851b29db439a3185f87014d035ad Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Tue, 19 Dec 2017 22:06:25 +0000 Subject: [PATCH 
039/115] On Link up & down, update media types It's possible to change the SFP module when link is down, which would change the available media types. This is part of D13358. Submitted by: Bhargava Chenna Marreddy Sponsored by: Broadcom Limited --- sys/dev/bnxt/if_bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/dev/bnxt/if_bnxt.c b/sys/dev/bnxt/if_bnxt.c index 6d2e5330fcc..83aa3b91396 100644 --- a/sys/dev/bnxt/if_bnxt.c +++ b/sys/dev/bnxt/if_bnxt.c @@ -2224,6 +2224,10 @@ bnxt_report_link(struct bnxt_softc *softc) link_info->last_flow_ctrl.tx = link_info->flow_ctrl.tx; link_info->last_flow_ctrl.rx = link_info->flow_ctrl.rx; link_info->last_flow_ctrl.autoneg = link_info->flow_ctrl.autoneg; + /* update media types */ + ifmedia_removeall(softc->media); + bnxt_add_media_types(softc); + ifmedia_set(softc->media, IFM_ETHER | IFM_AUTO); } static int From ff46fd16e53f4e3b70c7c5e816a8d62892a41e43 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Tue, 19 Dec 2017 22:15:46 +0000 Subject: [PATCH 040/115] Add log messages for unknown and unhandled phy types Previously, it silently only supported auto, instead, log a message indicating why only auto is supported. 
Submitted by: Bhargava Chenna Marreddy Sponsored by: Broadcom Limited Differential Revision: https://reviews.freebsd.org/D13358 --- sys/dev/bnxt/if_bnxt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sys/dev/bnxt/if_bnxt.c b/sys/dev/bnxt/if_bnxt.c index 83aa3b91396..2a0262d4932 100644 --- a/sys/dev/bnxt/if_bnxt.c +++ b/sys/dev/bnxt/if_bnxt.c @@ -2085,8 +2085,13 @@ bnxt_add_media_types(struct bnxt_softc *softc) break; case HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN: - default: /* Only Autoneg is supported for TYPE_UNKNOWN */ + device_printf(softc->dev, "Unknown phy type\n"); + break; + + default: + /* Only Autoneg is supported for new phy type values */ + device_printf(softc->dev, "phy type %d not supported by driver\n", phy_type); break; } From b501cc5da6f8e10d354ace1befbf59737543eb30 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 22:39:05 +0000 Subject: [PATCH 041/115] Rework pathconf handling for FIFOs. On the one hand, FIFOs should respect other variables not supported by the fifofs vnode operation (such as _PC_NAME_MAX, _PC_LINK_MAX, etc.). These values are fs-specific and must come from a fs-specific method. On the other hand, filesystems that support FIFOs are required to support _PC_PIPE_BUF on directory vnodes that can contain FIFOs. Given this latter requirement, once the fs-specific VOP_PATHCONF method supports _PC_PIPE_BUF for directories, it is also suitable for FIFOs permitting a single VOP_PATHCONF method to be used for both FIFOs and non-FIFOs. To that end, retire all of the FIFO-specific pathconf methods from filesystems and change FIFO-specific vnode operation switches to use the existing fs-specific VOP_PATHCONF method. For fifofs, set its VOP_PATHCONF to VOP_PANIC since it should no longer be used. While here, move _PC_PIPE_BUF handling out of vop_stdpathconf() so that only filesystems supporting FIFOs will report a value. In addition, only report a valid _PC_PIPE_BUF for directories and FIFOs. 
Discussed with: bde Reviewed by: kib (part of a larger patch) MFC after: 1 month Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D12572 --- .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 28 ++++------------ sys/fs/ext2fs/ext2_vnops.c | 7 ++++ sys/fs/fifofs/fifo_vnops.c | 2 +- sys/fs/nandfs/nandfs_vnops.c | 7 ++++ sys/fs/nfsclient/nfs_clvnops.c | 7 ++++ sys/fs/tmpfs/tmpfs_fifoops.c | 1 + sys/fs/tmpfs/tmpfs_vnops.c | 10 +++++- sys/fs/tmpfs/tmpfs_vnops.h | 1 + sys/fs/udf/udf_vnops.c | 7 ++++ sys/kern/vfs_default.c | 3 -- sys/ufs/ufs/ufs_vnops.c | 33 ++++--------------- 11 files changed, 54 insertions(+), 52 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index def820a2f13..861469d93a8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -5414,31 +5414,17 @@ zfs_freebsd_pathconf(ap) case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); + case _PC_PIPE_BUF: + if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) { + *ap->a_retval = PIPE_BUF; + return (0); + } + return (EINVAL); default: return (vop_stdpathconf(ap)); } } -static int -zfs_freebsd_fifo_pathconf(ap) - struct vop_pathconf_args /* { - struct vnode *a_vp; - int a_name; - register_t *a_retval; - } */ *ap; -{ - - switch (ap->a_name) { - case _PC_ACL_EXTENDED: - case _PC_ACL_NFS4: - case _PC_ACL_PATH_MAX: - case _PC_MAC_PRESENT: - return (zfs_freebsd_pathconf(ap)); - default: - return (fifo_specops.vop_pathconf(ap)); - } -} - /* * FreeBSD's extended attributes namespace defines file name prefix for ZFS' * extended attribute name: @@ -6050,7 +6036,7 @@ struct vop_vector zfs_fifoops = { .vop_reclaim = zfs_freebsd_reclaim, .vop_setattr = zfs_freebsd_setattr, .vop_write = VOP_PANIC, - .vop_pathconf = zfs_freebsd_fifo_pathconf, + .vop_pathconf = zfs_freebsd_pathconf, .vop_fid = 
zfs_freebsd_fid, .vop_getacl = zfs_freebsd_getacl, .vop_setacl = zfs_freebsd_setacl, diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c index 83f0281a083..5a28c88ced8 100644 --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -180,6 +180,7 @@ struct vop_vector ext2_fifoops = { .vop_getattr = ext2_getattr, .vop_inactive = ext2_inactive, .vop_kqfilter = ext2fifo_kqfilter, + .vop_pathconf = ext2_pathconf, .vop_print = ext2_print, .vop_read = VOP_PANIC, .vop_reclaim = ext2_reclaim, @@ -1636,6 +1637,12 @@ ext2_pathconf(struct vop_pathconf_args *ap) case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; break; + case _PC_PIPE_BUF: + if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) + *ap->a_retval = PIPE_BUF; + else + error = EINVAL; + break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c index 8cb1a4062cd..96dd05a1832 100644 --- a/sys/fs/fifofs/fifo_vnops.c +++ b/sys/fs/fifofs/fifo_vnops.c @@ -88,7 +88,7 @@ struct vop_vector fifo_specops = { .vop_mkdir = VOP_PANIC, .vop_mknod = VOP_PANIC, .vop_open = fifo_open, - .vop_pathconf = vop_stdpathconf, + .vop_pathconf = VOP_PANIC, .vop_print = fifo_print, .vop_read = VOP_PANIC, .vop_readdir = VOP_PANIC, diff --git a/sys/fs/nandfs/nandfs_vnops.c b/sys/fs/nandfs/nandfs_vnops.c index a8223db19dd..0e4e5a59389 100644 --- a/sys/fs/nandfs/nandfs_vnops.c +++ b/sys/fs/nandfs/nandfs_vnops.c @@ -2244,6 +2244,12 @@ nandfs_pathconf(struct vop_pathconf_args *ap) case _PC_NAME_MAX: *ap->a_retval = NANDFS_NAME_LEN; break; + case _PC_PIPE_BUF: + if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) + *ap->a_retval = PIPE_BUF; + else + error = EINVAL; + break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; @@ -2414,6 +2420,7 @@ struct vop_vector nandfs_fifoops = { .vop_close = nandfsfifo_close, .vop_getattr = nandfs_getattr, .vop_inactive = nandfs_inactive, + .vop_pathconf = nandfs_pathconf, .vop_print = nandfs_print, 
.vop_read = VOP_PANIC, .vop_reclaim = nandfs_reclaim, diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index fd9ecb0d210..27ff9e29470 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -189,6 +189,7 @@ struct vop_vector newnfs_fifoops = { .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_inactive = ncl_inactive, + .vop_pathconf = nfs_pathconf, .vop_print = nfs_print, .vop_read = nfsfifo_read, .vop_reclaim = ncl_reclaim, @@ -3465,6 +3466,12 @@ nfs_pathconf(struct vop_pathconf_args *ap) case _PC_NAME_MAX: *ap->a_retval = pc.pc_namemax; break; + case _PC_PIPE_BUF: + if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) + *ap->a_retval = PIPE_BUF; + else + error = EINVAL; + break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = pc.pc_chownrestricted; break; diff --git a/sys/fs/tmpfs/tmpfs_fifoops.c b/sys/fs/tmpfs/tmpfs_fifoops.c index eb206ff6906..7719fd38cfb 100644 --- a/sys/fs/tmpfs/tmpfs_fifoops.c +++ b/sys/fs/tmpfs/tmpfs_fifoops.c @@ -71,5 +71,6 @@ struct vop_vector tmpfs_fifoop_entries = { .vop_access = tmpfs_access, .vop_getattr = tmpfs_getattr, .vop_setattr = tmpfs_setattr, + .vop_pathconf = tmpfs_pathconf, .vop_print = tmpfs_print, }; diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index 15dc86610ea..78402f3886b 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -1338,9 +1338,10 @@ tmpfs_print(struct vop_print_args *v) return 0; } -static int +int tmpfs_pathconf(struct vop_pathconf_args *v) { + struct vnode *vp = v->a_vp; int name = v->a_name; register_t *retval = v->a_retval; @@ -1357,6 +1358,13 @@ tmpfs_pathconf(struct vop_pathconf_args *v) *retval = NAME_MAX; break; + case _PC_PIPE_BUF: + if (vp->v_type == VDIR || vp->v_type == VFIFO) + *retval = PIPE_BUF; + else + error = EINVAL; + break; + case _PC_CHOWN_RESTRICTED: *retval = 1; break; diff --git a/sys/fs/tmpfs/tmpfs_vnops.h b/sys/fs/tmpfs/tmpfs_vnops.h index eac37a1a56e..2f89e15629d 100644 --- 
a/sys/fs/tmpfs/tmpfs_vnops.h +++ b/sys/fs/tmpfs/tmpfs_vnops.h @@ -51,6 +51,7 @@ extern struct vop_vector tmpfs_vnodeop_nonc_entries; vop_access_t tmpfs_access; vop_getattr_t tmpfs_getattr; vop_setattr_t tmpfs_setattr; +vop_pathconf_t tmpfs_pathconf; vop_print_t tmpfs_print; vop_reclaim_t tmpfs_reclaim; diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c index 0e1a3005461..6e7706c5c7c 100644 --- a/sys/fs/udf/udf_vnops.c +++ b/sys/fs/udf/udf_vnops.c @@ -102,6 +102,7 @@ struct vop_vector udf_fifoops = { .vop_default = &fifo_specops, .vop_access = udf_access, .vop_getattr = udf_getattr, + .vop_pathconf = udf_pathconf, .vop_print = udf_print, .vop_reclaim = udf_reclaim, .vop_setattr = udf_setattr, @@ -400,6 +401,12 @@ udf_pathconf(struct vop_pathconf_args *a) case _PC_NO_TRUNC: *a->a_retval = 1; return (0); + case _PC_PIPE_BUF: + if (a->a_vp->v_type == VDIR || a->a_vp->v_type == VFIFO) { + *a->a_retval = PIPE_BUF; + return (0); + } + return (EINVAL); default: return (vop_stdpathconf(a)); } diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 7f263d36426..734c2f2e601 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -482,9 +482,6 @@ vop_stdpathconf(ap) case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); - case _PC_PIPE_BUF: - *ap->a_retval = PIPE_BUF; - return (0); default: return (EINVAL); } diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index f7861fc4aa1..018a6a0e74e 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -124,7 +124,6 @@ static vop_symlink_t ufs_symlink; static vop_whiteout_t ufs_whiteout; static vop_close_t ufsfifo_close; static vop_kqfilter_t ufsfifo_kqfilter; -static vop_pathconf_t ufsfifo_pathconf; SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem"); @@ -2405,30 +2404,6 @@ ufsfifo_kqfilter(ap) return (error); } -/* - * Return POSIX pathconf information applicable to fifos. 
- */ -static int -ufsfifo_pathconf(ap) - struct vop_pathconf_args /* { - struct vnode *a_vp; - int a_name; - int *a_retval; - } */ *ap; -{ - - switch (ap->a_name) { - case _PC_ACL_EXTENDED: - case _PC_ACL_NFS4: - case _PC_ACL_PATH_MAX: - case _PC_MAC_PRESENT: - return (ufs_pathconf(ap)); - default: - return (fifo_specops.vop_pathconf(ap)); - } - /* NOTREACHED */ -} - /* * Return POSIX pathconf information applicable to ufs filesystems. */ @@ -2450,6 +2425,12 @@ ufs_pathconf(ap) case _PC_NAME_MAX: *ap->a_retval = UFS_MAXNAMLEN; break; + case _PC_PIPE_BUF: + if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) + *ap->a_retval = PIPE_BUF; + else + error = EINVAL; + break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; @@ -2803,7 +2784,7 @@ struct vop_vector ufs_fifoops = { .vop_inactive = ufs_inactive, .vop_kqfilter = ufsfifo_kqfilter, .vop_markatime = ufs_markatime, - .vop_pathconf = ufsfifo_pathconf, + .vop_pathconf = ufs_pathconf, .vop_print = ufs_print, .vop_read = VOP_PANIC, .vop_reclaim = ufs_reclaim, From d17aef79bb2cd7646e57f533d7418938edd74971 Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Tue, 19 Dec 2017 22:40:16 +0000 Subject: [PATCH 042/115] SPDX: These are fundamentally BSD-2-Clause. They just omit the introductory line and numbering. 
--- sbin/ipfw/altq.c | 4 +++- sbin/ipfw/dummynet.c | 4 +++- sbin/ipfw/ipfw2.c | 4 +++- sbin/ipfw/ipfw2.h | 4 +++- sbin/ipfw/ipv6.c | 4 +++- sbin/ipfw/main.c | 4 +++- sbin/ipfw/nat.c | 4 +++- sys/sys/msg.h | 2 +- sys/sys/snoop.h | 2 +- usr.sbin/watch/watch.c | 2 +- 10 files changed, 24 insertions(+), 10 deletions(-) diff --git a/sbin/ipfw/altq.c b/sbin/ipfw/altq.c index 8398ab611f2..a78852cfd63 100644 --- a/sbin/ipfw/altq.c +++ b/sbin/ipfw/altq.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause + * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c index 0aa3bb5c1e3..e3a04b389eb 100644 --- a/sbin/ipfw/dummynet.c +++ b/sbin/ipfw/dummynet.c @@ -1,4 +1,6 @@ -/* +/*. + * SPDX-License-Identifier: BSD-2-Clause + * * Codel/FQ_Codel and PIE/FQ_PIE Code: * Copyright (C) 2016 Centre for Advanced Internet Architectures, * Swinburne University of Technology, Melbourne, Australia. 
diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index 7c491466cbb..455669763bf 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause + * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h index 8befdc91cc9..274c0008a95 100644 --- a/sbin/ipfw/ipfw2.h +++ b/sbin/ipfw/ipfw2.h @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause + * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/ipv6.c b/sbin/ipfw/ipv6.c index 4fce1d2a288..5c643ca911e 100644 --- a/sbin/ipfw/ipv6.c +++ b/sbin/ipfw/ipv6.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause + * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c index b7ff07a6094..0ea0bb7cb63 100644 --- a/sbin/ipfw/main.c +++ b/sbin/ipfw/main.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause + * * Copyright (c) 2002-2003,2010 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/nat.c b/sbin/ipfw/nat.c index 28dac8645bf..70a50de947c 100644 --- a/sbin/ipfw/nat.c +++ b/sbin/ipfw/nat.c @@ -1,4 +1,6 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause + * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sys/sys/msg.h b/sys/sys/msg.h index a0a963d551d..d2e9f756d5d 100644 --- a/sys/sys/msg.h +++ b/sys/sys/msg.h @@ -6,7 +6,7 @@ * * Author: Daniel Boulet * - * SPDX-License-Identifier: BSD-1-Clause + * SPDX-License-Identifier: BSD-2-Clause * * 
Copyright 1993 Daniel Boulet and RTMX Inc. * diff --git a/sys/sys/snoop.h b/sys/sys/snoop.h index f2d6b31a903..1a5e117976f 100644 --- a/sys/sys/snoop.h +++ b/sys/sys/snoop.h @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-1-Clause + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1995 Ugen J.S.Antsilevich * diff --git a/usr.sbin/watch/watch.c b/usr.sbin/watch/watch.c index e0999757ab6..3585d4555b1 100644 --- a/usr.sbin/watch/watch.c +++ b/usr.sbin/watch/watch.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-1-Clause + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1995 Ugen J.S.Antsilevich * From f83f3d7986704b68a24d1dc48781b2cb0452ff64 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 22:40:54 +0000 Subject: [PATCH 043/115] Update link count handling in fuse for post-ino64. Set FUSE_LINK_MAX to UINT32_MAX instead of LINK_MAX to match the maximum link count possible in the 'nlink' field of 'struct fuse_attr'. Sponsored by: Chelsio Communications --- sys/fs/fuse/fuse_param.h | 2 +- sys/fs/fuse/fuse_vnops.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/fs/fuse/fuse_param.h b/sys/fs/fuse/fuse_param.h index 1ba68f9e046..fd35d175751 100644 --- a/sys/fs/fuse/fuse_param.h +++ b/sys/fs/fuse/fuse_param.h @@ -77,6 +77,6 @@ #endif -#define FUSE_LINK_MAX LINK_MAX +#define FUSE_LINK_MAX UINT32_MAX #endif /* _FUSE_PARAM_H_ */ diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index a0e4056584a..54796a66d51 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1188,7 +1188,7 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap) *ap->a_retval = NAME_MAX; return (0); case _PC_LINK_MAX: - *ap->a_retval = FUSE_LINK_MAX; + *ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX); return (0); case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; From 5538424353356d7318a7c7deada63d1e6015bf2f Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 22:43:39 +0000 Subject: [PATCH 044/115] 
Replace one more LINK_MAX with NFS_LINK_MAX missed in r326991. Sponsored by: Chelsio Communications --- sys/fs/nfs/nfs_commonsubs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index dc441cecef0..1856b98d744 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -2301,7 +2301,7 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, break; case NFSATTRBIT_MAXLINK: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); - *tl = txdr_unsigned(LINK_MAX); + *tl = txdr_unsigned(NFS_LINK_MAX); retnum += NFSX_UNSIGNED; break; case NFSATTRBIT_MAXNAME: From f27d3a8a722e83012b9e282344365190ba88021f Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 19 Dec 2017 23:54:44 +0000 Subject: [PATCH 045/115] Don't return early for non-failure for one of the EMLINK checks. r326987 enabled two #if 0'd-out EMLINK checks in zfs_link_create() for link overflow. However, one of the checks (when the vnode adding a link is a directory such as for mkdir) always returned even if the link did not overflow. Change this to only return early if it needs to report an EMLINK error. 
Reported by: db, shurd Sponsored by: Chelsio Communications --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c index e0d3c889091..0c15a60bbcc 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c @@ -529,10 +529,8 @@ zfs_link_create(znode_t *dzp, const char *name, znode_t *zp, dmu_tx_t *tx, ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); #ifdef __FreeBSD__ if (zp_is_dir) { - error = 0; if (dzp->z_links >= ZFS_LINK_MAX) - error = SET_ERROR(EMLINK); - return (error); + return (SET_ERROR(EMLINK)); } #endif if (!(flag & ZRENAMING)) { From b103855e187d28667b9f94233b1ef9b478ad9f91 Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Wed, 20 Dec 2017 01:03:34 +0000 Subject: [PATCH 046/115] Support attaching tx queues to cpus This will attempt to use a different thread/core on the same L2 cache when possible, or use the same cpu as the rx thread when not. If SMP isn't enabled, don't go looking for cores to use. This is mostly useful when using shared TX/RX queues. 
Reviewed by: sbruno Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D12446 --- sys/net/iflib.c | 142 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 131 insertions(+), 11 deletions(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index d476edddc42..663a615a0d9 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" +#include "opt_sched.h" #include #include @@ -5044,25 +5045,136 @@ iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } +#ifdef SMP static int -find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) +find_nth(if_ctx_t ctx, int qid) { + cpuset_t cpus; int i, cpuid, eqid, count; - CPU_COPY(&ctx->ifc_cpus, cpus); - count = CPU_COUNT(cpus); + CPU_COPY(&ctx->ifc_cpus, &cpus); + count = CPU_COUNT(&cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { - cpuid = CPU_FFS(cpus); + cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); - CPU_CLR(cpuid-1, cpus); + CPU_CLR(cpuid-1, &cpus); } - cpuid = CPU_FFS(cpus); + cpuid = CPU_FFS(&cpus); MPASS(cpuid != 0); return (cpuid-1); } +#ifdef SCHED_ULE +extern struct cpu_group *cpu_top; /* CPU topology */ + +static int +find_child_with_core(int cpu, struct cpu_group *grp) +{ + int i; + + if (grp->cg_children == 0) + return -1; + + MPASS(grp->cg_child); + for (i = 0; i < grp->cg_children; i++) { + if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) + return i; + } + + return -1; +} + +/* + * Find the nth thread on the specified core + */ +static int +find_thread(int cpu, int thread_num) +{ + struct cpu_group *grp; + int i; + cpuset_t cs; + + grp = cpu_top; + if (grp == NULL) + return cpu; + i = 0; + while ((i = find_child_with_core(cpu, grp)) != -1) { + /* If the child only has one cpu, don't descend */ + if (grp->cg_child[i].cg_count <= 1) + break; + grp = &grp->cg_child[i]; + } + + /* 
If they don't share at least an L2 cache, use the same CPU */ + if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) + return cpu; + + /* Now pick one */ + CPU_COPY(&grp->cg_mask, &cs); + for (i = thread_num % grp->cg_count; i > 0; i--) { + MPASS(CPU_FFS(&cs)); + CPU_CLR(CPU_FFS(&cs) - 1, &cs); + } + MPASS(CPU_FFS(&cs)); + return CPU_FFS(&cs) - 1; +} +#else +static int +find_thread(int cpu, int thread_num __unused) +{ + return cpu_id +} +#endif + +static int +get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) +{ + switch (type) { + case IFLIB_INTR_TX: + /* TX queues get threads on the same core as the corresponding RX queue */ + /* XXX handle multiple RX threads per core and more than two threads per core */ + return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; + case IFLIB_INTR_RX: + case IFLIB_INTR_RXTX: + /* RX queues get the first thread on their core */ + return qid / CPU_COUNT(&ctx->ifc_cpus); + default: + return -1; + } +} +#else +#define get_thread_num(ctx, type, qid) CPU_FIRST() +#define find_thread(cpuid, tid) CPU_FIRST() +#define find_nth(ctx, gid) CPU_FIRST() +#endif + +/* Just to avoid copy/paste */ +static inline int +iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, + struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name) +{ + int cpuid; + int err, tid; + + cpuid = find_nth(ctx, qid); + tid = get_thread_num(ctx, type, qid); + MPASS(tid >= 0); + cpuid = find_thread(cpuid, tid); + err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); + if (err) { + device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); + return (err); + } +#ifdef notyet + if (cpuid > ctx->ifc_cpuid_highest) + ctx->ifc_cpuid_highest = cpuid; +#endif + MPASS(gtask->gt_taskqueue != NULL); + return 0; +} + int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, @@ -5071,9 +5183,8 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, 
struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; - cpuset_t cpus; gtask_fn_t *fn; - int tqrid, err, cpuid; + int tqrid, err; driver_filter_t *intr_fast; void *q; @@ -5136,8 +5247,9 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, return (0); if (tqrid != -1) { - cpuid = find_nth(ctx, &cpus, qid); - taskqgroup_attach_cpu(tqg, gtask, q, cpuid, rman_get_start(irq->ii_res), name); + err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); + if (err) + return (err); } else { taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); } @@ -5153,6 +5265,7 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, gtask_fn_t *fn; void *q; int irq_num = -1; + int err; switch (type) { case IFLIB_INTR_TX: @@ -5181,7 +5294,14 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); - taskqgroup_attach(tqg, gtask, q, irq_num, name); + if (irq_num != -1) { + err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name); + if (err) + taskqgroup_attach(tqg, gtask, q, irq_num, name); + } + else { + taskqgroup_attach(tqg, gtask, q, irq_num, name); + } } void From 40cf51c43852ee6a0961d3bc6f1ff470102d7a29 Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Wed, 20 Dec 2017 06:08:16 +0000 Subject: [PATCH 047/115] Add missing `;` Approved by: kevlo --- sys/net/iflib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 663a615a0d9..370efbb7da7 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -5124,7 +5124,7 @@ find_thread(int cpu, int thread_num) static int find_thread(int cpu, int thread_num __unused) { - return cpu_id + return cpu_id; } #endif From d0aec74836fef7ed551fe8edb24639c0f86b908c Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Wed, 20 Dec 2017 13:13:10 +0000 Subject: [PATCH 048/115] Make truss(8) work for 
32-bit CloudABI executables on ARM64. This change effectively merges the existing 64-bit support for ARM64 with the 32-on-64-bit support for AMD64. --- usr.bin/truss/Makefile | 1 + usr.bin/truss/aarch64-cloudabi32.c | 112 +++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 usr.bin/truss/aarch64-cloudabi32.c diff --git a/usr.bin/truss/Makefile b/usr.bin/truss/Makefile index c657d837164..ad4f0f5ac30 100644 --- a/usr.bin/truss/Makefile +++ b/usr.bin/truss/Makefile @@ -13,6 +13,7 @@ ABIS+= freebsd # Each ABI is expected to have an ABI.c, MACHINE_ARCH-ABI.c or # MACHINE_CPUARCH-ABI.c file that will be used to map the syscall arguments. .if ${MACHINE_ARCH} == "aarch64" +ABIS+= cloudabi32 ABIS+= cloudabi64 .endif .if ${MACHINE_CPUARCH} == "i386" diff --git a/usr.bin/truss/aarch64-cloudabi32.c b/usr.bin/truss/aarch64-cloudabi32.c new file mode 100644 index 00000000000..c3a5d6bfc48 --- /dev/null +++ b/usr.bin/truss/aarch64-cloudabi32.c @@ -0,0 +1,112 @@ +/*- + * Copyright (c) 2015-2017 Nuxi, https://nuxi.nl/ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include + +#include +#include +#include + +#include "truss.h" + +static int +aarch64_cloudabi32_fetch_args(struct trussinfo *trussinfo, unsigned int narg) +{ + struct current_syscall *cs; + struct ptrace_io_desc iorequest; + struct reg regs; + lwpid_t tid; + + if (narg > 0) { + /* Fetch registers, containing the address of the arguments. */ + tid = trussinfo->curthread->tid; + if (ptrace(PT_GETREGS, tid, (caddr_t)&regs, 0) == -1) { + fprintf(trussinfo->outfile, + "-- CANNOT READ REGISTERS --\n"); + return (-1); + } + + /* Fetch arguments. They are already padded to 64 bits. */ + cs = &trussinfo->curthread->cs; + iorequest.piod_op = PIOD_READ_D; + iorequest.piod_offs = (void *)regs.x[2]; + iorequest.piod_addr = cs->args; + iorequest.piod_len = sizeof(cs->args[0]) * narg; + if (ptrace(PT_IO, tid, (caddr_t)&iorequest, 0) == -1 || + iorequest.piod_len == 0) + return (-1); + } + return (0); +} + +static int +aarch64_cloudabi32_fetch_retval(struct trussinfo *trussinfo, long *retval, + int *errorp) +{ + struct ptrace_io_desc iorequest; + struct reg regs; + lwpid_t tid; + + /* Fetch registers, containing the address of the return values. */ + tid = trussinfo->curthread->tid; + if (ptrace(PT_GETREGS, tid, (caddr_t)&regs, 0) == -1) { + fprintf(trussinfo->outfile, "-- CANNOT READ REGISTERS --\n"); + return (-1); + } + + if ((regs.spsr & PSR_C) == 0) { + /* System call succeeded.
Fetch return values. */ + iorequest.piod_op = PIOD_READ_D; + iorequest.piod_offs = (void *)regs.x[2]; + iorequest.piod_addr = retval; + iorequest.piod_len = sizeof(retval[0]) * 2; + if (ptrace(PT_IO, tid, (caddr_t)&iorequest, 0) == -1 || + iorequest.piod_len == 0) + return (-1); + *errorp = 0; + } else { + /* System call failed. Set error. */ + retval[0] = regs.x[0]; + *errorp = 1; + } + return (0); +} + +static struct procabi aarch64_cloudabi32 = { + "CloudABI ELF32", + SYSDECODE_ABI_CLOUDABI32, + aarch64_cloudabi32_fetch_args, + aarch64_cloudabi32_fetch_retval, + STAILQ_HEAD_INITIALIZER(aarch64_cloudabi32.extra_syscalls), + { NULL } +}; + +PROCABI(aarch64_cloudabi32); From 71688f3b71ad9a85483a2e92debf9981ccf3ac84 Mon Sep 17 00:00:00 2001 From: Brad Davis Date: Wed, 20 Dec 2017 15:21:29 +0000 Subject: [PATCH 049/115] Save others some forehead damange by noting that -r require tmpfs. Reviewed by: bapt --- sbin/reboot/reboot.8 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sbin/reboot/reboot.8 b/sbin/reboot/reboot.8 index 6f2d023e8bb..6833fdd213a 100644 --- a/sbin/reboot/reboot.8 +++ b/sbin/reboot/reboot.8 @@ -28,7 +28,7 @@ .\" @(#)reboot.8 8.1 (Berkeley) 6/9/93 .\" $FreeBSD$ .\" -.Dd October 23, 2017 +.Dd December 20, 2017 .Dt REBOOT 8 .Os .Sh NAME @@ -142,6 +142,9 @@ After changing vfs.root.mountfrom with .Xr kenv 1 , .Nm Fl r can be used to change the root filesystem while preserving kernel state. +This requires the +.Xr tmpfs 5 +kernel module to be loaded. .El .Pp The From 8c0fa2cc56c74b96dc043bc7d9d8e3db005b33a8 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 20 Dec 2017 16:02:11 +0000 Subject: [PATCH 050/115] Flesh out the reason for the need for tmpfs a little. 
Sponsored by: Netflix --- sbin/reboot/reboot.8 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sbin/reboot/reboot.8 b/sbin/reboot/reboot.8 index 6833fdd213a..5fee3ece7c6 100644 --- a/sbin/reboot/reboot.8 +++ b/sbin/reboot/reboot.8 @@ -144,7 +144,10 @@ After changing vfs.root.mountfrom with can be used to change the root filesystem while preserving kernel state. This requires the .Xr tmpfs 5 -kernel module to be loaded. +kernel module to be loaded because +.Xr init 8 +needs a place to store itself after the old root is unmounted, but +before the new root is in place. .El .Pp The From 87879ba80552aaaa6fdb22fc06c86a180d657ee0 Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Wed, 20 Dec 2017 16:49:45 +0000 Subject: [PATCH 051/115] Increase default MAXDSIZ to 32G on powerpc64 Linking LLVM now seems to require more than 1GB data size, so increase the default to 32G, which matches amd64. Reviewed by: nwhitehorn --- sys/powerpc/include/vmparam.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h index ed58d44938b..04639416697 100644 --- a/sys/powerpc/include/vmparam.h +++ b/sys/powerpc/include/vmparam.h @@ -48,8 +48,12 @@ #endif #ifndef MAXDSIZ +#ifdef __powerpc64__ +#define MAXDSIZ (32UL*1024*1024*1024) /* max data size */ +#else #define MAXDSIZ (1*1024*1024*1024) /* max data size */ #endif +#endif #ifndef DFLSSIZ #define DFLSSIZ (8*1024*1024) /* default stack size */ From 23e1a2d7dae369ecf1edd3e909823f42536014d0 Mon Sep 17 00:00:00 2001 From: Hajimu UMEMOTO Date: Wed, 20 Dec 2017 17:44:31 +0000 Subject: [PATCH 052/115] Don't ignore trailing spaces after numerical IP addresses. 
PR: 224403 Reported by: Michael Kaufmann Reviewed by: Michael Kaufmann MFC after: 1 week --- lib/libc/net/getaddrinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libc/net/getaddrinfo.c b/lib/libc/net/getaddrinfo.c index 4cdb2f2cc0d..a33f240bc37 100644 --- a/lib/libc/net/getaddrinfo.c +++ b/lib/libc/net/getaddrinfo.c @@ -1277,7 +1277,8 @@ explore_numeric(const struct addrinfo *pai, const char *hostname, * does not accept. So we need to separate the case for * AF_INET. */ - if (inet_aton(hostname, (struct in_addr *)pton) != 1) + if (inet_aton(hostname, (struct in_addr *)pton) != 1 || + hostname[strspn(hostname, "0123456789.xabcdefXABCDEF")] != '\0') return 0; p = pton; break; From c19c7afee3c8bb5e3046be27d083f4fa51ee5d73 Mon Sep 17 00:00:00 2001 From: Eric Joyner Date: Wed, 20 Dec 2017 18:15:06 +0000 Subject: [PATCH 053/115] ixgbe(4): Convert driver to use iflib Initial update to the ixgbe PF and VF drivers to support the iflib interface. The PF driver version is bumped to 4.0.0, and the VF driver version is bumped to 2.0.0. Special thanks to sbruno@ for the support in helping make this conversion happen. 
Submitted by: Jeb Cramer , Krzysztof Galazka (Chris) , Piotr Pietruszewski Reviewed by: sbruno@, shurd@, #IntelNetworking Tested by: Jeffrey Pieper , Sergey Kozlov Sponsored by: Limelight Networks, Intel Corporation Differential Revision: https://reviews.freebsd.org/D11727 --- sys/conf/files | 4 +- sys/dev/ixgbe/if_bypass.c | 91 +- sys/dev/ixgbe/if_fdir.c | 11 +- sys/dev/ixgbe/if_ix.c | 3149 +++++++++++++--------------------- sys/dev/ixgbe/if_ixv.c | 1969 ++++++++------------- sys/dev/ixgbe/if_sriov.c | 103 +- sys/dev/ixgbe/ix_txrx.c | 2389 ++++---------------------- sys/dev/ixgbe/ixgbe.h | 233 +-- sys/dev/ixgbe/ixgbe_common.c | 10 +- sys/dev/ixgbe/ixgbe_fdir.h | 2 +- sys/dev/ixgbe/ixgbe_netmap.c | 521 ------ sys/dev/ixgbe/ixgbe_netmap.h | 59 - sys/dev/ixgbe/ixgbe_osdep.c | 40 +- sys/dev/ixgbe/ixgbe_osdep.h | 40 +- sys/dev/ixgbe/ixgbe_phy.c | 15 +- sys/dev/ixgbe/ixgbe_sriov.h | 15 +- sys/dev/ixgbe/ixgbe_type.h | 2 +- sys/dev/ixgbe/ixgbe_vf.h | 40 +- sys/modules/ix/Makefile | 3 +- sys/modules/ixv/Makefile | 4 +- 20 files changed, 2541 insertions(+), 6159 deletions(-) delete mode 100644 sys/dev/ixgbe/ixgbe_netmap.c delete mode 100644 sys/dev/ixgbe/ixgbe_netmap.h diff --git a/sys/conf/files b/sys/conf/files index ebd14083bd4..1f62c7a210e 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2223,11 +2223,9 @@ dev/ixgbe/if_ixv.c optional ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP" dev/ixgbe/if_bypass.c optional ix inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/ixgbe_netmap.c optional ix inet \ - compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/if_fdir.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" -dev/ixgbe/if_sriov.c optional ix inet | ixv inet \ +dev/ixgbe/if_sriov.c optional ix inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ix_txrx.c optional ix inet | ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" diff --git a/sys/dev/ixgbe/if_bypass.c b/sys/dev/ixgbe/if_bypass.c index 
497de3be7ec..c5e640a5337 100644 --- a/sys/dev/ixgbe/if_bypass.c +++ b/sys/dev/ixgbe/if_bypass.c @@ -165,12 +165,12 @@ ixgbe_bp_set_state(SYSCTL_HANDLER_ARGS) error = hw->mac.ops.bypass_rw(hw, BYPASS_PAGE_CTL0, &state); ixgbe_bypass_mutex_clear(adapter); - if (error) + if (error != 0) return (error); state = (state >> BYPASS_STATUS_OFF_SHIFT) & 0x3; error = sysctl_handle_int(oidp, &state, 0, req); - if ((error) || (req->newptr == NULL)) + if ((error != 0) || (req->newptr == NULL)) return (error); /* Sanity check new state */ @@ -437,7 +437,7 @@ ixgbe_bp_wd_set(SYSCTL_HANDLER_ARGS) struct ixgbe_hw *hw = &adapter->hw; int error, tmp; static int timeout = 0; - u32 mask, arg = BYPASS_PAGE_CTL0; + u32 mask, arg; /* Get the current hardware value */ ixgbe_bypass_mutex_enter(adapter); @@ -456,48 +456,38 @@ ixgbe_bp_wd_set(SYSCTL_HANDLER_ARGS) if ((error) || (req->newptr == NULL)) return (error); - mask = BYPASS_WDT_ENABLE_M; + arg = 0x1 << BYPASS_WDT_ENABLE_SHIFT; + mask = BYPASS_WDT_ENABLE_M | BYPASS_WDT_VALUE_M; switch (timeout) { - case 0: /* disables the timer */ - break; - case 1: - arg = BYPASS_WDT_1_5 << BYPASS_WDT_TIME_SHIFT; - arg |= 0x1 << BYPASS_WDT_ENABLE_SHIFT; - mask |= BYPASS_WDT_VALUE_M; - break; - case 2: - arg = BYPASS_WDT_2 << BYPASS_WDT_TIME_SHIFT; - arg |= 0x1 << BYPASS_WDT_ENABLE_SHIFT; - mask |= BYPASS_WDT_VALUE_M; - break; - case 3: - arg = BYPASS_WDT_3 << BYPASS_WDT_TIME_SHIFT; - arg |= 0x1 << BYPASS_WDT_ENABLE_SHIFT; - mask |= BYPASS_WDT_VALUE_M; - break; - case 4: - arg = BYPASS_WDT_4 << BYPASS_WDT_TIME_SHIFT; - arg |= 0x1 << BYPASS_WDT_ENABLE_SHIFT; - mask |= BYPASS_WDT_VALUE_M; - break; - case 8: - arg = BYPASS_WDT_8 << BYPASS_WDT_TIME_SHIFT; - arg |= 0x1 << BYPASS_WDT_ENABLE_SHIFT; - mask |= BYPASS_WDT_VALUE_M; - break; - case 16: - arg = BYPASS_WDT_16 << BYPASS_WDT_TIME_SHIFT; - arg |= 0x1 << BYPASS_WDT_ENABLE_SHIFT; - mask |= BYPASS_WDT_VALUE_M; - break; - case 32: - arg = BYPASS_WDT_32 << BYPASS_WDT_TIME_SHIFT; - arg |= 0x1 << 
BYPASS_WDT_ENABLE_SHIFT; - mask |= BYPASS_WDT_VALUE_M; - break; - default: - return (EINVAL); + case 0: /* disables the timer */ + arg = BYPASS_PAGE_CTL0; + mask = BYPASS_WDT_ENABLE_M; + break; + case 1: + arg |= BYPASS_WDT_1_5 << BYPASS_WDT_TIME_SHIFT; + break; + case 2: + arg |= BYPASS_WDT_2 << BYPASS_WDT_TIME_SHIFT; + break; + case 3: + arg |= BYPASS_WDT_3 << BYPASS_WDT_TIME_SHIFT; + break; + case 4: + arg |= BYPASS_WDT_4 << BYPASS_WDT_TIME_SHIFT; + break; + case 8: + arg |= BYPASS_WDT_8 << BYPASS_WDT_TIME_SHIFT; + break; + case 16: + arg |= BYPASS_WDT_16 << BYPASS_WDT_TIME_SHIFT; + break; + case 32: + arg |= BYPASS_WDT_32 << BYPASS_WDT_TIME_SHIFT; + break; + default: + return (EINVAL); } + /* Set the new watchdog */ ixgbe_bypass_mutex_enter(adapter); error = hw->mac.ops.bypass_set(hw, BYPASS_PAGE_CTL0, mask, arg); @@ -541,7 +531,8 @@ ixgbe_bp_wd_reset(SYSCTL_HANDLER_ARGS) error = IXGBE_BYPASS_FW_WRITE_FAILURE; break; } - if (hw->mac.ops.bypass_rw(hw, BYPASS_PAGE_CTL1, &reset_wd)) { + error = hw->mac.ops.bypass_rw(hw, BYPASS_PAGE_CTL1, &reset_wd); + if (error != 0) { error = IXGBE_ERR_INVALID_ARGUMENT; break; } @@ -615,7 +606,7 @@ ixgbe_bp_log(SYSCTL_HANDLER_ARGS) &data); ixgbe_bypass_mutex_clear(adapter); if (error) - return (-EINVAL); + return (EINVAL); eeprom[count].logs += data << (8 * i); } @@ -624,7 +615,7 @@ ixgbe_bp_log(SYSCTL_HANDLER_ARGS) log_off + i, &eeprom[count].actions); ixgbe_bypass_mutex_clear(adapter); if (error) - return (-EINVAL); + return (EINVAL); /* Quit if not a unread log */ if (!(eeprom[count].logs & BYPASS_LOG_CLEAR_M)) @@ -696,21 +687,21 @@ ixgbe_bp_log(SYSCTL_HANDLER_ARGS) ixgbe_bypass_mutex_clear(adapter); if (error) - return (-EINVAL); + return (EINVAL); } status = 0; /* reset */ /* Another log command can now run */ while (atomic_cmpset_int(&adapter->bypass.log, 1, 0) == 0) usec_delay(3000); - return(error); + return (error); unlock_err: ixgbe_bypass_mutex_clear(adapter); status = 0; /* reset */ while 
(atomic_cmpset_int(&adapter->bypass.log, 1, 0) == 0) usec_delay(3000); - return (-EINVAL); + return (EINVAL); } /* ixgbe_bp_log */ /************************************************************************ @@ -802,7 +793,5 @@ ixgbe_bypass_init(struct adapter *adapter) adapter, 0, ixgbe_bp_wd_reset, "S", "Bypass WD Reset"); adapter->feat_en |= IXGBE_FEATURE_BYPASS; - - return; } /* ixgbe_bypass_init */ diff --git a/sys/dev/ixgbe/if_fdir.c b/sys/dev/ixgbe/if_fdir.c index fa6e4ac30cb..09a5b70464a 100644 --- a/sys/dev/ixgbe/if_fdir.c +++ b/sys/dev/ixgbe/if_fdir.c @@ -50,10 +50,11 @@ ixgbe_init_fdir(struct adapter *adapter) } /* ixgbe_init_fdir */ void -ixgbe_reinit_fdir(void *context, int pending) +ixgbe_reinit_fdir(void *context) { - struct adapter *adapter = context; - struct ifnet *ifp = adapter->ifp; + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); if (!(adapter->feat_en & IXGBE_FEATURE_FDIR)) return; @@ -146,9 +147,9 @@ ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) /* TASK_INIT needs this function defined regardless if it's enabled */ void -ixgbe_reinit_fdir(void *context, int pending) +ixgbe_reinit_fdir(void *context) { - UNREFERENCED_2PARAMETER(context, pending); + UNREFERENCED_PARAMETER(context); } /* ixgbe_reinit_fdir */ void diff --git a/sys/dev/ixgbe/if_ix.c b/sys/dev/ixgbe/if_ix.c index 46338a76e47..526b217334a 100644 --- a/sys/dev/ixgbe/if_ix.c +++ b/sys/dev/ixgbe/if_ix.c @@ -33,18 +33,21 @@ /*$FreeBSD$*/ -#ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" -#endif #include "ixgbe.h" +#include "ixgbe_sriov.h" +#include "ifdi_if.h" + +#include +#include /************************************************************************ * Driver version ************************************************************************/ -char ixgbe_driver_version[] = "3.2.12-k"; +char ixgbe_driver_version[] = "4.0.0-k"; 
/************************************************************************ @@ -56,180 +59,173 @@ char ixgbe_driver_version[] = "3.2.12-k"; * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } ************************************************************************/ -static ixgbe_vendor_info_t ixgbe_vendor_info_array[] = +static pci_vendor_info_t ixgbe_vendor_info_array[] = { - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 
0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_1G_T, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR_L, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP_N, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII_L, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_10G_T, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T_L, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_BYPASS, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS, 0, 0, 0}, + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, 
IXGBE_DEV_ID_82598_CX4_DUAL_PORT, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, "Intel(R) 
PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP_N, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_10G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T_L, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_BYPASS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS, "Intel(R) PRO/10GbE PCI-Express Network Driver"), /* required last entry */ - {0, 0, 0, 0, 0} + PVID_END }; -/************************************************************************ - * 
Table of branding strings - ************************************************************************/ -static char *ixgbe_strings[] = { - "Intel(R) PRO/10GbE PCI-Express Network Driver" -}; +static void *ixgbe_register(device_t dev); +static int ixgbe_if_attach_pre(if_ctx_t ctx); +static int ixgbe_if_attach_post(if_ctx_t ctx); +static int ixgbe_if_detach(if_ctx_t ctx); +static int ixgbe_if_shutdown(if_ctx_t ctx); +static int ixgbe_if_suspend(if_ctx_t ctx); +static int ixgbe_if_resume(if_ctx_t ctx); + +static void ixgbe_if_stop(if_ctx_t ctx); +void ixgbe_if_enable_intr(if_ctx_t ctx); +static void ixgbe_if_disable_intr(if_ctx_t ctx); +static int ixgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); +static void ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr); +static int ixgbe_if_media_change(if_ctx_t ctx); +static int ixgbe_if_msix_intr_assign(if_ctx_t, int); +static int ixgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu); +static void ixgbe_if_crcstrip_set(if_ctx_t ctx, int onoff, int strip); +static void ixgbe_if_multi_set(if_ctx_t ctx); +static int ixgbe_if_promisc_set(if_ctx_t ctx, int flags); +static int ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, + uint64_t *paddrs, int nrxqs, int nrxqsets); +static int ixgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, + uint64_t *paddrs, int nrxqs, int nrxqsets); +static void ixgbe_if_queues_free(if_ctx_t ctx); +static void ixgbe_if_timer(if_ctx_t ctx, uint16_t); +static void ixgbe_if_update_admin_status(if_ctx_t ctx); +static void ixgbe_if_vlan_register(if_ctx_t ctx, u16 vtag); +static void ixgbe_if_vlan_unregister(if_ctx_t ctx, u16 vtag); + +int ixgbe_intr(void *arg); /************************************************************************ * Function prototypes ************************************************************************/ -static int ixgbe_probe(device_t); -static int ixgbe_attach(device_t); -static int ixgbe_detach(device_t); -static int ixgbe_shutdown(device_t); -static int 
ixgbe_suspend(device_t); -static int ixgbe_resume(device_t); -static int ixgbe_ioctl(struct ifnet *, u_long, caddr_t); -static void ixgbe_init(void *); -static void ixgbe_init_locked(struct adapter *); -static void ixgbe_stop(void *); #if __FreeBSD_version >= 1100036 -static uint64_t ixgbe_get_counter(struct ifnet *, ift_counter); +static uint64_t ixgbe_if_get_counter(if_ctx_t, ift_counter); #endif -static void ixgbe_init_device_features(struct adapter *); -static void ixgbe_check_fan_failure(struct adapter *, u32, bool); -static void ixgbe_add_media_types(struct adapter *); -static void ixgbe_media_status(struct ifnet *, struct ifmediareq *); -static int ixgbe_media_change(struct ifnet *); -static int ixgbe_allocate_pci_resources(struct adapter *); -static void ixgbe_get_slot_info(struct adapter *); -static int ixgbe_allocate_msix(struct adapter *); -static int ixgbe_allocate_legacy(struct adapter *); -static int ixgbe_configure_interrupts(struct adapter *); -static void ixgbe_free_pci_resources(struct adapter *); -static void ixgbe_local_timer(void *); -static int ixgbe_setup_interface(device_t, struct adapter *); -static void ixgbe_config_gpie(struct adapter *); -static void ixgbe_config_dmac(struct adapter *); -static void ixgbe_config_delay_values(struct adapter *); -static void ixgbe_config_link(struct adapter *); -static void ixgbe_check_wol_support(struct adapter *); -static int ixgbe_setup_low_power_mode(struct adapter *); -static void ixgbe_rearm_queues(struct adapter *, u64); -static void ixgbe_initialize_transmit_units(struct adapter *); -static void ixgbe_initialize_receive_units(struct adapter *); -static void ixgbe_enable_rx_drop(struct adapter *); -static void ixgbe_disable_rx_drop(struct adapter *); -static void ixgbe_initialize_rss_mapping(struct adapter *); +static void ixgbe_enable_queue(struct adapter *adapter, u32 vector); +static void ixgbe_disable_queue(struct adapter *adapter, u32 vector); +static void ixgbe_add_device_sysctls(if_ctx_t 
ctx); +static int ixgbe_allocate_pci_resources(if_ctx_t ctx); +static int ixgbe_setup_low_power_mode(if_ctx_t ctx); -static void ixgbe_enable_intr(struct adapter *); -static void ixgbe_disable_intr(struct adapter *); -static void ixgbe_update_stats_counters(struct adapter *); -static void ixgbe_set_promisc(struct adapter *); -static void ixgbe_set_multi(struct adapter *); -static void ixgbe_update_link_status(struct adapter *); -static void ixgbe_set_ivar(struct adapter *, u8, u8, s8); -static void ixgbe_configure_ivars(struct adapter *); -static u8 *ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); +static void ixgbe_config_dmac(struct adapter *adapter); +static void ixgbe_configure_ivars(struct adapter *adapter); +static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, + s8 type); +static u8 *ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); +static bool ixgbe_sfp_probe(if_ctx_t ctx); -static void ixgbe_setup_vlan_hw_support(struct adapter *); -static void ixgbe_register_vlan(void *, struct ifnet *, u16); -static void ixgbe_unregister_vlan(void *, struct ifnet *, u16); +static void ixgbe_free_pci_resources(if_ctx_t ctx); -static void ixgbe_add_device_sysctls(struct adapter *); -static void ixgbe_add_hw_stats(struct adapter *); -static int ixgbe_set_flowcntl(struct adapter *, int); -static int ixgbe_set_advertise(struct adapter *, int); -static int ixgbe_get_advertise(struct adapter *); +static int ixgbe_msix_link(void *arg); +static int ixgbe_msix_que(void *arg); +static void ixgbe_initialize_rss_mapping(struct adapter *adapter); +static void ixgbe_initialize_receive_units(if_ctx_t ctx); +static void ixgbe_initialize_transmit_units(if_ctx_t ctx); + +static int ixgbe_setup_interface(if_ctx_t ctx); +static void ixgbe_init_device_features(struct adapter *adapter); +static void ixgbe_check_fan_failure(struct adapter *, u32, bool); +static void ixgbe_add_media_types(if_ctx_t ctx); +static void ixgbe_update_stats_counters(struct adapter 
*adapter); +static void ixgbe_config_link(struct adapter *adapter); +static void ixgbe_get_slot_info(struct adapter *); +static void ixgbe_check_wol_support(struct adapter *adapter); +static void ixgbe_enable_rx_drop(struct adapter *); +static void ixgbe_disable_rx_drop(struct adapter *); + +static void ixgbe_add_hw_stats(struct adapter *adapter); +static int ixgbe_set_flowcntl(struct adapter *, int); +static int ixgbe_set_advertise(struct adapter *, int); +static int ixgbe_get_advertise(struct adapter *); +static void ixgbe_setup_vlan_hw_support(if_ctx_t ctx); +static void ixgbe_config_gpie(struct adapter *adapter); +static void ixgbe_config_delay_values(struct adapter *adapter); /* Sysctl handlers */ -static void ixgbe_set_sysctl_value(struct adapter *, const char *, - const char *, int *, int); -static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS); #ifdef IXGBE_DEBUG -static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS); #endif -static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS); -static int 
ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS); -static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS); - -/* Support for pluggable optic modules */ -static bool ixgbe_sfp_probe(struct adapter *); - -/* Legacy (single vector) interrupt handler */ -static void ixgbe_legacy_irq(void *); - -/* The MSI/MSI-X Interrupt handlers */ -static void ixgbe_msix_que(void *); -static void ixgbe_msix_link(void *); +static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS); +static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS); /* Deferred interrupt tasklets */ -static void ixgbe_handle_que(void *, int); -static void ixgbe_handle_link(void *, int); -static void ixgbe_handle_msf(void *, int); -static void ixgbe_handle_mod(void *, int); -static void ixgbe_handle_phy(void *, int); - +static void ixgbe_handle_msf(void *); +static void ixgbe_handle_mod(void *); +static void ixgbe_handle_phy(void *); /************************************************************************ * FreeBSD Device Interface Entry Points ************************************************************************/ static device_method_t ix_methods[] = { /* Device interface */ - DEVMETHOD(device_probe, ixgbe_probe), - DEVMETHOD(device_attach, ixgbe_attach), - DEVMETHOD(device_detach, ixgbe_detach), - DEVMETHOD(device_shutdown, ixgbe_shutdown), - DEVMETHOD(device_suspend, ixgbe_suspend), - DEVMETHOD(device_resume, ixgbe_resume), + DEVMETHOD(device_register, ixgbe_register), + DEVMETHOD(device_probe, iflib_device_probe), + DEVMETHOD(device_attach, iflib_device_attach), + DEVMETHOD(device_detach, iflib_device_detach), + 
DEVMETHOD(device_shutdown, iflib_device_shutdown), + DEVMETHOD(device_suspend, iflib_device_suspend), + DEVMETHOD(device_resume, iflib_device_resume), #ifdef PCI_IOV - DEVMETHOD(pci_iov_init, ixgbe_init_iov), - DEVMETHOD(pci_iov_uninit, ixgbe_uninit_iov), - DEVMETHOD(pci_iov_add_vf, ixgbe_add_vf), + DEVMETHOD(pci_iov_init, iflib_device_iov_init), + DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit), + DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf), #endif /* PCI_IOV */ DEVMETHOD_END }; @@ -243,41 +239,57 @@ DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0); MODULE_DEPEND(ix, pci, 1, 1, 1); MODULE_DEPEND(ix, ether, 1, 1, 1); -#ifdef DEV_NETMAP -MODULE_DEPEND(ix, netmap, 1, 1, 1); -#endif +MODULE_DEPEND(ix, iflib, 1, 1, 1); + +static device_method_t ixgbe_if_methods[] = { + DEVMETHOD(ifdi_attach_pre, ixgbe_if_attach_pre), + DEVMETHOD(ifdi_attach_post, ixgbe_if_attach_post), + DEVMETHOD(ifdi_detach, ixgbe_if_detach), + DEVMETHOD(ifdi_shutdown, ixgbe_if_shutdown), + DEVMETHOD(ifdi_suspend, ixgbe_if_suspend), + DEVMETHOD(ifdi_resume, ixgbe_if_resume), + DEVMETHOD(ifdi_init, ixgbe_if_init), + DEVMETHOD(ifdi_stop, ixgbe_if_stop), + DEVMETHOD(ifdi_msix_intr_assign, ixgbe_if_msix_intr_assign), + DEVMETHOD(ifdi_intr_enable, ixgbe_if_enable_intr), + DEVMETHOD(ifdi_intr_disable, ixgbe_if_disable_intr), + DEVMETHOD(ifdi_tx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), + DEVMETHOD(ifdi_rx_queue_intr_enable, ixgbe_if_rx_queue_intr_enable), + DEVMETHOD(ifdi_tx_queues_alloc, ixgbe_if_tx_queues_alloc), + DEVMETHOD(ifdi_rx_queues_alloc, ixgbe_if_rx_queues_alloc), + DEVMETHOD(ifdi_queues_free, ixgbe_if_queues_free), + DEVMETHOD(ifdi_update_admin_status, ixgbe_if_update_admin_status), + DEVMETHOD(ifdi_multi_set, ixgbe_if_multi_set), + DEVMETHOD(ifdi_mtu_set, ixgbe_if_mtu_set), + DEVMETHOD(ifdi_crcstrip_set, ixgbe_if_crcstrip_set), + DEVMETHOD(ifdi_media_status, ixgbe_if_media_status), + DEVMETHOD(ifdi_media_change, ixgbe_if_media_change), + DEVMETHOD(ifdi_promisc_set, 
ixgbe_if_promisc_set), + DEVMETHOD(ifdi_timer, ixgbe_if_timer), + DEVMETHOD(ifdi_vlan_register, ixgbe_if_vlan_register), + DEVMETHOD(ifdi_vlan_unregister, ixgbe_if_vlan_unregister), + DEVMETHOD(ifdi_get_counter, ixgbe_if_get_counter), +#ifdef PCI_IOV + DEVMETHOD(ifdi_iov_init, ixgbe_if_iov_init), + DEVMETHOD(ifdi_iov_uninit, ixgbe_if_iov_uninit), + DEVMETHOD(ifdi_iov_vf_add, ixgbe_if_iov_vf_add), +#endif /* PCI_IOV */ + DEVMETHOD_END +}; /* * TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0, "IXGBE driver parameters"); - -/* - * AIM: Adaptive Interrupt Moderation - * which means that the interrupt rate - * is varied over time based on the - * traffic for that interrupt vector - */ -static int ixgbe_enable_aim = TRUE; -SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RDTUN, &ixgbe_enable_aim, 0, - "Enable adaptive interrupt moderation"); +static driver_t ixgbe_if_driver = { + "ixgbe_if", ixgbe_if_methods, sizeof(struct adapter) +}; static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); -/* How many packets rxeof tries to clean at a time */ -static int ixgbe_rx_process_limit = 256; -SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, - &ixgbe_rx_process_limit, 0, "Maximum number of received packets to process at a time, -1 means unlimited"); - -/* How many packets txeof tries to clean at a time */ -static int ixgbe_tx_process_limit = 256; -SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, - &ixgbe_tx_process_limit, 0, - "Maximum number of sent packets to process at a time, -1 means unlimited"); - /* Flow control setting, default to full */ static int ixgbe_flow_control = ixgbe_fc_full; SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN, @@ -305,30 +317,6 @@ static int ixgbe_enable_msix = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, 
"Enable MSI-X interrupts"); -/* - * Number of Queues, can be set to 0, - * it then autoconfigures based on the - * number of cpus with a max of 8. This - * can be overriden manually here. - */ -static int ixgbe_num_queues = 0; -SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0, - "Number of queues to configure, 0 indicates autoconfigure"); - -/* - * Number of TX descriptors per ring, - * setting higher than RX as this seems - * the better performing choice. - */ -static int ixgbe_txd = PERFORM_TXD; -SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0, - "Number of transmit descriptors per queue"); - -/* Number of RX descriptors per ring */ -static int ixgbe_rxd = PERFORM_RXD; -SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0, - "Number of receive descriptors per queue"); - /* * Defining this on will allow the use * of unsupported SFP+ modules, note that @@ -347,24 +335,217 @@ static int ixgbe_enable_fdir = 0; SYSCTL_INT(_hw_ix, OID_AUTO, enable_fdir, CTLFLAG_RDTUN, &ixgbe_enable_fdir, 0, "Enable Flow Director"); -/* Legacy Transmit (single queue) */ -static int ixgbe_enable_legacy_tx = 0; -SYSCTL_INT(_hw_ix, OID_AUTO, enable_legacy_tx, CTLFLAG_RDTUN, - &ixgbe_enable_legacy_tx, 0, "Enable Legacy TX flow"); - /* Receive-Side Scaling */ static int ixgbe_enable_rss = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_rss, CTLFLAG_RDTUN, &ixgbe_enable_rss, 0, "Enable Receive-Side Scaling (RSS)"); +#if 0 /* Keep running tab on them for sanity check */ static int ixgbe_total_ports; - -static int (*ixgbe_start_locked)(struct ifnet *, struct tx_ring *); -static int (*ixgbe_ring_empty)(struct ifnet *, struct buf_ring *); +#endif MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations"); +/* + * For Flow Director: this is the number of TX packets we sample + * for the filter pool, this means every 20th packet will be probed. + * + * This feature can be disabled by setting this to 0. 
+ */ +static int atr_sample_rate = 20; + +extern struct if_txrx ixgbe_txrx; + +static struct if_shared_ctx ixgbe_sctx_init = { + .isc_magic = IFLIB_MAGIC, + .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ + .isc_tx_maxsize = IXGBE_TSO_SIZE, + + .isc_tx_maxsegsize = PAGE_SIZE, + + .isc_rx_maxsize = PAGE_SIZE*4, + .isc_rx_nsegments = 1, + .isc_rx_maxsegsize = PAGE_SIZE*4, + .isc_nfl = 1, + .isc_ntxqs = 1, + .isc_nrxqs = 1, + + .isc_admin_intrcnt = 1, + .isc_vendor_info = ixgbe_vendor_info_array, + .isc_driver_version = ixgbe_driver_version, + .isc_driver = &ixgbe_if_driver, + + .isc_nrxd_min = {MIN_RXD}, + .isc_ntxd_min = {MIN_TXD}, + .isc_nrxd_max = {MAX_RXD}, + .isc_ntxd_max = {MAX_TXD}, + .isc_nrxd_default = {DEFAULT_RXD}, + .isc_ntxd_default = {DEFAULT_TXD}, +}; + +if_shared_ctx_t ixgbe_sctx = &ixgbe_sctx_init; + +/************************************************************************ + * ixgbe_if_tx_queues_alloc + ************************************************************************/ +static int +ixgbe_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, + int ntxqs, int ntxqsets) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ix_tx_queue *que; + int i, j, error; + + MPASS(adapter->num_tx_queues > 0); + MPASS(adapter->num_tx_queues == ntxqsets); + MPASS(ntxqs == 1); + + /* Allocate queue structure memory */ + adapter->tx_queues = + (struct ix_tx_queue *)malloc(sizeof(struct ix_tx_queue) * ntxqsets, + M_IXGBE, M_NOWAIT | M_ZERO); + if (!adapter->tx_queues) { + device_printf(iflib_get_dev(ctx), + "Unable to allocate TX ring memory\n"); + return (ENOMEM); + } + + for (i = 0, que = adapter->tx_queues; i < ntxqsets; i++, que++) { + struct tx_ring *txr = &que->txr; + + /* In case SR-IOV is enabled, align the index properly */ + txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, + i); + + txr->adapter = que->adapter = adapter; + adapter->active_queues |= (u64)1 << txr->me; 
+ + /* Allocate report status array */ + txr->tx_rsq = (qidx_t *)malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_IXGBE, M_NOWAIT | M_ZERO); + if (txr->tx_rsq == NULL) { + error = ENOMEM; + goto fail; + } + for (j = 0; j < scctx->isc_ntxd[0]; j++) + txr->tx_rsq[j] = QIDX_INVALID; + /* get the virtual and physical address of the hardware queues */ + txr->tail = IXGBE_TDT(txr->me); + txr->tx_base = (union ixgbe_adv_tx_desc *)vaddrs[i]; + txr->tx_paddr = paddrs[i]; + + txr->bytes = 0; + txr->total_packets = 0; + + /* Set the rate at which we sample packets */ + if (adapter->feat_en & IXGBE_FEATURE_FDIR) + txr->atr_sample = atr_sample_rate; + + } + + iflib_config_gtask_init(ctx, &adapter->mod_task, ixgbe_handle_mod, + "mod_task"); + iflib_config_gtask_init(ctx, &adapter->msf_task, ixgbe_handle_msf, + "msf_task"); + iflib_config_gtask_init(ctx, &adapter->phy_task, ixgbe_handle_phy, + "phy_task"); + if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) + iflib_config_gtask_init(ctx, &adapter->mbx_task, + ixgbe_handle_mbx, "mbx_task"); + if (adapter->feat_en & IXGBE_FEATURE_FDIR) + iflib_config_gtask_init(ctx, &adapter->fdir_task, + ixgbe_reinit_fdir, "fdir_task"); + + device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", + adapter->num_tx_queues); + + return (0); + +fail: + ixgbe_if_queues_free(ctx); + + return (error); +} /* ixgbe_if_tx_queues_alloc */ + +/************************************************************************ + * ixgbe_if_rx_queues_alloc + ************************************************************************/ +static int +ixgbe_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, + int nrxqs, int nrxqsets) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que; + int i; + + MPASS(adapter->num_rx_queues > 0); + MPASS(adapter->num_rx_queues == nrxqsets); + MPASS(nrxqs == 1); + + /* Allocate queue structure memory */ + adapter->rx_queues = + (struct ix_rx_queue *)malloc(sizeof(struct ix_rx_queue)*nrxqsets, + 
M_IXGBE, M_NOWAIT | M_ZERO); + if (!adapter->rx_queues) { + device_printf(iflib_get_dev(ctx), + "Unable to allocate TX ring memory\n"); + return (ENOMEM); + } + + for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { + struct rx_ring *rxr = &que->rxr; + + /* In case SR-IOV is enabled, align the index properly */ + rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, + i); + + rxr->adapter = que->adapter = adapter; + + /* get the virtual and physical address of the hw queues */ + rxr->tail = IXGBE_RDT(rxr->me); + rxr->rx_base = (union ixgbe_adv_rx_desc *)vaddrs[i]; + rxr->rx_paddr = paddrs[i]; + rxr->bytes = 0; + rxr->que = que; + } + + device_printf(iflib_get_dev(ctx), "allocated for %d rx queues\n", + adapter->num_rx_queues); + + return (0); +} /* ixgbe_if_rx_queues_alloc */ + +/************************************************************************ + * ixgbe_if_queues_free + ************************************************************************/ +static void +ixgbe_if_queues_free(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_tx_queue *tx_que = adapter->tx_queues; + struct ix_rx_queue *rx_que = adapter->rx_queues; + int i; + + if (tx_que != NULL) { + for (i = 0; i < adapter->num_tx_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + if (txr->tx_rsq == NULL) + break; + + free(txr->tx_rsq, M_IXGBE); + txr->tx_rsq = NULL; + } + + free(adapter->tx_queues, M_IXGBE); + adapter->tx_queues = NULL; + } + if (rx_que != NULL) { + free(adapter->rx_queues, M_IXGBE); + adapter->rx_queues = NULL; + } +} /* ixgbe_if_queues_free */ + /************************************************************************ * ixgbe_initialize_rss_mapping ************************************************************************/ @@ -403,17 +584,17 @@ ixgbe_initialize_rss_mapping(struct adapter *adapter) /* Set up the redirection table */ for (i = 0, j = 0; i < table_size; i++, j++) { - if (j == adapter->num_queues) + if (j == 
adapter->num_rx_queues) j = 0; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * Fetch the RSS bucket id for the given indirection * entry. Cap it at the number of configured buckets - * (which is num_queues.) + * (which is num_rx_queues.) */ queue_id = rss_get_indirection_to_bucket(i); - queue_id = queue_id % adapter->num_queues; + queue_id = queue_id % adapter->num_rx_queues; } else queue_id = (j * index_mult); @@ -483,14 +664,16 @@ ixgbe_initialize_rss_mapping(struct adapter *adapter) #define BSIZEPKT_ROUNDUP ((1<rx_rings; - struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - int i, j; - u32 bufsz, fctrl, srrctl, rxcsum; - u32 hlreg; + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ixgbe_hw *hw = &adapter->hw; + struct ifnet *ifp = iflib_get_ifp(ctx); + struct ix_rx_queue *que; + int i, j; + u32 bufsz, fctrl, srrctl, rxcsum; + u32 hlreg; /* * Make sure receives are disabled while @@ -513,24 +696,16 @@ ixgbe_initialize_receive_units(struct adapter *adapter) hlreg |= IXGBE_HLREG0_JUMBOEN; else hlreg &= ~IXGBE_HLREG0_JUMBOEN; - -#ifdef DEV_NETMAP - /* CRC stripping is conditional in Netmap */ - if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && - (ifp->if_capenable & IFCAP_NETMAP) && - !ix_crcstrip) - hlreg &= ~IXGBE_HLREG0_RXCRCSTRP; - else -#endif /* DEV_NETMAP */ - hlreg |= IXGBE_HLREG0_RXCRCSTRP; - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); bufsz = (adapter->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - for (i = 0; i < adapter->num_queues; i++, rxr++) { - u64 rdba = rxr->rxdma.dma_paddr; + /* Setup the Base and Length of the Rx Descriptor Ring */ + for (i = 0, que = adapter->rx_queues; i < adapter->num_rx_queues; i++, que++) { + struct rx_ring *rxr = &que->rxr; + u64 rdba = rxr->rx_paddr; + j = rxr->me; /* Setup the Base and Length of the Rx Descriptor Ring */ @@ -538,7 +713,7 @@ ixgbe_initialize_receive_units(struct adapter *adapter) (rdba & 0x00000000ffffffffULL)); 
IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j), - adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); + scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc)); /* Set up the SRRCTL register */ srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j)); @@ -553,7 +728,7 @@ ixgbe_initialize_receive_units(struct adapter *adapter) * so we do not need to clear the bit, but do it just in case * this code is moved elsewhere. */ - if (adapter->num_queues > 1 && + if (adapter->num_rx_queues > 1 && adapter->hw.fc.requested_mode == ixgbe_fc_none) { srrctl |= IXGBE_SRRCTL_DROP_EN; } else { @@ -582,7 +757,7 @@ ixgbe_initialize_receive_units(struct adapter *adapter) ixgbe_initialize_rss_mapping(adapter); - if (adapter->num_queues > 1) { + if (adapter->num_rx_queues > 1) { /* RSS and RX IPP Checksum are mutually exclusive */ rxcsum |= IXGBE_RXCSUM_PCSD; } @@ -596,21 +771,25 @@ ixgbe_initialize_receive_units(struct adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); - return; } /* ixgbe_initialize_receive_units */ /************************************************************************ * ixgbe_initialize_transmit_units - Enable transmit units. 
************************************************************************/ static void -ixgbe_initialize_transmit_units(struct adapter *adapter) +ixgbe_initialize_transmit_units(if_ctx_t ctx) { - struct tx_ring *txr = adapter->tx_rings; - struct ixgbe_hw *hw = &adapter->hw; + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + if_softc_ctx_t scctx = adapter->shared; + struct ix_tx_queue *que; + int i; /* Setup the Base and Length of the Tx Descriptor Ring */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - u64 tdba = txr->txdma.dma_paddr; + for (i = 0, que = adapter->tx_queues; i < adapter->num_tx_queues; + i++, que++) { + struct tx_ring *txr = &que->txr; + u64 tdba = txr->tx_paddr; u32 txctrl = 0; int j = txr->me; @@ -618,14 +797,16 @@ ixgbe_initialize_transmit_units(struct adapter *adapter) (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), - adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc)); + scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); /* Cache the tail address */ - txr->tail = IXGBE_TDT(j); + txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0; + for (int k = 0; k < scctx->isc_ntxd[0]; k++) + txr->tx_rsq[k] = QIDX_INVALID; /* Disable Head Writeback */ /* @@ -669,22 +850,32 @@ ixgbe_initialize_transmit_units(struct adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } - return; } /* ixgbe_initialize_transmit_units */ /************************************************************************ - * ixgbe_attach - Device initialization routine + * ixgbe_register + ************************************************************************/ +static void * +ixgbe_register(device_t dev) +{ + return (ixgbe_sctx); +} /* ixgbe_register */ + 
+/************************************************************************ + * ixgbe_if_attach_pre - Device initialization routine, part 1 * * Called when the driver is being loaded. - * Identifies the type of hardware, allocates all resources - * and initializes the hardware. + * Identifies the type of hardware, initializes the hardware, + * and initializes iflib structures. * * return 0 on success, positive on failure ************************************************************************/ static int -ixgbe_attach(device_t dev) +ixgbe_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; + device_t dev; + if_softc_ctx_t scctx; struct ixgbe_hw *hw; int error = 0; u32 ctrl_ext; @@ -692,17 +883,15 @@ ixgbe_attach(device_t dev) INIT_DEBUGOUT("ixgbe_attach: begin"); /* Allocate, clear, and link in our adapter structure */ - adapter = device_get_softc(dev); + dev = iflib_get_dev(ctx); + adapter = iflib_get_softc(ctx); adapter->hw.back = adapter; + adapter->ctx = ctx; adapter->dev = dev; + scctx = adapter->shared = iflib_get_softc_ctx(ctx); + adapter->media = iflib_get_media(ctx); hw = &adapter->hw; - /* Core Lock Init*/ - IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); - - /* Set up the timer callout */ - callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); - /* Determine hardware revision */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); @@ -710,16 +899,10 @@ ixgbe_attach(device_t dev) hw->subsystem_vendor_id = pci_get_subvendor(dev); hw->subsystem_device_id = pci_get_subdevice(dev); - /* - * Make sure BUSMASTER is set - */ - pci_enable_busmaster(dev); - /* Do base PCI setup - map BAR0 */ - if (ixgbe_allocate_pci_resources(adapter)) { + if (ixgbe_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); - error = ENXIO; - goto err_out; + return (ENXIO); } /* let hardware know driver is loaded */ @@ -730,10 +913,10 @@ ixgbe_attach(device_t dev) /* * Initialize the shared code */ - if 
(ixgbe_init_shared_code(hw)) { + if (ixgbe_init_shared_code(hw) != 0) { device_printf(dev, "Unable to initialize the shared code\n"); error = ENXIO; - goto err_out; + goto err_pci; } if (hw->mbx.ops.init_params) @@ -741,38 +924,14 @@ ixgbe_attach(device_t dev) hw->allow_unsupported_sfp = allow_unsupported_sfp; - /* Pick up the 82599 settings */ - if (hw->mac.type != ixgbe_mac_82598EB) { + if (hw->mac.type != ixgbe_mac_82598EB) hw->phy.smart_speed = ixgbe_smart_speed; - adapter->num_segs = IXGBE_82599_SCATTER; - } else - adapter->num_segs = IXGBE_82598_SCATTER; ixgbe_init_device_features(adapter); - if (ixgbe_configure_interrupts(adapter)) { - error = ENXIO; - goto err_out; - } - - /* Allocate multicast array memory. */ - adapter->mta = malloc(sizeof(*adapter->mta) * - MAX_NUM_MULTICAST_ADDRESSES, M_IXGBE, M_NOWAIT); - if (adapter->mta == NULL) { - device_printf(dev, "Can not allocate multicast setup array\n"); - error = ENOMEM; - goto err_out; - } - /* Enable WoL (if supported) */ ixgbe_check_wol_support(adapter); - /* Register for VLAN events */ - adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); - adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); - /* Verify adapter fan is still functional (if applicable) */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); @@ -782,57 +941,9 @@ ixgbe_attach(device_t dev) /* Ensure SW/FW semaphore is free */ ixgbe_init_swfw_semaphore(hw); - /* Enable EEE power saving */ - if (adapter->feat_en & IXGBE_FEATURE_EEE) - hw->mac.ops.setup_eee(hw, TRUE); - /* Set an initial default flow control value */ hw->fc.requested_mode = ixgbe_flow_control; - /* Sysctls for limiting the amount of work done in the taskqueues */ - ixgbe_set_sysctl_value(adapter, "rx_processing_limit", - "max number of rx packets to process", - &adapter->rx_process_limit, 
ixgbe_rx_process_limit); - - ixgbe_set_sysctl_value(adapter, "tx_processing_limit", - "max number of tx packets to process", - &adapter->tx_process_limit, ixgbe_tx_process_limit); - - /* Do descriptor calc and sanity checks */ - if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || - ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) { - device_printf(dev, "TXD config issue, using default!\n"); - adapter->num_tx_desc = DEFAULT_TXD; - } else - adapter->num_tx_desc = ixgbe_txd; - - /* - * With many RX rings it is easy to exceed the - * system mbuf allocation. Tuning nmbclusters - * can alleviate this. - */ - if (nmbclusters > 0) { - int s; - s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports; - if (s > nmbclusters) { - device_printf(dev, "RX Descriptors exceed system mbuf max, using default instead!\n"); - ixgbe_rxd = DEFAULT_RXD; - } - } - - if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || - ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) { - device_printf(dev, "RXD config issue, using default!\n"); - adapter->num_rx_desc = DEFAULT_RXD; - } else - adapter->num_rx_desc = ixgbe_rxd; - - /* Allocate our TX/RX Queues */ - if (ixgbe_allocate_queues(adapter)) { - error = ENOMEM; - goto err_out; - } - hw->phy.reset_if_overtemp = TRUE; error = ixgbe_reset_hw(hw); hw->phy.reset_if_overtemp = FALSE; @@ -843,35 +954,24 @@ ixgbe_attach(device_t dev) * for later insertion. 
*/ adapter->sfp_probe = TRUE; - error = IXGBE_SUCCESS; + error = 0; } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!\n"); error = EIO; - goto err_late; + goto err_pci; } else if (error) { device_printf(dev, "Hardware initialization failed\n"); error = EIO; - goto err_late; + goto err_pci; } /* Make sure we have a good EEPROM before we read from it */ if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; - goto err_late; + goto err_pci; } - /* Setup OS specific network interface */ - if (ixgbe_setup_interface(dev, adapter) != 0) - goto err_late; - - if (adapter->feat_en & IXGBE_FEATURE_MSIX) - error = ixgbe_allocate_msix(adapter); - else - error = ixgbe_allocate_legacy(adapter); - if (error) - goto err_late; - error = ixgbe_start_hw(hw); switch (error) { case IXGBE_ERR_EEPROM_VERSION: @@ -880,7 +980,7 @@ ixgbe_attach(device_t dev) case IXGBE_ERR_SFP_NOT_SUPPORTED: device_printf(dev, "Unsupported SFP+ Module\n"); error = EIO; - goto err_late; + goto err_pci; case IXGBE_ERR_SFP_NOT_PRESENT: device_printf(dev, "No SFP+ Module found\n"); /* falls thru */ @@ -888,14 +988,121 @@ ixgbe_attach(device_t dev) break; } + /* Most of the iflib initialization... 
*/ + + iflib_set_mac(ctx, hw->mac.addr); + switch (adapter->hw.mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: + scctx->isc_rss_table_size = 512; + scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 64; + break; + default: + scctx->isc_rss_table_size = 128; + scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 16; + } + + /* Allow legacy interrupts */ + ixgbe_txrx.ift_legacy_intr = ixgbe_intr; + + scctx->isc_txqsizes[0] = + roundup2(scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc) + + sizeof(u32), DBA_ALIGN), + scctx->isc_rxqsizes[0] = + roundup2(scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc), + DBA_ALIGN); + + /* XXX */ + scctx->isc_tx_csum_flags = CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | + CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_TSO; + if (adapter->hw.mac.type == ixgbe_mac_82598EB) { + scctx->isc_tx_nsegments = IXGBE_82598_SCATTER; + scctx->isc_msix_bar = PCIR_BAR(MSIX_82598_BAR); + } else { + scctx->isc_tx_csum_flags |= CSUM_SCTP |CSUM_IP6_SCTP; + scctx->isc_tx_nsegments = IXGBE_82599_SCATTER; + scctx->isc_msix_bar = PCIR_BAR(MSIX_82599_BAR); + } + scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; + scctx->isc_tx_tso_size_max = IXGBE_TSO_SIZE; + scctx->isc_tx_tso_segsize_max = PAGE_SIZE; + + scctx->isc_txrx = &ixgbe_txrx; + + scctx->isc_capenable = IXGBE_CAPS; + + return (0); + +err_pci: + ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); + ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); + ixgbe_free_pci_resources(ctx); + + return (error); +} /* ixgbe_if_attach_pre */ + + /********************************************************************* + * ixgbe_if_attach_post - Device initialization routine, part 2 + * + * Called during driver load, but after interrupts and + * resources have been allocated and configured. + * Sets up some data structures not relevant to iflib. 
+ * + * return 0 on success, positive on failure + *********************************************************************/ +static int +ixgbe_if_attach_post(if_ctx_t ctx) +{ + device_t dev; + struct adapter *adapter; + struct ixgbe_hw *hw; + int error = 0; + + dev = iflib_get_dev(ctx); + adapter = iflib_get_softc(ctx); + hw = &adapter->hw; + + + if (adapter->intr_type == IFLIB_INTR_LEGACY && + (adapter->feat_cap & IXGBE_FEATURE_LEGACY_IRQ) == 0) { + device_printf(dev, "Device does not support legacy interrupts"); + error = ENXIO; + goto err; + } + + /* Allocate multicast array memory. */ + adapter->mta = malloc(sizeof(*adapter->mta) * + MAX_NUM_MULTICAST_ADDRESSES, M_IXGBE, M_NOWAIT); + if (adapter->mta == NULL) { + device_printf(dev, "Can not allocate multicast setup array\n"); + error = ENOMEM; + goto err; + } + + /* hw.ix defaults init */ + ixgbe_set_advertise(adapter, ixgbe_advertise_speed); + /* Enable the optics for 82599 SFP+ fiber */ ixgbe_enable_tx_laser(hw); /* Enable power to the phy. 
*/ ixgbe_set_phy_power(hw, TRUE); + ixgbe_initialize_iov(adapter); + + error = ixgbe_setup_interface(ctx); + if (error) { + device_printf(dev, "Interface setup failed: %d\n", error); + goto err; + } + + ixgbe_if_update_admin_status(ctx); + /* Initialize statistics */ ixgbe_update_stats_counters(adapter); + ixgbe_add_hw_stats(adapter); /* Check PCIE slot type/speed/width */ ixgbe_get_slot_info(adapter); @@ -915,36 +1122,12 @@ ixgbe_attach(device_t dev) ixgbe_define_iov_schemas(dev, &error); /* Add sysctls */ - ixgbe_add_device_sysctls(adapter); - ixgbe_add_hw_stats(adapter); - - /* For Netmap */ - adapter->init_locked = ixgbe_init_locked; - adapter->stop_locked = ixgbe_stop; - - if (adapter->feat_en & IXGBE_FEATURE_NETMAP) - ixgbe_netmap_attach(adapter); - - INIT_DEBUGOUT("ixgbe_attach: end"); + ixgbe_add_device_sysctls(ctx); return (0); - -err_late: - ixgbe_free_transmit_structures(adapter); - ixgbe_free_receive_structures(adapter); - free(adapter->queues, M_DEVBUF); -err_out: - if (adapter->ifp != NULL) - if_free(adapter->ifp); - ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); - ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); - ixgbe_free_pci_resources(adapter); - free(adapter->mta, M_IXGBE); - IXGBE_CORE_LOCK_DESTROY(adapter); - +err: return (error); -} /* ixgbe_attach */ +} /* ixgbe_if_attach_post */ /************************************************************************ * ixgbe_check_wol_support @@ -981,70 +1164,18 @@ ixgbe_check_wol_support(struct adapter *adapter) * Setup networking device structure and register an interface. 
************************************************************************/ static int -ixgbe_setup_interface(device_t dev, struct adapter *adapter) +ixgbe_setup_interface(if_ctx_t ctx) { - struct ifnet *ifp; + struct ifnet *ifp = iflib_get_ifp(ctx); + struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("ixgbe_setup_interface: begin"); - ifp = adapter->ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "can not allocate ifnet structure\n"); - return (-1); - } - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_baudrate = IF_Gbps(10); - ifp->if_init = ixgbe_init; - ifp->if_softc = adapter; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = ixgbe_ioctl; -#if __FreeBSD_version >= 1100036 - if_setgetcounterfn(ifp, ixgbe_get_counter); -#endif -#if __FreeBSD_version >= 1100045 - /* TSO parameters */ - ifp->if_hw_tsomax = 65518; - ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER; - ifp->if_hw_tsomaxsegsize = 2048; -#endif - if (adapter->feat_en & IXGBE_FEATURE_LEGACY_TX) { - ifp->if_start = ixgbe_legacy_start; - IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2); - ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2; - IFQ_SET_READY(&ifp->if_snd); - ixgbe_start_locked = ixgbe_legacy_start_locked; - ixgbe_ring_empty = ixgbe_legacy_ring_empty; - } else { - ifp->if_transmit = ixgbe_mq_start; - ifp->if_qflush = ixgbe_qflush; - ixgbe_start_locked = ixgbe_mq_start_locked; - ixgbe_ring_empty = drbr_empty; - } - - ether_ifattach(ifp, adapter->hw.mac.addr); + if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); + if_setbaudrate(ifp, IF_Gbps(10)); adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; - /* - * Tell the upper layer(s) we support long frames. 
- */ - ifp->if_hdrlen = sizeof(struct ether_vlan_header); - - /* Set capability flags */ - ifp->if_capabilities |= IFCAP_HWCSUM - | IFCAP_HWCSUM_IPV6 - | IFCAP_TSO - | IFCAP_LRO - | IFCAP_VLAN_HWTAGGING - | IFCAP_VLAN_HWTSO - | IFCAP_VLAN_HWCSUM - | IFCAP_JUMBO_MTU - | IFCAP_VLAN_MTU - | IFCAP_HWSTATS; - - /* Enable the above capabilities by default */ - ifp->if_capenable = ifp->if_capabilities; - /* * Don't turn this on by default, if vlans are * created on another pseudo device (eg. lagg) @@ -1053,36 +1184,25 @@ ixgbe_setup_interface(device_t dev, struct adapter *adapter) * using vlans directly on the ixgbe driver you can * enable this and get full hardware tag filtering. */ - ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; - - /* - * Specify the media types supported by this adapter and register - * callbacks to update media and link information - */ - ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change, - ixgbe_media_status); - + if_setcapenablebit(ifp, 0, IFCAP_VLAN_HWFILTER); adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); - ixgbe_add_media_types(adapter); - /* Set autoselect media by default */ - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); + ixgbe_add_media_types(ctx); + + /* Autoselect media by default */ + ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* ixgbe_setup_interface */ -#if __FreeBSD_version >= 1100036 /************************************************************************ - * ixgbe_get_counter + * ixgbe_if_get_counter ************************************************************************/ static uint64_t -ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt) +ixgbe_if_get_counter(if_ctx_t ctx, ift_counter cnt) { - struct adapter *adapter; - struct tx_ring *txr; - uint64_t rv; - - adapter = if_getsoftc(ifp); + struct adapter *adapter = iflib_get_softc(ctx); + if_t ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_IPACKETS: @@ -1102,103 +1222,99 @@ ixgbe_get_counter(struct ifnet 
*ifp, ift_counter cnt) case IFCOUNTER_IQDROPS: return (adapter->iqdrops); case IFCOUNTER_OQDROPS: - rv = 0; - txr = adapter->tx_rings; - for (int i = 0; i < adapter->num_queues; i++, txr++) - rv += txr->br->br_drops; - return (rv); + return (0); case IFCOUNTER_IERRORS: return (adapter->ierrors); default: return (if_get_counter_default(ifp, cnt)); } -} /* ixgbe_get_counter */ -#endif +} /* ixgbe_if_get_counter */ /************************************************************************ * ixgbe_add_media_types ************************************************************************/ static void -ixgbe_add_media_types(struct adapter *adapter) +ixgbe_add_media_types(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; + device_t dev = iflib_get_dev(ctx); u64 layer; - layer = adapter->phy_layer; + layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); /* Media types with matching FreeBSD media defines */ if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10BASE_T) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) { - ifmedia_add(&adapter->media, 
IFM_ETHER | IFM_10G_LR, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (hw->phy.multispeed_fiber) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_LX, 0, + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (hw->phy.multispeed_fiber) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); #ifdef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); + ifmedia_add( adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) - ifmedia_add(&adapter->media, IFM_ETHER | IFM_2500_KX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_2500_KX, 0, NULL); #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { device_printf(dev, "Media supported: 10GbaseKR\n"); device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); } if (layer & 
IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { device_printf(dev, "Media supported: 10GbaseKX4\n"); device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { device_printf(dev, "Media supported: 1000baseKX\n"); device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) { device_printf(dev, "Media supported: 2500baseKX\n"); device_printf(dev, "2500baseKX mapped to 2500baseSX\n"); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_2500_SX, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_2500_SX, 0, NULL); } #endif if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) device_printf(dev, "Media supported: 1000baseBX\n"); if (hw->device_id == IXGBE_DEV_ID_82598AT) { - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); } /* ixgbe_add_media_types */ /************************************************************************ @@ -1210,23 +1326,23 @@ ixgbe_is_sfp(struct ixgbe_hw *hw) switch (hw->mac.type) { case ixgbe_mac_82598EB: if (hw->phy.type == ixgbe_phy_nl) - return TRUE; - return FALSE; + return (TRUE); + return (FALSE); case ixgbe_mac_82599EB: switch (hw->mac.ops.get_media_type(hw)) { case ixgbe_media_type_fiber: case ixgbe_media_type_fiber_qsfp: - return TRUE; + return (TRUE); default: - return FALSE; + return (FALSE); } case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: if 
(hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) - return TRUE; - return FALSE; + return (TRUE); + return (FALSE); default: - return FALSE; + return (FALSE); } } /* ixgbe_is_sfp */ @@ -1243,31 +1359,24 @@ ixgbe_config_link(struct adapter *adapter) sfp = ixgbe_is_sfp(hw); if (sfp) { - if (hw->phy.multispeed_fiber) { - hw->mac.ops.setup_sfp(hw); - ixgbe_enable_tx_laser(hw); - taskqueue_enqueue(adapter->tq, &adapter->msf_task); - } else - taskqueue_enqueue(adapter->tq, &adapter->mod_task); + GROUPTASK_ENQUEUE(&adapter->mod_task); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); if (err) - goto out; + return; autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) err = hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (err) - goto out; + return; if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } -out: - return; } /* ixgbe_config_link */ /************************************************************************ @@ -1396,15 +1505,16 @@ ixgbe_update_stats_counters(struct adapter *adapter) static void ixgbe_add_hw_stats(struct adapter *adapter) { - device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + device_t dev = iflib_get_dev(adapter->ctx); + struct ix_rx_queue *rx_que; + struct ix_tx_queue *tx_que; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbe_hw_stats *stats = &adapter->stats.pf; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; + int i; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; @@ -1412,27 +1522,18 @@ ixgbe_add_hw_stats(struct adapter *adapter) /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, 
"Driver dropped packets"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", - CTLFLAG_RD, &adapter->mbuf_defrag_failed, "m_defrag() failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); - for (int i = 0; i < adapter->num_queues; i++, txr++) { + for (i = 0, tx_que = adapter->tx_queues; i < adapter->num_tx_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); - SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", - CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i], - sizeof(&adapter->queues[i]), - ixgbe_sysctl_interrupt_rate_handler, "IU", - "Interrupt Rate"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", - CTLFLAG_RD, &(adapter->queues[i].irqs), - "irqs on this queue"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdh_handler, "IU", "Transmit Descriptor Head"); @@ -1441,28 +1542,26 @@ ixgbe_add_hw_stats(struct adapter *adapter) ixgbe_sysctl_tdt_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &txr->tso_tx, "TSO"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_tx_dma_setup", - CTLFLAG_RD, &txr->no_tx_dma_setup, - "Driver tx dma failure in xmit"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", - CTLFLAG_RD, &txr->no_desc_avail, - "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "br_drops", - CTLFLAG_RD, &txr->br->br_drops, - "Packets dropped in buf_ring"); } - for (int i = 0; 
i < adapter->num_queues; i++, rxr++) { - struct lro_ctrl *lro = &rxr->lro; - + for (i = 0, rx_que = adapter->rx_queues; i < adapter->num_rx_queues; i++, rx_que++) { + struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); + SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", + CTLTYPE_UINT | CTLFLAG_RW, &adapter->rx_queues[i], + sizeof(&adapter->rx_queues[i]), + ixgbe_sysctl_interrupt_rate_handler, "IU", + "Interrupt Rate"); + SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", + CTLFLAG_RD, &(adapter->rx_queues[i].irqs), + "irqs on this queue"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdh_handler, "IU", "Receive Descriptor Head"); @@ -1477,10 +1576,6 @@ ixgbe_add_hw_stats(struct adapter *adapter) CTLFLAG_RD, &rxr->rx_copies, "Copied RX Frames"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded", CTLFLAG_RD, &rxr->rx_discarded, "Discarded RX packets"); - SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", - CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); - SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", - CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get their own sub node */ @@ -1676,7 +1771,7 @@ ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) } /* ixgbe_sysctl_rdt_handler */ /************************************************************************ - * ixgbe_register_vlan + * ixgbe_if_vlan_register * * Run via vlan config EVENT, it enables us to use the * HW Filter table since we can get the vlan id. This @@ -1684,60 +1779,45 @@ ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) * VFTA, init will repopulate the real table. 
************************************************************************/ static void -ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) +ixgbe_if_vlan_register(if_ctx_t ctx, u16 vtag) { - struct adapter *adapter = ifp->if_softc; + struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; - if (ifp->if_softc != arg) /* Not our event */ - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; - ixgbe_setup_vlan_hw_support(adapter); - IXGBE_CORE_UNLOCK(adapter); -} /* ixgbe_register_vlan */ + ixgbe_setup_vlan_hw_support(ctx); +} /* ixgbe_if_vlan_register */ /************************************************************************ - * ixgbe_unregister_vlan + * ixgbe_if_vlan_unregister * * Run via vlan unconfig EVENT, remove our entry in the soft vfta. ************************************************************************/ static void -ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) +ixgbe_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { - struct adapter *adapter = ifp->if_softc; + struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; - if (ifp->if_softc != arg) - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ - ixgbe_setup_vlan_hw_support(adapter); - IXGBE_CORE_UNLOCK(adapter); -} /* ixgbe_unregister_vlan */ + ixgbe_setup_vlan_hw_support(ctx); +} /* ixgbe_if_vlan_unregister */ /************************************************************************ * ixgbe_setup_vlan_hw_support ************************************************************************/ static void -ixgbe_setup_vlan_hw_support(struct adapter *adapter) +ixgbe_setup_vlan_hw_support(if_ctx_t ctx) { - struct ifnet *ifp 
= adapter->ifp; + struct ifnet *ifp = iflib_get_ifp(ctx); + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; int i; @@ -1755,8 +1835,8 @@ ixgbe_setup_vlan_hw_support(struct adapter *adapter) /* Setup the queues for vlans */ if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { - for (i = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; + for (i = 0; i < adapter->num_rx_queues; i++) { + rxr = &adapter->rx_queues[i].rxr; /* On 82599 the VLAN enable is per/queue in RXDCTL */ if (hw->mac.type != ixgbe_mac_82598EB) { ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); @@ -1798,11 +1878,11 @@ ixgbe_setup_vlan_hw_support(struct adapter *adapter) static void ixgbe_get_slot_info(struct adapter *adapter) { - device_t dev = adapter->dev; - struct ixgbe_hw *hw = &adapter->hw; - u32 offset; - u16 link; - int bus_info_valid = TRUE; + device_t dev = iflib_get_dev(adapter->ctx); + struct ixgbe_hw *hw = &adapter->hw; + int bus_info_valid = TRUE; + u32 offset; + u16 link; /* Some devices are behind an internal bridge */ switch (hw->device_id) { @@ -1888,137 +1968,105 @@ display: } /* ixgbe_get_slot_info */ /************************************************************************ - * ixgbe_enable_queue - MSI-X Interrupt Handlers and Tasklets + * ixgbe_if_msix_intr_assign + * + * Setup MSI-X Interrupt resources and handlers ************************************************************************/ -static inline void -ixgbe_enable_queue(struct adapter *adapter, u32 vector) +static int +ixgbe_if_msix_intr_assign(if_ctx_t ctx, int msix) { - struct ixgbe_hw *hw = &adapter->hw; - u64 queue = (u64)(1 << vector); - u32 mask; + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *rx_que = adapter->rx_queues; + struct ix_tx_queue *tx_que; + int error, rid, vector = 0; + int cpu_id = 0; + char buf[16]; + + /* Admin Que is vector 0*/ + rid = vector + 1; + for (int i = 0; i < adapter->num_rx_queues; i++, vector++, 
rx_que++) { + rid = vector + 1; + + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, + IFLIB_INTR_RX, ixgbe_msix_que, rx_que, rx_que->rxr.me, buf); + + if (error) { + device_printf(iflib_get_dev(ctx), + "Failed to allocate que int %d err: %d", i, error); + adapter->num_rx_queues = i + 1; + goto fail; + } + + rx_que->msix = vector; + adapter->active_queues |= (u64)(1 << rx_que->msix); + if (adapter->feat_en & IXGBE_FEATURE_RSS) { + /* + * The queue ID is used as the RSS layer bucket ID. + * We look up the queue ID -> RSS CPU ID and select + * that. + */ + cpu_id = rss_getcpu(i % rss_getnumbuckets()); + } else { + /* + * Bind the msix vector, and thus the + * rings to the corresponding cpu. + * + * This just happens to match the default RSS + * round-robin bucket -> queue -> CPU allocation. + */ + if (adapter->num_rx_queues > 1) + cpu_id = i; + } - if (hw->mac.type == ixgbe_mac_82598EB) { - mask = (IXGBE_EIMS_RTX_QUEUE & queue); - IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); - } else { - mask = (queue & 0xFFFFFFFF); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); - mask = (queue >> 32); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } -} /* ixgbe_enable_queue */ - -/************************************************************************ - * ixgbe_disable_queue - ************************************************************************/ -static inline void -ixgbe_disable_queue(struct adapter *adapter, u32 vector) -{ - struct ixgbe_hw *hw = &adapter->hw; - u64 queue = (u64)(1 << vector); - u32 mask; - - if (hw->mac.type == ixgbe_mac_82598EB) { - mask = (IXGBE_EIMS_RTX_QUEUE & queue); - IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); - } else { - mask = (queue & 0xFFFFFFFF); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); - mask = (queue >> 32); - if (mask) - IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); + for (int i = 0; i < adapter->num_tx_queues; i++) { + snprintf(buf, sizeof(buf), "txq%d", i); + 
tx_que = &adapter->tx_queues[i]; + tx_que->msix = i % adapter->num_rx_queues; + iflib_softirq_alloc_generic(ctx, + &adapter->rx_queues[tx_que->msix].que_irq, + IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); + } + rid = vector + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, + IFLIB_INTR_ADMIN, ixgbe_msix_link, adapter, 0, "aq"); + if (error) { + device_printf(iflib_get_dev(ctx), + "Failed to register admin handler"); + return (error); } -} /* ixgbe_disable_queue */ -/************************************************************************ + adapter->vector = vector; + + return (0); +fail: + iflib_irq_free(ctx, &adapter->irq); + rx_que = adapter->rx_queues; + for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) + iflib_irq_free(ctx, &rx_que->que_irq); + + return (error); +} /* ixgbe_if_msix_intr_assign */ + +/********************************************************************* * ixgbe_msix_que - MSI-X Queue Interrupt Service routine - ************************************************************************/ -void + **********************************************************************/ +static int ixgbe_msix_que(void *arg) { - struct ix_queue *que = arg; - struct adapter *adapter = que->adapter; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = que->txr; - struct rx_ring *rxr = que->rxr; - bool more; - u32 newitr = 0; - + struct ix_rx_queue *que = arg; + struct adapter *adapter = que->adapter; + struct ifnet *ifp = iflib_get_ifp(que->adapter->ctx); /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - return; + return 0; ixgbe_disable_queue(adapter, que->msix); ++que->irqs; - more = ixgbe_rxeof(que); - - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); - if (!ixgbe_ring_empty(ifp, txr->br)) - ixgbe_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); - - /* Do AIM now? 
*/ - - if (adapter->enable_aim == FALSE) - goto no_calc; - /* - * Do Adaptive Interrupt Moderation: - * - Write out last calculated setting - * - Calculate based on average size over - * the last interval. - */ - if (que->eitr_setting) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), - que->eitr_setting); - - que->eitr_setting = 0; - - /* Idle, do nothing */ - if ((txr->bytes == 0) && (rxr->bytes == 0)) - goto no_calc; - - if ((txr->bytes) && (txr->packets)) - newitr = txr->bytes/txr->packets; - if ((rxr->bytes) && (rxr->packets)) - newitr = max(newitr, (rxr->bytes / rxr->packets)); - newitr += 24; /* account for hardware frame, crc */ - - /* set an upper boundary */ - newitr = min(newitr, 3000); - - /* Be nice to the mid range */ - if ((newitr > 300) && (newitr < 1200)) - newitr = (newitr / 3); - else - newitr = (newitr / 2); - - if (adapter->hw.mac.type == ixgbe_mac_82598EB) - newitr |= newitr << 16; - else - newitr |= IXGBE_EITR_CNT_WDIS; - - /* save for next interrupt */ - que->eitr_setting = newitr; - - /* Reset state */ - txr->bytes = 0; - txr->packets = 0; - rxr->bytes = 0; - rxr->packets = 0; - -no_calc: - if (more) - taskqueue_enqueue(que->tq, &que->que_task); - else - ixgbe_enable_queue(adapter, que->msix); - - return; + return (FILTER_SCHEDULE_THREAD); } /* ixgbe_msix_que */ /************************************************************************ @@ -2028,23 +2076,21 @@ no_calc: * the interface using ifconfig. 
************************************************************************/ static void -ixgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) +ixgbe_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { - struct adapter *adapter = ifp->if_softc; + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; int layer; - INIT_DEBUGOUT("ixgbe_media_status: begin"); - IXGBE_CORE_LOCK(adapter); - ixgbe_update_link_status(adapter); + INIT_DEBUGOUT("ixgbe_if_media_status: begin"); + + iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; - if (!adapter->link_active) { - IXGBE_CORE_UNLOCK(adapter); + if (!adapter->link_active) return; - } ifmr->ifm_status |= IFM_ACTIVE; layer = adapter->phy_layer; @@ -2172,7 +2218,6 @@ ixgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) if (IFM_SUBTYPE(ifmr->ifm_active) == 0) ifmr->ifm_active |= IFM_UNKNOWN; -#if __FreeBSD_version >= 900025 /* Display current flow control setting used on link */ if (hw->fc.current_mode == ixgbe_fc_rx_pause || hw->fc.current_mode == ixgbe_fc_full) @@ -2180,11 +2225,6 @@ ixgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) if (hw->fc.current_mode == ixgbe_fc_tx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_TXPAUSE; -#endif - - IXGBE_CORE_UNLOCK(adapter); - - return; } /* ixgbe_media_status */ /************************************************************************ @@ -2194,20 +2234,20 @@ ixgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) * media/mediopt option with ifconfig. 
************************************************************************/ static int -ixgbe_media_change(struct ifnet *ifp) +ixgbe_if_media_change(if_ctx_t ctx) { - struct adapter *adapter = ifp->if_softc; - struct ifmedia *ifm = &adapter->media; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifmedia *ifm = iflib_get_media(ctx); struct ixgbe_hw *hw = &adapter->hw; ixgbe_link_speed speed = 0; - INIT_DEBUGOUT("ixgbe_media_change: begin"); + INIT_DEBUGOUT("ixgbe_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (hw->phy.media_type == ixgbe_media_type_backplane) - return (ENODEV); + return (EPERM); /* * We don't actually need to check against the supported @@ -2215,48 +2255,48 @@ ixgbe_media_change(struct ifnet *ifp) * that for us. */ switch (IFM_SUBTYPE(ifm->ifm_media)) { - case IFM_AUTO: - case IFM_10G_T: - speed |= IXGBE_LINK_SPEED_100_FULL; - speed |= IXGBE_LINK_SPEED_1GB_FULL; - speed |= IXGBE_LINK_SPEED_10GB_FULL; - break; - case IFM_10G_LRM: - case IFM_10G_LR: + case IFM_AUTO: + case IFM_10G_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + speed |= IXGBE_LINK_SPEED_1GB_FULL; + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; + case IFM_10G_LRM: + case IFM_10G_LR: #ifndef IFM_ETH_XTYPE - case IFM_10G_SR: /* KR, too */ - case IFM_10G_CX4: /* KX4 */ + case IFM_10G_SR: /* KR, too */ + case IFM_10G_CX4: /* KX4 */ #else - case IFM_10G_KR: - case IFM_10G_KX4: + case IFM_10G_KR: + case IFM_10G_KX4: #endif - speed |= IXGBE_LINK_SPEED_1GB_FULL; - speed |= IXGBE_LINK_SPEED_10GB_FULL; - break; + speed |= IXGBE_LINK_SPEED_1GB_FULL; + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; #ifndef IFM_ETH_XTYPE - case IFM_1000_CX: /* KX */ + case IFM_1000_CX: /* KX */ #else - case IFM_1000_KX: + case IFM_1000_KX: #endif - case IFM_1000_LX: - case IFM_1000_SX: - speed |= IXGBE_LINK_SPEED_1GB_FULL; - break; - case IFM_1000_T: - speed |= IXGBE_LINK_SPEED_100_FULL; - speed |= IXGBE_LINK_SPEED_1GB_FULL; - break; - case IFM_10G_TWINAX: - speed |= 
IXGBE_LINK_SPEED_10GB_FULL; - break; - case IFM_100_TX: - speed |= IXGBE_LINK_SPEED_100_FULL; - break; - case IFM_10_T: - speed |= IXGBE_LINK_SPEED_10_FULL; - break; - default: - goto invalid; + case IFM_1000_LX: + case IFM_1000_SX: + speed |= IXGBE_LINK_SPEED_1GB_FULL; + break; + case IFM_1000_T: + speed |= IXGBE_LINK_SPEED_100_FULL; + speed |= IXGBE_LINK_SPEED_1GB_FULL; + break; + case IFM_10G_TWINAX: + speed |= IXGBE_LINK_SPEED_10GB_FULL; + break; + case IFM_100_TX: + speed |= IXGBE_LINK_SPEED_100_FULL; + break; + case IFM_10_T: + speed |= IXGBE_LINK_SPEED_10_FULL; + break; + default: + goto invalid; } hw->mac.autotry_restart = TRUE; @@ -2270,44 +2310,28 @@ ixgbe_media_change(struct ifnet *ifp) return (0); invalid: - device_printf(adapter->dev, "Invalid media type!\n"); + device_printf(iflib_get_dev(ctx), "Invalid media type!\n"); return (EINVAL); -} /* ixgbe_media_change */ +} /* ixgbe_if_media_change */ /************************************************************************ * ixgbe_set_promisc ************************************************************************/ -static void -ixgbe_set_promisc(struct adapter *adapter) +static int +ixgbe_if_promisc_set(if_ctx_t ctx, int flags) { - struct ifnet *ifp = adapter->ifp; - int mcnt = 0; - u32 rctl; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + u32 rctl; + int mcnt = 0; rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); rctl &= (~IXGBE_FCTRL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { - struct ifmultiaddr *ifma; -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) - break; - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif + mcnt = if_multiaddr_count(ifp, 
MAX_NUM_MULTICAST_ADDRESSES); } if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) rctl &= (~IXGBE_FCTRL_MPE); @@ -2321,12 +2345,13 @@ ixgbe_set_promisc(struct adapter *adapter) rctl &= ~IXGBE_FCTRL_UPE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); } -} /* ixgbe_set_promisc */ + return (0); +} /* ixgbe_if_promisc_set */ /************************************************************************ * ixgbe_msix_link - Link status change ISR (MSI/MSI-X) ************************************************************************/ -static void +static int ixgbe_msix_link(void *arg) { struct adapter *adapter = arg; @@ -2349,7 +2374,7 @@ ixgbe_msix_link(void *arg) /* Link status change */ if (eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); - taskqueue_enqueue(adapter->tq, &adapter->link_task); + iflib_admin_intr_deferred(adapter->ctx); } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { @@ -2357,17 +2382,16 @@ ixgbe_msix_link(void *arg) (eicr & IXGBE_EICR_FLOW_DIR)) { /* This is probably overkill :) */ if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) - return; + return (FILTER_HANDLED); /* Disable the interrupt */ - IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_FLOW_DIR); - taskqueue_enqueue(adapter->tq, &adapter->fdir_task); - } - - if (eicr & IXGBE_EICR_ECC) { - device_printf(adapter->dev, - "CRITICAL: ECC ERROR!! Please Reboot!!\n"); - IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); - } + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR); + GROUPTASK_ENQUEUE(&adapter->fdir_task); + } else + if (eicr & IXGBE_EICR_ECC) { + device_printf(iflib_get_dev(adapter->ctx), + "\nCRITICAL: ECC ERROR!! Please Reboot!!\n"); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); + } /* Check for over temp condition */ if (adapter->feat_en & IXGBE_FEATURE_TEMP_SENSOR) { @@ -2382,8 +2406,10 @@ ixgbe_msix_link(void *arg) retval = hw->phy.ops.check_overtemp(hw); if (retval != IXGBE_ERR_OVERTEMP) break; - device_printf(adapter->dev, "CRITICAL: OVER TEMP!! 
PHY IS SHUT DOWN!!\n"); - device_printf(adapter->dev, "System shutdown required!\n"); + device_printf(iflib_get_dev(adapter->ctx), + "\nCRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); + device_printf(iflib_get_dev(adapter->ctx), + "System shutdown required!\n"); break; default: if (!(eicr & IXGBE_EICR_TS)) @@ -2391,8 +2417,10 @@ ixgbe_msix_link(void *arg) retval = hw->phy.ops.check_overtemp(hw); if (retval != IXGBE_ERR_OVERTEMP) break; - device_printf(adapter->dev, "CRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); - device_printf(adapter->dev, "System shutdown required!\n"); + device_printf(iflib_get_dev(adapter->ctx), + "\nCRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); + device_printf(iflib_get_dev(adapter->ctx), + "System shutdown required!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); break; } @@ -2401,7 +2429,7 @@ ixgbe_msix_link(void *arg) /* Check for VF message */ if ((adapter->feat_en & IXGBE_FEATURE_SRIOV) && (eicr & IXGBE_EICR_MAILBOX)) - taskqueue_enqueue(adapter->tq, &adapter->mbx_task); + GROUPTASK_ENQUEUE(&adapter->mbx_task); } if (ixgbe_is_sfp(hw)) { @@ -2413,14 +2441,16 @@ ixgbe_msix_link(void *arg) if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); - taskqueue_enqueue(adapter->tq, &adapter->mod_task); + if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) + GROUPTASK_ENQUEUE(&adapter->mod_task); } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); - taskqueue_enqueue(adapter->tq, &adapter->msf_task); + if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) + GROUPTASK_ENQUEUE(&adapter->msf_task); } } @@ -2434,11 +2464,13 @@ ixgbe_msix_link(void *arg) if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); - taskqueue_enqueue(adapter->tq, &adapter->phy_task); + GROUPTASK_ENQUEUE(&adapter->phy_task); } /* Re-enable other interrupts */ 
IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); + + return (FILTER_HANDLED); } /* ixgbe_msix_link */ /************************************************************************ @@ -2447,9 +2479,9 @@ ixgbe_msix_link(void *arg) static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { - struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1); - int error; - unsigned int reg, usec, rate; + struct ix_rx_queue *que = ((struct ix_rx_queue *)oidp->oid_arg1); + int error; + unsigned int reg, usec, rate; reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); usec = ((reg & 0x0FF8) >> 3); @@ -2477,52 +2509,50 @@ ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) * ixgbe_add_device_sysctls ************************************************************************/ static void -ixgbe_add_device_sysctls(struct adapter *adapter) +ixgbe_add_device_sysctls(if_ctx_t ctx) { - device_t dev = adapter->dev; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; struct sysctl_oid_list *child; - struct sysctl_ctx_list *ctx; + struct sysctl_ctx_list *ctx_list; - ctx = device_get_sysctl_ctx(dev); + ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Sysctls for all devices */ - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, - adapter, 0, ixgbe_sysctl_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC); + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "fc", + CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_flowcntl, "I", + IXGBE_SYSCTL_DESC_SET_FC); - adapter->enable_aim = ixgbe_enable_aim; - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "enable_aim", CTLFLAG_RW, - &adapter->enable_aim, 1, "Interrupt Moderation"); - - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "advertise_speed", + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); #ifdef IXGBE_DEBUG 
/* testing sysctls (for all devices) */ - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "power_state", + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "power_state", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_power_state, "I", "PCI Power State"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "print_rss_config", + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "print_rss_config", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration"); #endif /* for X550 series devices */ if (hw->mac.type >= ixgbe_mac_X550) - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "dmac", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_dmac, + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "dmac", + CTLTYPE_U16 | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_dmac, "I", "DMA Coalesce"); /* for WoL-capable devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wol_enable", + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "wol_enable", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_wol_enable, "I", "Enable/Disable Wake on LAN"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wufc", - CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_wufc, + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "wufc", + CTLTYPE_U32 | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_wufc, "I", "Enable/Disable Wake Up Filters"); } @@ -2531,22 +2561,22 @@ ixgbe_add_device_sysctls(struct adapter *adapter) struct sysctl_oid *phy_node; struct sysctl_oid_list *phy_list; - phy_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "phy", + phy_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "phy", CTLFLAG_RD, NULL, "External PHY sysctls"); phy_list = SYSCTL_CHILDREN(phy_node); - SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "temp", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_phy_temp, + SYSCTL_ADD_PROC(ctx_list, phy_list, OID_AUTO, "temp", + CTLTYPE_U16 | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_phy_temp, "I", "Current External PHY Temperature (Celsius)"); - SYSCTL_ADD_PROC(ctx, 
phy_list, OID_AUTO, "overtemp_occurred", - CTLTYPE_INT | CTLFLAG_RD, adapter, 0, + SYSCTL_ADD_PROC(ctx_list, phy_list, OID_AUTO, + "overtemp_occurred", CTLTYPE_U16 | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_phy_overtemp_occurred, "I", "External PHY High Temperature Event Occurred"); } if (adapter->feat_cap & IXGBE_FEATURE_EEE) { - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "eee_state", + SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "eee_state", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_eee_state, "I", "EEE Power Save State"); } @@ -2556,10 +2586,11 @@ ixgbe_add_device_sysctls(struct adapter *adapter) * ixgbe_allocate_pci_resources ************************************************************************/ static int -ixgbe_allocate_pci_resources(struct adapter *adapter) +ixgbe_allocate_pci_resources(if_ctx_t ctx) { - device_t dev = adapter->dev; - int rid; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, @@ -2590,83 +2621,37 @@ ixgbe_allocate_pci_resources(struct adapter *adapter) * return 0 on success, positive on failure ************************************************************************/ static int -ixgbe_detach(device_t dev) +ixgbe_if_detach(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; - u32 ctrl_ext; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_detach: begin"); - /* Make sure VLANS are not using driver */ - if (adapter->ifp->if_vlantrunk != NULL) { - device_printf(dev, "Vlan in use, detach first\n"); - return (EBUSY); - } - if (ixgbe_pci_iov_detach(dev) != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (EBUSY); } - ether_ifdetach(adapter->ifp); - /* Stop the adapter */ - IXGBE_CORE_LOCK(adapter); - 
ixgbe_setup_low_power_mode(adapter); - IXGBE_CORE_UNLOCK(adapter); + iflib_config_gtask_deinit(&adapter->mod_task); + iflib_config_gtask_deinit(&adapter->msf_task); + iflib_config_gtask_deinit(&adapter->phy_task); + if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) + iflib_config_gtask_deinit(&adapter->mbx_task); - for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { - if (que->tq) { - if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) - taskqueue_drain(que->tq, &txr->txq_task); - taskqueue_drain(que->tq, &que->que_task); - taskqueue_free(que->tq); - } - } - - /* Drain the Link queue */ - if (adapter->tq) { - taskqueue_drain(adapter->tq, &adapter->link_task); - taskqueue_drain(adapter->tq, &adapter->mod_task); - taskqueue_drain(adapter->tq, &adapter->msf_task); - if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) - taskqueue_drain(adapter->tq, &adapter->mbx_task); - taskqueue_drain(adapter->tq, &adapter->phy_task); - if (adapter->feat_en & IXGBE_FEATURE_FDIR) - taskqueue_drain(adapter->tq, &adapter->fdir_task); - taskqueue_free(adapter->tq); - } + ixgbe_setup_low_power_mode(ctx); /* let hardware know driver is unloading */ ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); - /* Unregister VLAN events */ - if (adapter->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); - if (adapter->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - - callout_drain(&adapter->timer); - - if (adapter->feat_en & IXGBE_FEATURE_NETMAP) - netmap_detach(adapter->ifp); - - ixgbe_free_pci_resources(adapter); - bus_generic_detach(dev); - if_free(adapter->ifp); - - ixgbe_free_transmit_structures(adapter); - ixgbe_free_receive_structures(adapter); - free(adapter->queues, M_DEVBUF); + ixgbe_free_pci_resources(ctx); free(adapter->mta, M_IXGBE); - IXGBE_CORE_LOCK_DESTROY(adapter); - return (0); -} /* ixgbe_detach */ +} /* 
ixgbe_if_detach */ /************************************************************************ * ixgbe_setup_low_power_mode - LPLU/WoL preparation @@ -2674,13 +2659,15 @@ ixgbe_detach(device_t dev) * Prepare the adapter/port for LPLU and/or WoL ************************************************************************/ static int -ixgbe_setup_low_power_mode(struct adapter *adapter) +ixgbe_setup_low_power_mode(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; + device_t dev = iflib_get_dev(ctx); s32 error = 0; - mtx_assert(&adapter->core_mtx, MA_OWNED); + if (!hw->wol_enabled) + ixgbe_set_phy_power(hw, FALSE); /* Limit power management flow to X550EM baseT */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && @@ -2706,15 +2693,15 @@ ixgbe_setup_low_power_mode(struct adapter *adapter) IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); /* X550EM baseT adapters need a special LPLU flow */ - hw->phy.reset_disable = true; - ixgbe_stop(adapter); + hw->phy.reset_disable = TRUE; + ixgbe_if_stop(ctx); error = hw->phy.ops.enter_lplu(hw); if (error) device_printf(dev, "Error entering LPLU: %d\n", error); - hw->phy.reset_disable = false; + hw->phy.reset_disable = FALSE; } else { /* Just stop for other adapters */ - ixgbe_stop(adapter); + ixgbe_if_stop(ctx); } return error; @@ -2724,19 +2711,16 @@ ixgbe_setup_low_power_mode(struct adapter *adapter) * ixgbe_shutdown - Shutdown entry point ************************************************************************/ static int -ixgbe_shutdown(device_t dev) +ixgbe_if_shutdown(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - int error = 0; + int error = 0; INIT_DEBUGOUT("ixgbe_shutdown: begin"); - IXGBE_CORE_LOCK(adapter); - error = ixgbe_setup_low_power_mode(adapter); - IXGBE_CORE_UNLOCK(adapter); + error = ixgbe_setup_low_power_mode(ctx); return (error); -} /* ixgbe_shutdown */ +} /* ixgbe_if_shutdown */ 
/************************************************************************ * ixgbe_suspend @@ -2744,21 +2728,16 @@ ixgbe_shutdown(device_t dev) * From D0 to D3 ************************************************************************/ static int -ixgbe_suspend(device_t dev) +ixgbe_if_suspend(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - int error = 0; + int error = 0; INIT_DEBUGOUT("ixgbe_suspend: begin"); - IXGBE_CORE_LOCK(adapter); - - error = ixgbe_setup_low_power_mode(adapter); - - IXGBE_CORE_UNLOCK(adapter); + error = ixgbe_setup_low_power_mode(ctx); return (error); -} /* ixgbe_suspend */ +} /* ixgbe_if_suspend */ /************************************************************************ * ixgbe_resume @@ -2766,17 +2745,16 @@ ixgbe_suspend(device_t dev) * From D3 to D0 ************************************************************************/ static int -ixgbe_resume(device_t dev) +ixgbe_if_resume(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - struct ifnet *ifp = adapter->ifp; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 wus; INIT_DEBUGOUT("ixgbe_resume: begin"); - IXGBE_CORE_LOCK(adapter); - /* Read & clear WUS register */ wus = IXGBE_READ_REG(hw, IXGBE_WUS); if (wus) @@ -2791,81 +2769,105 @@ ixgbe_resume(device_t dev) * will re-advertise all previous advertised speeds */ if (ifp->if_flags & IFF_UP) - ixgbe_init_locked(adapter); - - IXGBE_CORE_UNLOCK(adapter); + ixgbe_if_init(ctx); return (0); -} /* ixgbe_resume */ +} /* ixgbe_if_resume */ /************************************************************************ - * ixgbe_set_if_hwassist - Set the various hardware offload abilities. + * ixgbe_if_mtu_set - Ioctl mtu entry point * - * Takes the ifnet's if_capenable flags (e.g. 
set by the user using - * ifconfig) and indicates to the OS via the ifnet's if_hwassist - * field what mbuf offload flags the driver will understand. + * Return 0 on success, EINVAL on failure + ************************************************************************/ +static int +ixgbe_if_mtu_set(if_ctx_t ctx, uint32_t mtu) +{ + struct adapter *adapter = iflib_get_softc(ctx); + int error = 0; + + IOCTL_DEBUGOUT("ioctl: SIOCIFMTU (Set Interface MTU)"); + + if (mtu > IXGBE_MAX_MTU) { + error = EINVAL; + } else { + adapter->max_frame_size = mtu + IXGBE_MTU_HDR; + } + + return error; +} /* ixgbe_if_mtu_set */ + +/************************************************************************ + * ixgbe_if_crcstrip_set ************************************************************************/ static void -ixgbe_set_if_hwassist(struct adapter *adapter) +ixgbe_if_crcstrip_set(if_ctx_t ctx, int onoff, int crcstrip) { - struct ifnet *ifp = adapter->ifp; + struct adapter *sc = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &sc->hw; + /* crc stripping is set in two places: + * IXGBE_HLREG0 (modified on init_locked and hw reset) + * IXGBE_RDRXCTL (set by the original driver in + * ixgbe_setup_hw_rsc() called in init_locked. + * We disable the setting when netmap is compiled in). + * We update the values here, but also in ixgbe.c because + * init_locked sometimes is called outside our control. 
+ */ + uint32_t hl, rxc; - ifp->if_hwassist = 0; -#if __FreeBSD_version >= 1000000 - if (ifp->if_capenable & IFCAP_TSO4) - ifp->if_hwassist |= CSUM_IP_TSO; - if (ifp->if_capenable & IFCAP_TSO6) - ifp->if_hwassist |= CSUM_IP6_TSO; - if (ifp->if_capenable & IFCAP_TXCSUM) { - ifp->if_hwassist |= (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP); - if (adapter->hw.mac.type != ixgbe_mac_82598EB) - ifp->if_hwassist |= CSUM_IP_SCTP; - } - if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { - ifp->if_hwassist |= (CSUM_IP6_UDP | CSUM_IP6_TCP); - if (adapter->hw.mac.type != ixgbe_mac_82598EB) - ifp->if_hwassist |= CSUM_IP6_SCTP; - } -#else - if (ifp->if_capenable & IFCAP_TSO) - ifp->if_hwassist |= CSUM_TSO; - if (ifp->if_capenable & IFCAP_TXCSUM) { - ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); - if (adapter->hw.mac.type != ixgbe_mac_82598EB) - ifp->if_hwassist |= CSUM_SCTP; - } + hl = IXGBE_READ_REG(hw, IXGBE_HLREG0); + rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); +#ifdef NETMAP + if (netmap_verbose) + D("%s read HLREG 0x%x rxc 0x%x", + onoff ? "enter" : "exit", hl, rxc); #endif -} /* ixgbe_set_if_hwassist */ + /* hw requirements ... */ + rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; + rxc |= IXGBE_RDRXCTL_RSCACKC; + if (onoff && !crcstrip) { + /* keep the crc. Fast rx */ + hl &= ~IXGBE_HLREG0_RXCRCSTRP; + rxc &= ~IXGBE_RDRXCTL_CRCSTRIP; + } else { + /* reset default mode */ + hl |= IXGBE_HLREG0_RXCRCSTRP; + rxc |= IXGBE_RDRXCTL_CRCSTRIP; + } +#ifdef NETMAP + if (netmap_verbose) + D("%s write HLREG 0x%x rxc 0x%x", + onoff ? "enter" : "exit", hl, rxc); +#endif + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl); + IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc); +} /* ixgbe_if_crcstrip_set */ -/************************************************************************ - * ixgbe_init_locked - Init entry point +/********************************************************************* + * ixgbe_if_init - Init entry point * * Used in two ways: It is used by the stack as an init * entry point in network interface structure. 
It is also * used by the driver as a hw/sw initialization routine to * get to a consistent state. * - * return 0 on success, positive on failure - ************************************************************************/ + * Return 0 on success, positive on failure + **********************************************************************/ void -ixgbe_init_locked(struct adapter *adapter) +ixgbe_if_init(if_ctx_t ctx) { - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; - struct tx_ring *txr; - struct rx_ring *rxr; + struct ix_rx_queue *rx_que; + struct ix_tx_queue *tx_que; u32 txdctl, mhadd; u32 rxdctl, rxctrl; u32 ctrl_ext; - int err = 0; - mtx_assert(&adapter->core_mtx, MA_OWNED); - INIT_DEBUGOUT("ixgbe_init_locked: begin"); + int i, j, err; - hw->adapter_stopped = FALSE; - ixgbe_stop_adapter(hw); - callout_stop(&adapter->timer); + INIT_DEBUGOUT("ixgbe_if_init: begin"); /* Queue indices may change with IOV mode */ ixgbe_align_all_queue_indices(adapter); @@ -2878,22 +2880,14 @@ ixgbe_init_locked(struct adapter *adapter) ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); hw->addr_ctrl.rar_used_count = 1; - /* Set hardware offload abilities from ifnet flags */ - ixgbe_set_if_hwassist(adapter); - - /* Prepare transmit descriptors and buffers */ - if (ixgbe_setup_transmit_structures(adapter)) { - device_printf(dev, "Could not setup transmit structures\n"); - ixgbe_stop(adapter); - return; - } - ixgbe_init_hw(hw); + ixgbe_initialize_iov(adapter); - ixgbe_initialize_transmit_units(adapter); + + ixgbe_initialize_transmit_units(ctx); /* Setup Multicast table */ - ixgbe_set_multi(adapter); + ixgbe_if_multi_set(ctx); /* Determine the correct mbuf pool, based on frame size */ if (adapter->max_frame_size <= MCLBYTES) @@ -2901,15 +2895,8 @@ ixgbe_init_locked(struct adapter *adapter) else 
adapter->rx_mbuf_sz = MJUMPAGESIZE; - /* Prepare receive descriptors and buffers */ - if (ixgbe_setup_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - ixgbe_stop(adapter); - return; - } - /* Configure RX settings */ - ixgbe_initialize_receive_units(adapter); + ixgbe_initialize_receive_units(ctx); /* Enable SDP & MSI-X interrupts based on adapter */ ixgbe_config_gpie(adapter); @@ -2924,8 +2911,9 @@ ixgbe_init_locked(struct adapter *adapter) } /* Now enable all the queues */ - for (int i = 0; i < adapter->num_queues; i++) { - txr = &adapter->tx_rings[i]; + for (i = 0, tx_que = adapter->tx_queues; i < adapter->num_tx_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ @@ -2941,8 +2929,9 @@ ixgbe_init_locked(struct adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); } - for (int i = 0, j = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; + for (i = 0, rx_que = adapter->rx_queues; i < adapter->num_rx_queues; i++, rx_que++) { + struct rx_ring *rxr = &rx_que->rxr; + rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); if (hw->mac.type == ixgbe_mac_82598EB) { /* @@ -2955,7 +2944,7 @@ ixgbe_init_locked(struct adapter *adapter) } rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); - for (; j < 10; j++) { + for (j = 0; j < 10; j++) { if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & IXGBE_RXDCTL_ENABLE) break; @@ -2963,35 +2952,6 @@ ixgbe_init_locked(struct adapter *adapter) msec_delay(1); } wmb(); - - /* - * In netmap mode, we must preserve the buffers made - * available to userspace before the if_init() - * (this is true by default on the TX side, because - * init makes all buffers available to userspace). - * - * netmap_reset() and the device specific routines - * (e.g. 
ixgbe_setup_receive_rings()) map these - * buffers at the end of the NIC ring, so here we - * must set the RDT (tail) register to make sure - * they are not overwritten. - * - * In this driver the NIC ring starts at RDH = 0, - * RDT points to the last slot available for reception (?), - * so RDT = num_rx_desc - 1 means the whole ring is available. - */ -#ifdef DEV_NETMAP - if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && - (ifp->if_capenable & IFCAP_NETMAP)) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; - int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); - - IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), t); - } else -#endif /* DEV_NETMAP */ - IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), - adapter->num_rx_desc - 1); } /* Enable Receive engine */ @@ -3001,10 +2961,8 @@ ixgbe_init_locked(struct adapter *adapter) rxctrl |= IXGBE_RXCTRL_RXEN; ixgbe_enable_rx_dma(hw, rxctrl); - callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); - - /* Set up MSI-X routing */ - if (adapter->feat_en & IXGBE_FEATURE_MSIX) { + /* Set up MSI/MSI-X routing */ + if (ixgbe_enable_msix) { ixgbe_configure_ivars(adapter); /* Set up auto-mask */ if (hw->mac.type == ixgbe_mac_82598EB) @@ -3037,6 +2995,9 @@ ixgbe_init_locked(struct adapter *adapter) /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); + /* Enable power to the phy. 
*/ + ixgbe_set_phy_power(hw, TRUE); + /* Config/Enable Link */ ixgbe_config_link(adapter); @@ -3047,13 +3008,13 @@ ixgbe_init_locked(struct adapter *adapter) ixgbe_start_hw(hw); /* Set up VLAN support and filter */ - ixgbe_setup_vlan_hw_support(adapter); + ixgbe_setup_vlan_hw_support(ctx); /* Setup DMA Coalescing */ ixgbe_config_dmac(adapter); /* And now turn on interrupts */ - ixgbe_enable_intr(adapter); + ixgbe_if_enable_intr(ctx); /* Enable the use of the MBX by the VF's */ if (adapter->feat_en & IXGBE_FEATURE_SRIOV) { @@ -3062,27 +3023,8 @@ ixgbe_init_locked(struct adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); } - /* Now inform the stack we're ready */ - ifp->if_drv_flags |= IFF_DRV_RUNNING; - - return; } /* ixgbe_init_locked */ -/************************************************************************ - * ixgbe_init - ************************************************************************/ -static void -ixgbe_init(void *arg) -{ - struct adapter *adapter = arg; - - IXGBE_CORE_LOCK(adapter); - ixgbe_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - - return; -} /* ixgbe_init */ - /************************************************************************ * ixgbe_set_ivar * @@ -3101,7 +3043,6 @@ ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) vector |= IXGBE_IVAR_ALLOC_VAL; switch (hw->mac.type) { - case ixgbe_mac_82598EB: if (type == -1) entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; @@ -3113,7 +3054,6 @@ ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) ivar |= (vector << (8 * (entry & 0x3))); IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); break; - case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: @@ -3132,7 +3072,6 @@ ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); } - default: break; } @@ -3144,8 +3083,9 @@ ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) 
static void ixgbe_configure_ivars(struct adapter *adapter) { - struct ix_queue *que = adapter->queues; - u32 newitr; + struct ix_rx_queue *rx_que = adapter->rx_queues; + struct ix_tx_queue *tx_que = adapter->tx_queues; + u32 newitr; if (ixgbe_max_interrupt_rate > 0) newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; @@ -3158,17 +3098,21 @@ ixgbe_configure_ivars(struct adapter *adapter) newitr = 0; } - for (int i = 0; i < adapter->num_queues; i++, que++) { - struct rx_ring *rxr = &adapter->rx_rings[i]; - struct tx_ring *txr = &adapter->tx_rings[i]; - /* First the RX queue entry */ - ixgbe_set_ivar(adapter, rxr->me, que->msix, 0); - /* ... and the TX */ - ixgbe_set_ivar(adapter, txr->me, que->msix, 1); - /* Set an Initial EITR value */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), newitr); - } + for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) { + struct rx_ring *rxr = &rx_que->rxr; + /* First the RX queue entry */ + ixgbe_set_ivar(adapter, rxr->me, rx_que->msix, 0); + + /* Set an Initial EITR value */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(rx_que->msix), newitr); + } + for (int i = 0; i < adapter->num_tx_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + + /* ... 
and the TX */ + ixgbe_set_ivar(adapter, txr->me, tx_que->msix, 1); + } /* For the Link interrupt */ ixgbe_set_ivar(adapter, 1, adapter->vector, -1); } /* ixgbe_configure_ivars */ @@ -3184,7 +3128,7 @@ ixgbe_config_gpie(struct adapter *adapter) gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); - if (adapter->feat_en & IXGBE_FEATURE_MSIX) { + if (adapter->intr_type == IFLIB_INTR_MSIX) { /* Enable Enhanced MSI-X mode */ gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_EIAME @@ -3215,7 +3159,6 @@ ixgbe_config_gpie(struct adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); - return; } /* ixgbe_config_gpie */ /************************************************************************ @@ -3270,41 +3213,39 @@ ixgbe_config_delay_values(struct adapter *adapter) * * Called whenever multicast address list is updated. ************************************************************************/ -static void -ixgbe_set_multi(struct adapter *adapter) +static int +ixgbe_mc_filter_apply(void *arg, struct ifmultiaddr *ifma, int count) { - struct ifmultiaddr *ifma; + struct adapter *adapter = arg; + struct ixgbe_mc_addr *mta = adapter->mta; + + if (ifma->ifma_addr->sa_family != AF_LINK) + return (0); + if (count == MAX_NUM_MULTICAST_ADDRESSES) + return (0); + bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), + mta[count].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); + mta[count].vmdq = adapter->pool; + + return (1); +} /* ixgbe_mc_filter_apply */ + +static void +ixgbe_if_multi_set(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_mc_addr *mta; - struct ifnet *ifp = adapter->ifp; + struct ifnet *ifp = iflib_get_ifp(ctx); u8 *update_ptr; int mcnt = 0; u32 fctrl; - IOCTL_DEBUGOUT("ixgbe_set_multi: begin"); + IOCTL_DEBUGOUT("ixgbe_if_multi_set: begin"); mta = adapter->mta; bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES); -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if 
(ifma->ifma_addr->sa_family != AF_LINK) - continue; - if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) - break; - bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), - mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); - mta[mcnt].vmdq = adapter->pool; - mcnt++; - } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif + mcnt = if_multi_apply(iflib_get_ifp(ctx), ixgbe_mc_filter_apply, adapter); fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); @@ -3325,8 +3266,7 @@ ixgbe_set_multi(struct adapter *adapter) ixgbe_mc_array_itr, TRUE); } - return; -} /* ixgbe_set_multi */ +} /* ixgbe_if_multi_set */ /************************************************************************ * ixgbe_mc_array_itr @@ -3355,73 +3295,25 @@ ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) * and runs the watchdog check. ************************************************************************/ static void -ixgbe_local_timer(void *arg) +ixgbe_if_timer(if_ctx_t ctx, uint16_t qid) { - struct adapter *adapter = arg; - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - u64 queues = 0; - int hung = 0; + struct adapter *adapter = iflib_get_softc(ctx); - mtx_assert(&adapter->core_mtx, MA_OWNED); + if (qid != 0) + return; /* Check for pluggable optics */ if (adapter->sfp_probe) - if (!ixgbe_sfp_probe(adapter)) - goto out; /* Nothing to do */ + if (!ixgbe_sfp_probe(ctx)) + return; /* Nothing to do */ - ixgbe_update_link_status(adapter); - ixgbe_update_stats_counters(adapter); + ixgbe_check_link(&adapter->hw, &adapter->link_speed, + &adapter->link_up, 0); - /* - * Check the TX queues status - * - mark hung queues so we don't schedule on them - * - watchdog only if all queues show hung - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - /* Keep track of queues with work for soft irq */ - if (que->txr->busy) - queues |= ((u64)1 << que->me); - /* - * Each time txeof runs without 
cleaning, but there - * are uncleaned descriptors it increments busy. If - * we get to the MAX we declare it hung. - */ - if (que->busy == IXGBE_QUEUE_HUNG) { - ++hung; - /* Mark the queue as inactive */ - adapter->active_queues &= ~((u64)1 << que->me); - continue; - } else { - /* Check if we've come back from hung */ - if ((adapter->active_queues & ((u64)1 << que->me)) == 0) - adapter->active_queues |= ((u64)1 << que->me); - } - if (que->busy >= IXGBE_MAX_TX_BUSY) { - device_printf(dev, - "Warning queue %d appears to be hung!\n", i); - que->txr->busy = IXGBE_QUEUE_HUNG; - ++hung; - } - } + /* Fire off the adminq task */ + iflib_admin_intr_deferred(ctx); - /* Only truly watchdog if all queues show hung */ - if (hung == adapter->num_queues) - goto watchdog; - else if (queues != 0) { /* Force an IRQ on queues with work */ - ixgbe_rearm_queues(adapter, queues); - } - -out: - callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); - return; - -watchdog: - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - adapter->watchdog_events++; - ixgbe_init_locked(adapter); -} /* ixgbe_local_timer */ +} /* ixgbe_if_timer */ /************************************************************************ * ixgbe_sfp_probe @@ -3429,10 +3321,11 @@ watchdog: * Determine if a port had optics inserted. 
************************************************************************/ static bool -ixgbe_sfp_probe(struct adapter *adapter) +ixgbe_sfp_probe(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; + device_t dev = iflib_get_dev(ctx); bool result = FALSE; if ((hw->phy.type == ixgbe_phy_nl) && @@ -3461,13 +3354,15 @@ out: * ixgbe_handle_mod - Tasklet for SFP module interrupts ************************************************************************/ static void -ixgbe_handle_mod(void *context, int pending) +ixgbe_handle_mod(void *context) { - struct adapter *adapter = context; + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; - device_t dev = adapter->dev; + device_t dev = iflib_get_dev(ctx); u32 err, cage_full = 0; + adapter->sfp_reinit = 1; if (adapter->hw.need_crosstalk_fix) { switch (hw->mac.type) { case ixgbe_mac_82599EB: @@ -3484,23 +3379,31 @@ ixgbe_handle_mod(void *context, int pending) } if (!cage_full) - return; + goto handle_mod_out; } err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); - return; + goto handle_mod_out; } - err = hw->mac.ops.setup_sfp(hw); + if (hw->mac.type == ixgbe_mac_82598EB) + err = hw->phy.ops.reset(hw); + else + err = hw->mac.ops.setup_sfp(hw); + if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); - return; + goto handle_mod_out; } - taskqueue_enqueue(adapter->tq, &adapter->msf_task); + GROUPTASK_ENQUEUE(&adapter->msf_task); + return; + +handle_mod_out: + adapter->sfp_reinit = 0; } /* ixgbe_handle_mod */ @@ -3508,13 +3411,17 @@ ixgbe_handle_mod(void *context, int pending) * ixgbe_handle_msf - Tasklet for MSF (multispeed fiber) interrupts ************************************************************************/ static void 
-ixgbe_handle_msf(void *context, int pending) +ixgbe_handle_msf(void *context) { - struct adapter *adapter = context; + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 autoneg; bool negotiate; + if (adapter->sfp_reinit != 1) + return; + /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); @@ -3525,18 +3432,21 @@ ixgbe_handle_msf(void *context, int pending) hw->mac.ops.setup_link(hw, autoneg, TRUE); /* Adjust media types shown in ifconfig */ - ifmedia_removeall(&adapter->media); - ixgbe_add_media_types(adapter); - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); + ifmedia_removeall(adapter->media); + ixgbe_add_media_types(adapter->ctx); + ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); + + adapter->sfp_reinit = 0; } /* ixgbe_handle_msf */ /************************************************************************ * ixgbe_handle_phy - Tasklet for external PHY interrupts ************************************************************************/ static void -ixgbe_handle_phy(void *context, int pending) +ixgbe_handle_phy(void *context) { - struct adapter *adapter = context; + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; int error; @@ -3549,28 +3459,18 @@ ixgbe_handle_phy(void *context, int pending) } /* ixgbe_handle_phy */ /************************************************************************ - * ixgbe_stop - Stop the hardware + * ixgbe_if_stop - Stop the hardware * * Disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. 
************************************************************************/ static void -ixgbe_stop(void *arg) +ixgbe_if_stop(if_ctx_t ctx) { - struct ifnet *ifp; - struct adapter *adapter = arg; + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; - ifp = adapter->ifp; - - mtx_assert(&adapter->core_mtx, MA_OWNED); - - INIT_DEBUGOUT("ixgbe_stop: begin\n"); - ixgbe_disable_intr(adapter); - callout_stop(&adapter->timer); - - /* Let the stack know...*/ - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + INIT_DEBUGOUT("ixgbe_if_stop: begin\n"); ixgbe_reset_hw(hw); hw->adapter_stopped = FALSE; @@ -3582,13 +3482,13 @@ ixgbe_stop(void *arg) /* Update the stack */ adapter->link_up = FALSE; - ixgbe_update_link_status(adapter); + ixgbe_if_update_admin_status(ctx); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); return; -} /* ixgbe_stop */ +} /* ixgbe_if_stop */ /************************************************************************ * ixgbe_update_link_status - Update OS on link state @@ -3598,10 +3498,10 @@ ixgbe_stop(void *arg) * a link interrupt. 
************************************************************************/ static void -ixgbe_update_link_status(struct adapter *adapter) +ixgbe_if_update_admin_status(if_ctx_t ctx) { - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); if (adapter->link_up) { if (adapter->link_active == FALSE) { @@ -3614,7 +3514,9 @@ ixgbe_update_link_status(struct adapter *adapter) ixgbe_fc_enable(&adapter->hw); /* Update DMA coalescing config */ ixgbe_config_dmac(adapter); - if_link_state_change(ifp, LINK_STATE_UP); + /* should actually be negotiated value */ + iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10)); + if (adapter->feat_en & IXGBE_FEATURE_SRIOV) ixgbe_ping_all_vfs(adapter); } @@ -3622,15 +3524,18 @@ ixgbe_update_link_status(struct adapter *adapter) if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); - if_link_state_change(ifp, LINK_STATE_DOWN); + iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); adapter->link_active = FALSE; if (adapter->feat_en & IXGBE_FEATURE_SRIOV) ixgbe_ping_all_vfs(adapter); } } - return; -} /* ixgbe_update_link_status */ + ixgbe_update_stats_counters(adapter); + + /* Re-enable link interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_LSC); +} /* ixgbe_if_update_admin_status */ /************************************************************************ * ixgbe_config_dmac - Configure DMA Coalescing @@ -3647,7 +3552,7 @@ ixgbe_config_dmac(struct adapter *adapter) if (dcfg->watchdog_timer ^ adapter->dmac || dcfg->link_speed ^ adapter->link_speed) { dcfg->watchdog_timer = adapter->dmac; - dcfg->fcoe_en = false; + dcfg->fcoe_en = FALSE; dcfg->link_speed = adapter->link_speed; dcfg->num_tcs = 1; @@ -3659,14 +3564,15 @@ ixgbe_config_dmac(struct adapter *adapter) } /* ixgbe_config_dmac */ /************************************************************************ - * ixgbe_enable_intr + * 
ixgbe_if_enable_intr ************************************************************************/ -static void -ixgbe_enable_intr(struct adapter *adapter) +void +ixgbe_if_enable_intr(if_ctx_t ctx) { - struct ixgbe_hw *hw = &adapter->hw; - struct ix_queue *que = adapter->queues; - u32 mask, fwsm; + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + struct ix_rx_queue *que = adapter->rx_queues; + u32 mask, fwsm; mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); @@ -3720,7 +3626,7 @@ ixgbe_enable_intr(struct adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); /* With MSI-X we use auto clear */ - if (adapter->msix_mem) { + if (adapter->intr_type == IFLIB_INTR_MSIX) { mask = IXGBE_EIMS_ENABLE_MASK; /* Don't autoclear Link */ mask &= ~IXGBE_EIMS_OTHER; @@ -3735,21 +3641,22 @@ ixgbe_enable_intr(struct adapter *adapter) * allow for handling the extended (beyond 32) MSI-X * vectors that can be used by 82599 */ - for (int i = 0; i < adapter->num_queues; i++, que++) + for (int i = 0; i < adapter->num_rx_queues; i++, que++) ixgbe_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); - return; -} /* ixgbe_enable_intr */ +} /* ixgbe_if_enable_intr */ /************************************************************************ * ixgbe_disable_intr ************************************************************************/ static void -ixgbe_disable_intr(struct adapter *adapter) +ixgbe_if_disable_intr(if_ctx_t ctx) { - if (adapter->msix_mem) + struct adapter *adapter = iflib_get_softc(ctx); + + if (adapter->intr_type == IFLIB_INTR_MSIX) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); if (adapter->hw.mac.type == ixgbe_mac_82598EB) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); @@ -3760,53 +3667,101 @@ ixgbe_disable_intr(struct adapter *adapter) } IXGBE_WRITE_FLUSH(&adapter->hw); - return; -} /* ixgbe_disable_intr */ +} /* ixgbe_if_disable_intr */ /************************************************************************ - * 
ixgbe_legacy_irq - Legacy Interrupt Service routine + * ixgbe_if_rx_queue_intr_enable + ************************************************************************/ +static int +ixgbe_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que = &adapter->rx_queues[rxqid]; + + ixgbe_enable_queue(adapter, que->rxr.me); + + return (0); +} /* ixgbe_if_rx_queue_intr_enable */ + +/************************************************************************ + * ixgbe_enable_queue ************************************************************************/ static void -ixgbe_legacy_irq(void *arg) +ixgbe_enable_queue(struct adapter *adapter, u32 vector) { - struct ix_queue *que = arg; - struct adapter *adapter = que->adapter; struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = adapter->tx_rings; - bool more = false; - u32 eicr, eicr_mask; + u64 queue = (u64)(1 << vector); + u32 mask; - /* Silicon errata #26 on 82598 */ - IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK); + if (hw->mac.type == ixgbe_mac_82598EB) { + mask = (IXGBE_EIMS_RTX_QUEUE & queue); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); + } else { + mask = (queue & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (queue >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + } +} /* ixgbe_enable_queue */ + +/************************************************************************ + * ixgbe_disable_queue + ************************************************************************/ +static void +ixgbe_disable_queue(struct adapter *adapter, u32 vector) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 queue = (u64)(1 << vector); + u32 mask; + + if (hw->mac.type == ixgbe_mac_82598EB) { + mask = (IXGBE_EIMS_RTX_QUEUE & queue); + IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); + } else { + mask = (queue & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); + mask 
= (queue >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); + } +} /* ixgbe_disable_queue */ + +/************************************************************************ + * ixgbe_intr - Legacy Interrupt Service Routine + ************************************************************************/ +int +ixgbe_intr(void *arg) +{ + struct adapter *adapter = arg; + struct ix_rx_queue *que = adapter->rx_queues; + struct ixgbe_hw *hw = &adapter->hw; + if_ctx_t ctx = adapter->ctx; + u32 eicr, eicr_mask; eicr = IXGBE_READ_REG(hw, IXGBE_EICR); ++que->irqs; if (eicr == 0) { - ixgbe_enable_intr(adapter); - return; - } - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - more = ixgbe_rxeof(que); - - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); - if (!ixgbe_ring_empty(ifp, txr->br)) - ixgbe_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); + ixgbe_if_enable_intr(ctx); + return (FILTER_HANDLED); } /* Check for fan failure */ - if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { - ixgbe_check_fan_failure(adapter, eicr, true); + if ((hw->device_id == IXGBE_DEV_ID_82598AT) && + (eicr & IXGBE_EICR_GPI_SDP1)) { + device_printf(adapter->dev, + "\nCRITICAL: FAN FAILURE!! 
REPLACE IMMEDIATELY!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* Link status change */ - if (eicr & IXGBE_EICR_LSC) - taskqueue_enqueue(adapter->tq, &adapter->link_task); + if (eicr & IXGBE_EICR_LSC) { + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); + iflib_admin_intr_deferred(ctx); + } if (ixgbe_is_sfp(hw)) { /* Pluggable optics-related interrupt */ @@ -3817,109 +3772,55 @@ ixgbe_legacy_irq(void *arg) if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); - taskqueue_enqueue(adapter->tq, &adapter->mod_task); + GROUPTASK_ENQUEUE(&adapter->mod_task); } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); - taskqueue_enqueue(adapter->tq, &adapter->msf_task); + if (atomic_cmpset_acq_int(&adapter->sfp_reinit, 0, 1)) + GROUPTASK_ENQUEUE(&adapter->msf_task); } } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) - taskqueue_enqueue(adapter->tq, &adapter->phy_task); + GROUPTASK_ENQUEUE(&adapter->phy_task); - if (more) - taskqueue_enqueue(que->tq, &que->que_task); - else - ixgbe_enable_intr(adapter); - - return; -} /* ixgbe_legacy_irq */ + return (FILTER_SCHEDULE_THREAD); +} /* ixgbe_intr */ /************************************************************************ * ixgbe_free_pci_resources ************************************************************************/ static void -ixgbe_free_pci_resources(struct adapter *adapter) +ixgbe_free_pci_resources(if_ctx_t ctx) { - struct ix_queue *que = adapter->queues; - device_t dev = adapter->dev; - int rid, memrid; + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que = adapter->rx_queues; + device_t dev = iflib_get_dev(ctx); - if (adapter->hw.mac.type == ixgbe_mac_82598EB) - memrid = PCIR_BAR(MSIX_82598_BAR); - else - memrid = PCIR_BAR(MSIX_82599_BAR); + /* Release all msix queue resources */ + if 
(adapter->intr_type == IFLIB_INTR_MSIX) + iflib_irq_free(ctx, &adapter->irq); - /* - * There is a slight possibility of a failure mode - * in attach that will result in entering this function - * before interrupt resources have been initialized, and - * in that case we do not want to execute the loops below - * We can detect this reliably by the state of the adapter - * res pointer. - */ - if (adapter->res == NULL) - goto mem; - - /* - * Release all msix queue resources: - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - rid = que->msix + 1; - if (que->tag != NULL) { - bus_teardown_intr(dev, que->res, que->tag); - que->tag = NULL; + if (que != NULL) { + for (int i = 0; i < adapter->num_rx_queues; i++, que++) { + iflib_irq_free(ctx, &que->que_irq); } - if (que->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } - - if (adapter->tag != NULL) { - bus_teardown_intr(dev, adapter->res, adapter->tag); - adapter->tag = NULL; - } - - /* Clean the Legacy or Link interrupt last */ - if (adapter->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, adapter->link_rid, - adapter->res); - -mem: - if ((adapter->feat_en & IXGBE_FEATURE_MSI) || - (adapter->feat_en & IXGBE_FEATURE_MSIX)) - pci_release_msi(dev); - - if (adapter->msix_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, memrid, - adapter->msix_mem); - + /* + * Free link/admin interrupt + */ if (adapter->pci_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), - adapter->pci_mem); + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(0), adapter->pci_mem); - return; } /* ixgbe_free_pci_resources */ -/************************************************************************ - * ixgbe_set_sysctl_value - ************************************************************************/ -static void -ixgbe_set_sysctl_value(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - 
SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} /* ixgbe_set_sysctl_value */ - /************************************************************************ * ixgbe_sysctl_flowcntl * @@ -3962,12 +3863,12 @@ ixgbe_set_flowcntl(struct adapter *adapter, int fc) case ixgbe_fc_tx_pause: case ixgbe_fc_full: adapter->hw.fc.requested_mode = fc; - if (adapter->num_queues > 1) + if (adapter->num_rx_queues > 1) ixgbe_disable_rx_drop(adapter); break; case ixgbe_fc_none: adapter->hw.fc.requested_mode = ixgbe_fc_none; - if (adapter->num_queues > 1) + if (adapter->num_rx_queues > 1) ixgbe_enable_rx_drop(adapter); break; default: @@ -3997,8 +3898,8 @@ ixgbe_enable_rx_drop(struct adapter *adapter) struct rx_ring *rxr; u32 srrctl; - for (int i = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; + for (int i = 0; i < adapter->num_rx_queues; i++) { + rxr = &adapter->rx_queues[i].rxr; srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl |= IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); @@ -4007,8 +3908,8 @@ ixgbe_enable_rx_drop(struct adapter *adapter) /* enable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, - (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | - IXGBE_QDE_ENABLE)); + (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | + IXGBE_QDE_ENABLE)); } } /* ixgbe_enable_rx_drop */ @@ -4022,8 +3923,8 @@ ixgbe_disable_rx_drop(struct adapter *adapter) struct rx_ring *rxr; u32 srrctl; - for (int i = 0; i < adapter->num_queues; i++) { - rxr = &adapter->rx_rings[i]; + for (int i = 0; i < adapter->num_rx_queues; i++) { + rxr = &adapter->rx_queues[i].rxr; srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl &= ~IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); @@ -4069,7 +3970,7 @@ ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS) static int 
ixgbe_set_advertise(struct adapter *adapter, int advertise) { - device_t dev; + device_t dev = iflib_get_dev(adapter->ctx); struct ixgbe_hw *hw; ixgbe_link_speed speed = 0; ixgbe_link_speed link_caps = 0; @@ -4080,7 +3981,6 @@ ixgbe_set_advertise(struct adapter *adapter, int advertise) if (adapter->advertise == advertise) /* no change */ return (0); - dev = adapter->dev; hw = &adapter->hw; /* No speed changes for backplane media */ @@ -4199,12 +4099,12 @@ static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; - struct ifnet *ifp = adapter->ifp; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); int error; - u32 newval; + u16 newval; newval = adapter->dmac; - error = sysctl_handle_int(oidp, &newval, 0, req); + error = sysctl_handle_16(oidp, &newval, 0, req); if ((error) || (req->newptr == NULL)) return (error); @@ -4235,7 +4135,7 @@ ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS) /* Re-initialize hardware if it's already running */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixgbe_init(adapter); + ifp->if_init(ifp); return (0); } /* ixgbe_sysctl_dmac */ @@ -4339,7 +4239,7 @@ ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS) new_wufc = adapter->wufc; - error = sysctl_handle_int(oidp, &new_wufc, 0, req); + error = sysctl_handle_32(oidp, &new_wufc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_wufc == adapter->wufc) @@ -4425,14 +4325,14 @@ ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS) u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { - device_printf(adapter->dev, + device_printf(iflib_get_dev(adapter->ctx), "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { - device_printf(adapter->dev, + device_printf(iflib_get_dev(adapter->ctx), "Error reading from PHY's current temperature register\n"); return (EAGAIN); } @@ -4440,7 +4340,7 @@ ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS) /* Shift temp for 
output */ reg = reg >> 8; - return (sysctl_handle_int(oidp, NULL, reg, req)); + return (sysctl_handle_16(oidp, NULL, reg, req)); } /* ixgbe_sysctl_phy_temp */ /************************************************************************ @@ -4457,14 +4357,14 @@ ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS) u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { - device_printf(adapter->dev, + device_printf(iflib_get_dev(adapter->ctx), "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { - device_printf(adapter->dev, + device_printf(iflib_get_dev(adapter->ctx), "Error reading from PHY's temperature status register\n"); return (EAGAIN); } @@ -4472,7 +4372,7 @@ ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS) /* Get occurrence bit */ reg = !!(reg & 0x4000); - return (sysctl_handle_int(oidp, 0, reg, req)); + return (sysctl_handle_16(oidp, 0, reg, req)); } /* ixgbe_sysctl_phy_overtemp_occurred */ /************************************************************************ @@ -4489,6 +4389,7 @@ ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; device_t dev = adapter->dev; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); int curr_eee, new_eee, error = 0; s32 retval; @@ -4517,7 +4418,7 @@ ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS) } /* Restart auto-neg */ - ixgbe_init(adapter); + ifp->if_init(ifp); device_printf(dev, "New EEE state: %d\n", new_eee); @@ -4540,8 +4441,7 @@ ixgbe_init_device_features(struct adapter *adapter) | IXGBE_FEATURE_RSS | IXGBE_FEATURE_MSI | IXGBE_FEATURE_MSIX - | IXGBE_FEATURE_LEGACY_IRQ - | IXGBE_FEATURE_LEGACY_TX; + | IXGBE_FEATURE_LEGACY_IRQ; /* Set capabilities first... */ switch (adapter->hw.mac.type) { @@ -4612,10 +4512,6 @@ ixgbe_init_device_features(struct adapter *adapter) else device_printf(adapter->dev, "Device does not support Flow Director. 
Leaving disabled."); } - /* Legacy (single queue) transmit */ - if ((adapter->feat_cap & IXGBE_FEATURE_LEGACY_TX) && - ixgbe_enable_legacy_tx) - adapter->feat_en |= IXGBE_FEATURE_LEGACY_TX; /* * Message Signal Interrupts - Extended (MSI-X) * Normal MSI is only enabled if MSI-X calls fail. @@ -4636,222 +4532,6 @@ ixgbe_init_device_features(struct adapter *adapter) } } /* ixgbe_init_device_features */ -/************************************************************************ - * ixgbe_probe - Device identification routine - * - * Determines if the driver should be loaded on - * adapter based on its PCI vendor/device ID. - * - * return BUS_PROBE_DEFAULT on success, positive on failure - ************************************************************************/ -static int -ixgbe_probe(device_t dev) -{ - ixgbe_vendor_info_t *ent; - - u16 pci_vendor_id = 0; - u16 pci_device_id = 0; - u16 pci_subvendor_id = 0; - u16 pci_subdevice_id = 0; - char adapter_name[256]; - - INIT_DEBUGOUT("ixgbe_probe: begin"); - - pci_vendor_id = pci_get_vendor(dev); - if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID) - return (ENXIO); - - pci_device_id = pci_get_device(dev); - pci_subvendor_id = pci_get_subvendor(dev); - pci_subdevice_id = pci_get_subdevice(dev); - - ent = ixgbe_vendor_info_array; - while (ent->vendor_id != 0) { - if ((pci_vendor_id == ent->vendor_id) && - (pci_device_id == ent->device_id) && - ((pci_subvendor_id == ent->subvendor_id) || - (ent->subvendor_id == 0)) && - ((pci_subdevice_id == ent->subdevice_id) || - (ent->subdevice_id == 0))) { - sprintf(adapter_name, "%s, Version - %s", - ixgbe_strings[ent->index], - ixgbe_driver_version); - device_set_desc_copy(dev, adapter_name); - ++ixgbe_total_ports; - return (BUS_PROBE_DEFAULT); - } - ent++; - } - - return (ENXIO); -} /* ixgbe_probe */ - - -/************************************************************************ - * ixgbe_ioctl - Ioctl entry point - * - * Called when the user wants to configure the interface. 
- * - * return 0 on success, positive on failure - ************************************************************************/ -static int -ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) -{ - struct adapter *adapter = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *) data; -#if defined(INET) || defined(INET6) - struct ifaddr *ifa = (struct ifaddr *)data; -#endif - int error = 0; - bool avoid_reset = FALSE; - - switch (command) { - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; -#endif - /* - * Calling init results in link renegotiation, - * so we avoid doing it when possible. - */ - if (avoid_reset) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) - ixgbe_init(adapter); -#ifdef INET - if (!(ifp->if_flags & IFF_NOARP)) - arp_ifinit(ifp, ifa); -#endif - } else - error = ether_ioctl(ifp, command, data); - break; - case SIOCSIFMTU: - IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); - if (ifr->ifr_mtu > IXGBE_MAX_MTU) { - error = EINVAL; - } else { - IXGBE_CORE_LOCK(adapter); - ifp->if_mtu = ifr->ifr_mtu; - adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixgbe_init_locked(adapter); - ixgbe_recalculate_max_frame(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFFLAGS: - IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); - IXGBE_CORE_LOCK(adapter); - if (ifp->if_flags & IFF_UP) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if ((ifp->if_flags ^ adapter->if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - ixgbe_set_promisc(adapter); - } - } else - ixgbe_init_locked(adapter); - } else - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixgbe_stop(adapter); - adapter->if_flags = ifp->if_flags; - IXGBE_CORE_UNLOCK(adapter); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); - 
if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_CORE_LOCK(adapter); - ixgbe_disable_intr(adapter); - ixgbe_set_multi(adapter); - ixgbe_enable_intr(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFMEDIA: - case SIOCGIFMEDIA: - IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); - error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); - break; - case SIOCSIFCAP: - { - IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); - - int mask = ifr->ifr_reqcap ^ ifp->if_capenable; - - if (!mask) - break; - - /* HW cannot turn these on/off separately */ - if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { - ifp->if_capenable ^= IFCAP_RXCSUM; - ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; - } - if (mask & IFCAP_TXCSUM) - ifp->if_capenable ^= IFCAP_TXCSUM; - if (mask & IFCAP_TXCSUM_IPV6) - ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; - if (mask & IFCAP_TSO4) - ifp->if_capenable ^= IFCAP_TSO4; - if (mask & IFCAP_TSO6) - ifp->if_capenable ^= IFCAP_TSO6; - if (mask & IFCAP_LRO) - ifp->if_capenable ^= IFCAP_LRO; - if (mask & IFCAP_VLAN_HWTAGGING) - ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - if (mask & IFCAP_VLAN_HWFILTER) - ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; - if (mask & IFCAP_VLAN_HWTSO) - ifp->if_capenable ^= IFCAP_VLAN_HWTSO; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_CORE_LOCK(adapter); - ixgbe_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - VLAN_CAPABILITIES(ifp); - break; - } -#if __FreeBSD_version >= 1100036 - case SIOCGI2C: - { - struct ixgbe_hw *hw = &adapter->hw; - struct ifi2creq i2c; - int i; - - IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); - if (error != 0) - break; - if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { - error = EINVAL; - break; - } - if (i2c.len > sizeof(i2c.data)) { - error = EINVAL; - break; - } - - for (i = 0; i < i2c.len; i++) - hw->phy.ops.read_i2c_byte(hw, i2c.offset + i, - i2c.dev_addr, &i2c.data[i]); - error = copyout(&i2c, 
ifr->ifr_data, sizeof(i2c)); - break; - } -#endif - default: - IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); - error = ether_ioctl(ifp, command, data); - break; - } - - return (error); -} /* ixgbe_ioctl */ - /************************************************************************ * ixgbe_check_fan_failure ************************************************************************/ @@ -4867,406 +4547,3 @@ ixgbe_check_fan_failure(struct adapter *adapter, u32 reg, bool in_interrupt) device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n"); } /* ixgbe_check_fan_failure */ -/************************************************************************ - * ixgbe_handle_que - ************************************************************************/ -static void -ixgbe_handle_que(void *context, int pending) -{ - struct ix_queue *que = context; - struct adapter *adapter = que->adapter; - struct tx_ring *txr = que->txr; - struct ifnet *ifp = adapter->ifp; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - ixgbe_rxeof(que); - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); - if (!ixgbe_ring_empty(ifp, txr->br)) - ixgbe_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); - } - - /* Re-enable this interrupt */ - if (que->res != NULL) - ixgbe_enable_queue(adapter, que->msix); - else - ixgbe_enable_intr(adapter); - - return; -} /* ixgbe_handle_que */ - - - -/************************************************************************ - * ixgbe_allocate_legacy - Setup the Legacy or MSI Interrupt handler - ************************************************************************/ -static int -ixgbe_allocate_legacy(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; - int error; - - /* We allocate a single interrupt resource */ - adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, - &adapter->link_rid, RF_SHAREABLE | RF_ACTIVE); - if (adapter->res == NULL) { - device_printf(dev, 
- "Unable to allocate bus resource: interrupt\n"); - return (ENXIO); - } - - /* - * Try allocating a fast interrupt and the associated deferred - * processing contexts. - */ - if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) - TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); - TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); - que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq", - device_get_nameunit(adapter->dev)); - - /* Tasklets for Link, SFP and Multispeed Fiber */ - TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); - TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); - TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); - TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); - if (adapter->feat_en & IXGBE_FEATURE_FDIR) - TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); - adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", - device_get_nameunit(adapter->dev)); - - if ((error = bus_setup_intr(dev, adapter->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq, que, - &adapter->tag)) != 0) { - device_printf(dev, - "Failed to register fast interrupt handler: %d\n", error); - taskqueue_free(que->tq); - taskqueue_free(adapter->tq); - que->tq = NULL; - adapter->tq = NULL; - - return (error); - } - /* For simplicity in the handlers */ - adapter->active_queues = IXGBE_EIMS_ENABLE_MASK; - - return (0); -} /* ixgbe_allocate_legacy */ - - -/************************************************************************ - * ixgbe_allocate_msix - Setup MSI-X Interrupt resources and handlers - ************************************************************************/ -static int -ixgbe_allocate_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - 
struct tx_ring *txr = adapter->tx_rings; - int error, rid, vector = 0; - int cpu_id = 0; - unsigned int rss_buckets = 0; - cpuset_t cpu_mask; - - /* - * If we're doing RSS, the number of queues needs to - * match the number of RSS buckets that are configured. - * - * + If there's more queues than RSS buckets, we'll end - * up with queues that get no traffic. - * - * + If there's more RSS buckets than queues, we'll end - * up having multiple RSS buckets map to the same queue, - * so there'll be some contention. - */ - rss_buckets = rss_getnumbuckets(); - if ((adapter->feat_en & IXGBE_FEATURE_RSS) && - (adapter->num_queues != rss_buckets)) { - device_printf(dev, "%s: number of queues (%d) != number of RSS buckets (%d); performance will be impacted.\n", - __func__, adapter->num_queues, rss_buckets); - } - - for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { - rid = vector + 1; - que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, - RF_SHAREABLE | RF_ACTIVE); - if (que->res == NULL) { - device_printf(dev, "Unable to allocate bus resource: que interrupt [%d]\n", - vector); - return (ENXIO); - } - /* Set the handler function */ - error = bus_setup_intr(dev, que->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_que, que, - &que->tag); - if (error) { - que->res = NULL; - device_printf(dev, "Failed to register QUE handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, que->res, que->tag, "q%d", i); -#endif - que->msix = vector; - adapter->active_queues |= (u64)(1 << que->msix); - - if (adapter->feat_en & IXGBE_FEATURE_RSS) { - /* - * The queue ID is used as the RSS layer bucket ID. - * We look up the queue ID -> RSS CPU ID and select - * that. - */ - cpu_id = rss_getcpu(i % rss_buckets); - CPU_SETOF(cpu_id, &cpu_mask); - } else { - /* - * Bind the MSI-X vector, and thus the - * rings to the corresponding CPU. 
- * - * This just happens to match the default RSS - * round-robin bucket -> queue -> CPU allocation. - */ - if (adapter->num_queues > 1) - cpu_id = i; - } - if (adapter->num_queues > 1) - bus_bind_intr(dev, que->res, cpu_id); -#ifdef IXGBE_DEBUG - if (adapter->feat_en & IXGBE_FEATURE_RSS) - device_printf(dev, "Bound RSS bucket %d to CPU %d\n", i, - cpu_id); - else - device_printf(dev, "Bound queue %d to cpu %d\n", i, - cpu_id); -#endif /* IXGBE_DEBUG */ - - - if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) - TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, - txr); - TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); - que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); -#if __FreeBSD_version < 1100000 - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s:q%d", - device_get_nameunit(adapter->dev), i); -#else - if (adapter->feat_en & IXGBE_FEATURE_RSS) - taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, - &cpu_mask, "%s (bucket %d)", - device_get_nameunit(adapter->dev), cpu_id); - else - taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, - NULL, "%s:q%d", device_get_nameunit(adapter->dev), - i); -#endif - } - - /* and Link */ - adapter->link_rid = vector + 1; - adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, - &adapter->link_rid, RF_SHAREABLE | RF_ACTIVE); - if (!adapter->res) { - device_printf(dev, - "Unable to allocate bus resource: Link interrupt [%d]\n", - adapter->link_rid); - return (ENXIO); - } - /* Set the link handler function */ - error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, - NULL, ixgbe_msix_link, adapter, &adapter->tag); - if (error) { - adapter->res = NULL; - device_printf(dev, "Failed to register LINK handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, adapter->res, adapter->tag, "link"); -#endif - adapter->vector = vector; - /* Tasklets for Link, SFP and Multispeed Fiber */ - TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, 
adapter); - TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); - TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); - if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) - TASK_INIT(&adapter->mbx_task, 0, ixgbe_handle_mbx, adapter); - TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); - if (adapter->feat_en & IXGBE_FEATURE_FDIR) - TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); - adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", - device_get_nameunit(adapter->dev)); - - return (0); -} /* ixgbe_allocate_msix */ - -/************************************************************************ - * ixgbe_configure_interrupts - * - * Setup MSI-X, MSI, or legacy interrupts (in that order). - * This will also depend on user settings. - ************************************************************************/ -static int -ixgbe_configure_interrupts(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int rid, want, queues, msgs; - - /* Default to 1 queue if MSI-X setup fails */ - adapter->num_queues = 1; - - /* Override by tuneable */ - if (!(adapter->feat_cap & IXGBE_FEATURE_MSIX)) - goto msi; - - /* First try MSI-X */ - msgs = pci_msix_count(dev); - if (msgs == 0) - goto msi; - rid = PCIR_BAR(MSIX_82598_BAR); - adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, - RF_ACTIVE); - if (adapter->msix_mem == NULL) { - rid += 4; /* 82599 maps in higher BAR */ - adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, - &rid, RF_ACTIVE); - } - if (adapter->msix_mem == NULL) { - /* May not be enabled */ - device_printf(adapter->dev, "Unable to map MSI-X table.\n"); - goto msi; - } - - /* Figure out a reasonable auto config value */ - queues = min(mp_ncpus, msgs - 1); - /* If we're doing RSS, clamp at the number of RSS buckets */ - if (adapter->feat_en & IXGBE_FEATURE_RSS) - queues = min(queues, 
rss_getnumbuckets()); - if (ixgbe_num_queues > queues) { - device_printf(adapter->dev, "ixgbe_num_queues (%d) is too large, using reduced amount (%d).\n", ixgbe_num_queues, queues); - ixgbe_num_queues = queues; - } - - if (ixgbe_num_queues != 0) - queues = ixgbe_num_queues; - /* Set max queues to 8 when autoconfiguring */ - else - queues = min(queues, 8); - - /* reflect correct sysctl value */ - ixgbe_num_queues = queues; - - /* - * Want one vector (RX/TX pair) per queue - * plus an additional for Link. - */ - want = queues + 1; - if (msgs >= want) - msgs = want; - else { - device_printf(adapter->dev, "MSI-X Configuration Problem, %d vectors but %d queues wanted!\n", - msgs, want); - goto msi; - } - if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { - device_printf(adapter->dev, - "Using MSI-X interrupts with %d vectors\n", msgs); - adapter->num_queues = queues; - adapter->feat_en |= IXGBE_FEATURE_MSIX; - return (0); - } - /* - * MSI-X allocation failed or provided us with - * less vectors than needed. Free MSI-X resources - * and we'll try enabling MSI. 
- */ - pci_release_msi(dev); - -msi: - /* Without MSI-X, some features are no longer supported */ - adapter->feat_cap &= ~IXGBE_FEATURE_RSS; - adapter->feat_en &= ~IXGBE_FEATURE_RSS; - adapter->feat_cap &= ~IXGBE_FEATURE_SRIOV; - adapter->feat_en &= ~IXGBE_FEATURE_SRIOV; - - if (adapter->msix_mem != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, rid, - adapter->msix_mem); - adapter->msix_mem = NULL; - } - msgs = 1; - if (pci_alloc_msi(dev, &msgs) == 0) { - adapter->feat_en |= IXGBE_FEATURE_MSI; - adapter->link_rid = 1; - device_printf(adapter->dev, "Using an MSI interrupt\n"); - return (0); - } - - if (!(adapter->feat_cap & IXGBE_FEATURE_LEGACY_IRQ)) { - device_printf(adapter->dev, - "Device does not support legacy interrupts.\n"); - return 1; - } - - adapter->feat_en |= IXGBE_FEATURE_LEGACY_IRQ; - adapter->link_rid = 0; - device_printf(adapter->dev, "Using a Legacy interrupt\n"); - - return (0); -} /* ixgbe_configure_interrupts */ - - -/************************************************************************ - * ixgbe_handle_link - Tasklet for MSI-X Link interrupts - * - * Done outside of interrupt context since the driver might sleep - ************************************************************************/ -static void -ixgbe_handle_link(void *context, int pending) -{ - struct adapter *adapter = context; - struct ixgbe_hw *hw = &adapter->hw; - - ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, 0); - ixgbe_update_link_status(adapter); - - /* Re-enable link interrupts */ - IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_LSC); -} /* ixgbe_handle_link */ - -/************************************************************************ - * ixgbe_rearm_queues - ************************************************************************/ -static void -ixgbe_rearm_queues(struct adapter *adapter, u64 queues) -{ - u32 mask; - - switch (adapter->hw.mac.type) { - case ixgbe_mac_82598EB: - mask = (IXGBE_EIMS_RTX_QUEUE & queues); - IXGBE_WRITE_REG(&adapter->hw, 
IXGBE_EICS, mask); - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_X550EM_a: - mask = (queues & 0xFFFFFFFF); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask); - mask = (queues >> 32); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask); - break; - default: - break; - } -} /* ixgbe_rearm_queues */ - diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index 58425a485e8..7114b51b3bb 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -33,17 +33,19 @@ /*$FreeBSD$*/ -#ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" -#endif #include "ixgbe.h" +#include "ifdi_if.h" + +#include +#include /************************************************************************ * Driver version ************************************************************************/ -char ixv_driver_version[] = "1.5.13-k"; +char ixv_driver_version[] = "2.0.0-k"; /************************************************************************ * PCI Device ID Table @@ -54,89 +56,84 @@ char ixv_driver_version[] = "1.5.13-k"; * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } ************************************************************************/ -static ixgbe_vendor_info_t ixv_vendor_info_array[] = +static pci_vendor_info_t ixv_vendor_info_array[] = { - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, 0, 0, 0}, - {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF, 0, 0, 0}, + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF, "Intel(R) PRO/10GbE Virtual Function Network 
Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), + PVID(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF, "Intel(R) PRO/10GbE Virtual Function Network Driver"), /* required last entry */ - {0, 0, 0, 0, 0} -}; - -/************************************************************************ - * Table of branding strings - ************************************************************************/ -static char *ixv_strings[] = { - "Intel(R) PRO/10GbE Virtual Function Network Driver" +PVID_END }; /************************************************************************ * Function prototypes ************************************************************************/ -static int ixv_probe(device_t); -static int ixv_attach(device_t); -static int ixv_detach(device_t); -static int ixv_shutdown(device_t); -static int ixv_ioctl(struct ifnet *, u_long, caddr_t); -static void ixv_init(void *); -static void ixv_init_locked(struct adapter *); -static void ixv_stop(void *); -static uint64_t ixv_get_counter(struct ifnet *, ift_counter); +static void *ixv_register(device_t dev); +static int ixv_if_attach_pre(if_ctx_t ctx); +static int ixv_if_attach_post(if_ctx_t ctx); +static int ixv_if_detach(if_ctx_t ctx); + +static int ixv_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid); +static int ixv_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); +static int ixv_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets); +static void ixv_if_queues_free(if_ctx_t ctx); +static void ixv_identify_hardware(if_ctx_t ctx); static void ixv_init_device_features(struct adapter *); -static void ixv_media_status(struct ifnet *, struct ifmediareq *); -static int ixv_media_change(struct ifnet *); -static int ixv_allocate_pci_resources(struct adapter *); -static int ixv_allocate_msix(struct adapter *); -static int ixv_configure_interrupts(struct adapter *); -static void 
ixv_free_pci_resources(struct adapter *); -static void ixv_local_timer(void *); -static void ixv_setup_interface(device_t, struct adapter *); +static int ixv_allocate_pci_resources(if_ctx_t ctx); +static void ixv_free_pci_resources(if_ctx_t ctx); +static int ixv_setup_interface(if_ctx_t ctx); +static void ixv_if_media_status(if_ctx_t , struct ifmediareq *); +static int ixv_if_media_change(if_ctx_t ctx); +static void ixv_if_update_admin_status(if_ctx_t ctx); +static int ixv_if_msix_intr_assign(if_ctx_t ctx, int msix); + +static int ixv_if_mtu_set(if_ctx_t ctx, uint32_t mtu); +static void ixv_if_init(if_ctx_t ctx); +static void ixv_if_local_timer(if_ctx_t ctx, uint16_t qid); +static void ixv_if_stop(if_ctx_t ctx); static int ixv_negotiate_api(struct adapter *); -static void ixv_initialize_transmit_units(struct adapter *); -static void ixv_initialize_receive_units(struct adapter *); +static void ixv_initialize_transmit_units(if_ctx_t ctx); +static void ixv_initialize_receive_units(if_ctx_t ctx); static void ixv_initialize_rss_mapping(struct adapter *); -static void ixv_check_link(struct adapter *); -static void ixv_enable_intr(struct adapter *); -static void ixv_disable_intr(struct adapter *); -static void ixv_set_multi(struct adapter *); -static void ixv_update_link_status(struct adapter *); -static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS); -static void ixv_set_ivar(struct adapter *, u8, u8, s8); +static void ixv_setup_vlan_support(if_ctx_t ctx); static void ixv_configure_ivars(struct adapter *); -static u8 *ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); +static void ixv_if_enable_intr(if_ctx_t ctx); +static void ixv_if_disable_intr(if_ctx_t ctx); +static void ixv_if_multi_set(if_ctx_t ctx); -static void ixv_setup_vlan_support(struct adapter *); -static void ixv_register_vlan(void *, struct ifnet *, u16); -static void ixv_unregister_vlan(void *, struct ifnet *, u16); +static void ixv_if_register_vlan(if_ctx_t, u16); +static void ixv_if_unregister_vlan(if_ctx_t, 
u16); + +static uint64_t ixv_if_get_counter(if_ctx_t, ift_counter); static void ixv_save_stats(struct adapter *); static void ixv_init_stats(struct adapter *); static void ixv_update_stats(struct adapter *); -static void ixv_add_stats_sysctls(struct adapter *); -static void ixv_set_sysctl_value(struct adapter *, const char *, - const char *, int *, int); +static void ixv_add_stats_sysctls(struct adapter *adapter); + +static int ixv_sysctl_debug(SYSCTL_HANDLER_ARGS); +static void ixv_set_ivar(struct adapter *, u8, u8, s8); + +static u8 *ixv_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); /* The MSI-X Interrupt handlers */ -static void ixv_msix_que(void *); -static void ixv_msix_mbx(void *); - -/* Deferred interrupt tasklets */ -static void ixv_handle_que(void *, int); -static void ixv_handle_link(void *, int); +static int ixv_msix_que(void *); +static int ixv_msix_mbx(void *); /************************************************************************ * FreeBSD Device Interface Entry Points ************************************************************************/ static device_method_t ixv_methods[] = { /* Device interface */ - DEVMETHOD(device_probe, ixv_probe), - DEVMETHOD(device_attach, ixv_attach), - DEVMETHOD(device_detach, ixv_detach), - DEVMETHOD(device_shutdown, ixv_shutdown), + DEVMETHOD(device_register, ixv_register), + DEVMETHOD(device_probe, iflib_device_probe), + DEVMETHOD(device_attach, iflib_device_attach), + DEVMETHOD(device_detach, iflib_device_detach), + DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD_END }; @@ -148,33 +145,44 @@ devclass_t ixv_devclass; DRIVER_MODULE(ixv, pci, ixv_driver, ixv_devclass, 0, 0); MODULE_DEPEND(ixv, pci, 1, 1, 1); MODULE_DEPEND(ixv, ether, 1, 1, 1); +#ifdef DEV_NETMAP MODULE_DEPEND(ixv, netmap, 1, 1, 1); +#endif /* DEV_NETMAP */ + +static device_method_t ixv_if_methods[] = { + DEVMETHOD(ifdi_attach_pre, ixv_if_attach_pre), + DEVMETHOD(ifdi_attach_post, ixv_if_attach_post), + DEVMETHOD(ifdi_detach, 
ixv_if_detach), + DEVMETHOD(ifdi_init, ixv_if_init), + DEVMETHOD(ifdi_stop, ixv_if_stop), + DEVMETHOD(ifdi_msix_intr_assign, ixv_if_msix_intr_assign), + DEVMETHOD(ifdi_intr_enable, ixv_if_enable_intr), + DEVMETHOD(ifdi_intr_disable, ixv_if_disable_intr), + DEVMETHOD(ifdi_tx_queue_intr_enable, ixv_if_rx_queue_intr_enable), + DEVMETHOD(ifdi_rx_queue_intr_enable, ixv_if_rx_queue_intr_enable), + DEVMETHOD(ifdi_tx_queues_alloc, ixv_if_tx_queues_alloc), + DEVMETHOD(ifdi_rx_queues_alloc, ixv_if_rx_queues_alloc), + DEVMETHOD(ifdi_queues_free, ixv_if_queues_free), + DEVMETHOD(ifdi_update_admin_status, ixv_if_update_admin_status), + DEVMETHOD(ifdi_multi_set, ixv_if_multi_set), + DEVMETHOD(ifdi_mtu_set, ixv_if_mtu_set), + DEVMETHOD(ifdi_media_status, ixv_if_media_status), + DEVMETHOD(ifdi_media_change, ixv_if_media_change), + DEVMETHOD(ifdi_timer, ixv_if_local_timer), + DEVMETHOD(ifdi_vlan_register, ixv_if_register_vlan), + DEVMETHOD(ifdi_vlan_unregister, ixv_if_unregister_vlan), + DEVMETHOD(ifdi_get_counter, ixv_if_get_counter), + DEVMETHOD_END +}; + +static driver_t ixv_if_driver = { + "ixv_if", ixv_if_methods, sizeof(struct adapter) +}; /* * TUNEABLE PARAMETERS: */ -/* Number of Queues - do not exceed MSI-X vectors - 1 */ -static int ixv_num_queues = 1; -TUNABLE_INT("hw.ixv.num_queues", &ixv_num_queues); - -/* - * AIM: Adaptive Interrupt Moderation - * which means that the interrupt rate - * is varied over time based on the - * traffic for that interrupt vector - */ -static int ixv_enable_aim = FALSE; -TUNABLE_INT("hw.ixv.enable_aim", &ixv_enable_aim); - -/* How many packets rxeof tries to clean at a time */ -static int ixv_rx_process_limit = 256; -TUNABLE_INT("hw.ixv.rx_process_limit", &ixv_rx_process_limit); - -/* How many packets txeof tries to clean at a time */ -static int ixv_tx_process_limit = 256; -TUNABLE_INT("hw.ixv.tx_process_limit", &ixv_tx_process_limit); - /* Flow control setting, default to full */ static int ixv_flow_control = ixgbe_fc_full; 
TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control); @@ -188,80 +196,190 @@ TUNABLE_INT("hw.ixv.flow_control", &ixv_flow_control); static int ixv_header_split = FALSE; TUNABLE_INT("hw.ixv.hdr_split", &ixv_header_split); -/* - * Number of TX descriptors per ring, - * setting higher than RX as this seems - * the better performing choice. - */ -static int ixv_txd = DEFAULT_TXD; -TUNABLE_INT("hw.ixv.txd", &ixv_txd); - -/* Number of RX descriptors per ring */ -static int ixv_rxd = DEFAULT_RXD; -TUNABLE_INT("hw.ixv.rxd", &ixv_rxd); - -/* Legacy Transmit (single queue) */ -static int ixv_enable_legacy_tx = 0; -TUNABLE_INT("hw.ixv.enable_legacy_tx", &ixv_enable_legacy_tx); - /* * Shadow VFTA table, this is needed because * the real filter table gets cleared during * a soft reset and we need to repopulate it. */ static u32 ixv_shadow_vfta[IXGBE_VFTA_SIZE]; +extern struct if_txrx ixgbe_txrx; -static int (*ixv_start_locked)(struct ifnet *, struct tx_ring *); -static int (*ixv_ring_empty)(struct ifnet *, struct buf_ring *); +static struct if_shared_ctx ixv_sctx_init = { + .isc_magic = IFLIB_MAGIC, + .isc_q_align = PAGE_SIZE,/* max(DBA_ALIGN, PAGE_SIZE) */ + .isc_tx_maxsize = IXGBE_TSO_SIZE, + + .isc_tx_maxsegsize = PAGE_SIZE, + + .isc_rx_maxsize = MJUM16BYTES, + .isc_rx_nsegments = 1, + .isc_rx_maxsegsize = MJUM16BYTES, + .isc_nfl = 1, + .isc_ntxqs = 1, + .isc_nrxqs = 1, + .isc_admin_intrcnt = 1, + .isc_vendor_info = ixv_vendor_info_array, + .isc_driver_version = ixv_driver_version, + .isc_driver = &ixv_if_driver, + + .isc_nrxd_min = {MIN_RXD}, + .isc_ntxd_min = {MIN_TXD}, + .isc_nrxd_max = {MAX_RXD}, + .isc_ntxd_max = {MAX_TXD}, + .isc_nrxd_default = {DEFAULT_RXD}, + .isc_ntxd_default = {DEFAULT_TXD}, +}; + +if_shared_ctx_t ixv_sctx = &ixv_sctx_init; + +static void * +ixv_register(device_t dev) +{ + return (ixv_sctx); +} /************************************************************************ - * ixv_probe - Device identification routine - * - * Determines if the driver 
should be loaded on - * adapter based on its PCI vendor/device ID. - * - * return BUS_PROBE_DEFAULT on success, positive on failure + * ixv_if_tx_queues_alloc ************************************************************************/ static int -ixv_probe(device_t dev) +ixv_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, + int ntxqs, int ntxqsets) { - ixgbe_vendor_info_t *ent; - u16 pci_vendor_id = 0; - u16 pci_device_id = 0; - u16 pci_subvendor_id = 0; - u16 pci_subdevice_id = 0; - char adapter_name[256]; + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ix_tx_queue *que; + int i, j, error; + MPASS(adapter->num_tx_queues == ntxqsets); + MPASS(ntxqs == 1); - pci_vendor_id = pci_get_vendor(dev); - if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID) - return (ENXIO); - - pci_device_id = pci_get_device(dev); - pci_subvendor_id = pci_get_subvendor(dev); - pci_subdevice_id = pci_get_subdevice(dev); - - ent = ixv_vendor_info_array; - while (ent->vendor_id != 0) { - if ((pci_vendor_id == ent->vendor_id) && - (pci_device_id == ent->device_id) && - ((pci_subvendor_id == ent->subvendor_id) || - (ent->subvendor_id == 0)) && - ((pci_subdevice_id == ent->subdevice_id) || - (ent->subdevice_id == 0))) { - sprintf(adapter_name, "%s, Version - %s", - ixv_strings[ent->index], ixv_driver_version); - device_set_desc_copy(dev, adapter_name); - return (BUS_PROBE_DEFAULT); - } - ent++; + /* Allocate queue structure memory */ + adapter->tx_queues = + (struct ix_tx_queue *)malloc(sizeof(struct ix_tx_queue) * ntxqsets, + M_DEVBUF, M_NOWAIT | M_ZERO); + if (!adapter->tx_queues) { + device_printf(iflib_get_dev(ctx), + "Unable to allocate TX ring memory\n"); + return (ENOMEM); } - return (ENXIO); -} /* ixv_probe */ + for (i = 0, que = adapter->tx_queues; i < ntxqsets; i++, que++) { + struct tx_ring *txr = &que->txr; + + txr->me = i; + txr->adapter = que->adapter = adapter; + adapter->active_queues |= (u64)1 << txr->me; + + /* 
Allocate report status array */ + if (!(txr->tx_rsq = (qidx_t *)malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { + error = ENOMEM; + goto fail; + } + for (j = 0; j < scctx->isc_ntxd[0]; j++) + txr->tx_rsq[j] = QIDX_INVALID; + /* get the virtual and physical address of the hardware queues */ + txr->tail = IXGBE_VFTDT(txr->me); + txr->tx_base = (union ixgbe_adv_tx_desc *)vaddrs[i*ntxqs]; + txr->tx_paddr = paddrs[i*ntxqs]; + + txr->bytes = 0; + txr->total_packets = 0; + + } + + device_printf(iflib_get_dev(ctx), "allocated for %d queues\n", + adapter->num_tx_queues); + + return (0); + + fail: + ixv_if_queues_free(ctx); + + return (error); +} /* ixv_if_tx_queues_alloc */ /************************************************************************ - * ixv_attach - Device initialization routine + * ixv_if_rx_queues_alloc - Allocate the RX queue array and set up each ring; returns 0 or ENOMEM + ************************************************************************/ +static int +ixv_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, + int nrxqs, int nrxqsets) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que; + int i, error; + + MPASS(adapter->num_rx_queues == nrxqsets); + MPASS(nrxqs == 1); + + /* Allocate queue structure memory */ + adapter->rx_queues = + (struct ix_rx_queue *)malloc(sizeof(struct ix_rx_queue) * nrxqsets, + M_DEVBUF, M_NOWAIT | M_ZERO); + if (!adapter->rx_queues) { + device_printf(iflib_get_dev(ctx), + "Unable to allocate RX ring memory\n"); + error = ENOMEM; + goto fail; + } + + for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { + struct rx_ring *rxr = &que->rxr; + rxr->me = i; + rxr->adapter = que->adapter = adapter; + + + /* get the virtual and physical address of the hw queues */ + rxr->tail = IXGBE_VFRDT(rxr->me); + rxr->rx_base = (union ixgbe_adv_rx_desc *)vaddrs[i*nrxqs]; + rxr->rx_paddr = paddrs[i*nrxqs]; + rxr->bytes = 0; + rxr->que = que; + } + + device_printf(iflib_get_dev(ctx), "allocated for %d rx queues\n", +
adapter->num_rx_queues); + + return (0); + +fail: + ixv_if_queues_free(ctx); + + return (error); +} /* ixv_if_rx_queues_alloc */ + +/************************************************************************ + * ixv_if_queues_free - Free each TX ring's tx_rsq array, then the TX and RX queue arrays + ************************************************************************/ +static void +ixv_if_queues_free(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_tx_queue *que = adapter->tx_queues; + int i; + + if (que == NULL) + goto free; + + for (i = 0; i < adapter->num_tx_queues; i++, que++) { + struct tx_ring *txr = &que->txr; + if (txr->tx_rsq == NULL) + break; + + free(txr->tx_rsq, M_DEVBUF); + txr->tx_rsq = NULL; + } + if (adapter->tx_queues != NULL) + free(adapter->tx_queues, M_DEVBUF); +free: + if (adapter->rx_queues != NULL) + free(adapter->rx_queues, M_DEVBUF); + adapter->tx_queues = NULL; + adapter->rx_queues = NULL; +} /* ixv_if_queues_free */ + +/************************************************************************ + * ixv_if_attach_pre - Device initialization routine * * Called when the driver is being loaded. * Identifies the type of hardware, allocates all resources @@ -270,34 +388,28 @@ ixv_probe(device_t dev) * return 0 on success, positive on failure ************************************************************************/ static int -ixv_attach(device_t dev) +ixv_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; + device_t dev; + if_softc_ctx_t scctx; struct ixgbe_hw *hw; int error = 0; INIT_DEBUGOUT("ixv_attach: begin"); - /* - * Make sure BUSMASTER is set, on a VM under - * KVM it may not be and will break things.
- */ - pci_enable_busmaster(dev); - /* Allocate, clear, and link in our adapter structure */ - adapter = device_get_softc(dev); + dev = iflib_get_dev(ctx); + adapter = iflib_get_softc(ctx); adapter->dev = dev; + adapter->ctx = ctx; adapter->hw.back = adapter; + scctx = adapter->shared = iflib_get_softc_ctx(ctx); + adapter->media = iflib_get_media(ctx); hw = &adapter->hw; - adapter->init_locked = ixv_init_locked; - adapter->stop_locked = ixv_stop; - - /* Core Lock Init*/ - IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); - /* Do base PCI setup - map BAR0 */ - if (ixv_allocate_pci_resources(adapter)) { + if (ixv_allocate_pci_resources(ctx)) { device_printf(dev, "ixv_allocate_pci_resources() failed!\n"); error = ENXIO; goto err_out; @@ -309,46 +421,8 @@ ixv_attach(device_t dev) CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixv_sysctl_debug, "I", "Debug Info"); - SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, - "enable_aim", CTLFLAG_RW, &ixv_enable_aim, 1, - "Interrupt Moderation"); - - /* Set up the timer callout */ - callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); - - /* Save off the information about this board */ - hw->vendor_id = pci_get_vendor(dev); - hw->device_id = pci_get_device(dev); - hw->revision_id = pci_get_revid(dev); - hw->subsystem_vendor_id = pci_get_subvendor(dev); - hw->subsystem_device_id = pci_get_subdevice(dev); - - /* A subset of set_mac_type */ - switch (hw->device_id) { - case IXGBE_DEV_ID_82599_VF: - hw->mac.type = ixgbe_mac_82599_vf; - break; - case IXGBE_DEV_ID_X540_VF: - hw->mac.type = ixgbe_mac_X540_vf; - break; - case IXGBE_DEV_ID_X550_VF: - hw->mac.type = ixgbe_mac_X550_vf; - break; - case IXGBE_DEV_ID_X550EM_X_VF: - hw->mac.type = ixgbe_mac_X550EM_x_vf; - break; - case IXGBE_DEV_ID_X550EM_A_VF: - hw->mac.type = ixgbe_mac_X550EM_a_vf; - break; - default: - /* Shouldn't get here since probe succeeded */ - device_printf(dev, "Unknown device ID!\n"); - error = ENXIO; - goto 
err_out; - break; - } - + /* Determine hardware revision */ + ixv_identify_hardware(ctx); ixv_init_device_features(adapter); /* Initialize the shared code */ @@ -362,9 +436,6 @@ ixv_attach(device_t dev) /* Setup the mailbox */ ixgbe_init_mbx_params_vf(hw); - /* Set the right number of segments */ - adapter->num_segs = IXGBE_82599_SCATTER; - error = hw->mac.ops.reset_hw(hw); if (error == IXGBE_ERR_RESET_FAILED) device_printf(dev, "...reset_hw() failure: Reset Failed!\n"); @@ -402,55 +473,66 @@ ixv_attach(device_t dev) bcopy(addr, hw->mac.perm_addr, sizeof(addr)); } - /* Register for VLAN events */ - adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - ixv_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); - adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - ixv_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); + /* Most of the iflib initialization... */ - /* Sysctls for limiting the amount of work done in the taskqueues */ - ixv_set_sysctl_value(adapter, "rx_processing_limit", - "max number of rx packets to process", - &adapter->rx_process_limit, ixv_rx_process_limit); - - ixv_set_sysctl_value(adapter, "tx_processing_limit", - "max number of tx packets to process", - &adapter->tx_process_limit, ixv_tx_process_limit); - - /* Do descriptor calc and sanity checks */ - if (((ixv_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || - ixv_txd < MIN_TXD || ixv_txd > MAX_TXD) { - device_printf(dev, "TXD config issue, using default!\n"); - adapter->num_tx_desc = DEFAULT_TXD; - } else - adapter->num_tx_desc = ixv_txd; - - if (((ixv_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || - ixv_rxd < MIN_RXD || ixv_rxd > MAX_RXD) { - device_printf(dev, "RXD config issue, using default!\n"); - adapter->num_rx_desc = DEFAULT_RXD; - } else - adapter->num_rx_desc = ixv_rxd; - - /* Setup MSI-X */ - error = ixv_configure_interrupts(adapter); - if (error) - goto err_out; - - /* Allocate our TX/RX Queues */ - if (ixgbe_allocate_queues(adapter)) { - 
device_printf(dev, "ixgbe_allocate_queues() failed!\n"); - error = ENOMEM; - goto err_out; + iflib_set_mac(ctx, hw->mac.addr); + switch (adapter->hw.mac.type) { + case ixgbe_mac_X550_vf: + case ixgbe_mac_X550EM_x_vf: + case ixgbe_mac_X550EM_a_vf: + scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 2; + break; + default: + scctx->isc_ntxqsets_max = scctx->isc_nrxqsets_max = 1; } + scctx->isc_txqsizes[0] = + roundup2(scctx->isc_ntxd[0] * sizeof(union ixgbe_adv_tx_desc) + + sizeof(u32), DBA_ALIGN); + scctx->isc_rxqsizes[0] = + roundup2(scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc), + DBA_ALIGN); + /* XXX */ + scctx->isc_tx_csum_flags = CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_TSO | + CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_TSO; + scctx->isc_tx_nsegments = IXGBE_82599_SCATTER; + scctx->isc_msix_bar = PCIR_BAR(MSIX_82598_BAR); + scctx->isc_tx_tso_segments_max = scctx->isc_tx_nsegments; + scctx->isc_tx_tso_size_max = IXGBE_TSO_SIZE; + scctx->isc_tx_tso_segsize_max = PAGE_SIZE; + + scctx->isc_txrx = &ixgbe_txrx; + + /* + * Tell the upper layer(s) we support everything the PF + * driver does except... 
+ * hardware stats + * Wake-on-LAN + */ + scctx->isc_capenable = IXGBE_CAPS; + scctx->isc_capenable ^= IFCAP_HWSTATS | IFCAP_WOL; + + INIT_DEBUGOUT("ixv_if_attach_pre: end"); + + return (0); + +err_out: + ixv_free_pci_resources(ctx); + + return (error); +} /* ixv_if_attach_pre */ + +static int +ixv_if_attach_post(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + int error = 0; /* Setup OS specific network interface */ - ixv_setup_interface(dev, adapter); - - error = ixv_allocate_msix(adapter); + error = ixv_setup_interface(ctx); if (error) { - device_printf(dev, "ixv_allocate_msix() failed!\n"); - goto err_late; + device_printf(dev, "Interface setup failed: %d\n", error); + goto end; } /* Do the stats setup */ @@ -458,23 +540,9 @@ ixv_attach(device_t dev) ixv_init_stats(adapter); ixv_add_stats_sysctls(adapter); - if (adapter->feat_en & IXGBE_FEATURE_NETMAP) - ixgbe_netmap_attach(adapter); - - INIT_DEBUGOUT("ixv_attach: end"); - - return (0); - -err_late: - ixgbe_free_transmit_structures(adapter); - ixgbe_free_receive_structures(adapter); - free(adapter->queues, M_DEVBUF); -err_out: - ixv_free_pci_resources(adapter); - IXGBE_CORE_LOCK_DESTROY(adapter); - - return (error); -} /* ixv_attach */ +end: + return error; +} /* ixv_if_attach_post */ /************************************************************************ * ixv_detach - Device removal routine @@ -486,65 +554,38 @@ err_out: * return 0 on success, positive on failure ************************************************************************/ static int -ixv_detach(device_t dev) +ixv_if_detach(if_ctx_t ctx) { - struct adapter *adapter = device_get_softc(dev); - struct ix_queue *que = adapter->queues; - INIT_DEBUGOUT("ixv_detach: begin"); - /* Make sure VLANS are not using driver */ - if (adapter->ifp->if_vlantrunk != NULL) { - device_printf(dev, "Vlan in use, detach first\n"); - return (EBUSY); - } - - ether_ifdetach(adapter->ifp); - 
IXGBE_CORE_LOCK(adapter); - ixv_stop(adapter); - IXGBE_CORE_UNLOCK(adapter); - - for (int i = 0; i < adapter->num_queues; i++, que++) { - if (que->tq) { - struct tx_ring *txr = que->txr; - taskqueue_drain(que->tq, &txr->txq_task); - taskqueue_drain(que->tq, &que->que_task); - taskqueue_free(que->tq); - } - } - - /* Drain the Mailbox(link) queue */ - if (adapter->tq) { - taskqueue_drain(adapter->tq, &adapter->link_task); - taskqueue_free(adapter->tq); - } - - /* Unregister VLAN events */ - if (adapter->vlan_attach != NULL) - EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); - if (adapter->vlan_detach != NULL) - EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - - callout_drain(&adapter->timer); - - if (adapter->feat_en & IXGBE_FEATURE_NETMAP) - netmap_detach(adapter->ifp); - - ixv_free_pci_resources(adapter); - bus_generic_detach(dev); - if_free(adapter->ifp); - - ixgbe_free_transmit_structures(adapter); - ixgbe_free_receive_structures(adapter); - free(adapter->queues, M_DEVBUF); - - IXGBE_CORE_LOCK_DESTROY(adapter); + ixv_free_pci_resources(ctx); return (0); -} /* ixv_detach */ +} /* ixv_if_detach */ /************************************************************************ - * ixv_init_locked - Init entry point + * ixv_if_mtu_set + ************************************************************************/ +static int +ixv_if_mtu_set(if_ctx_t ctx, uint32_t mtu) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + int error = 0; + + IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); + if (mtu > IXGBE_MAX_FRAME_SIZE - IXGBE_MTU_HDR) { + error = EINVAL; + } else { + ifp->if_mtu = mtu; + adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; + } + + return error; +} /* ixv_if_mtu_set */ + +/************************************************************************ + * ixv_if_init - Init entry point * * Used in two ways: It is used by the stack as an init entry * point in network interface structure. 
It is also used @@ -553,48 +594,39 @@ ixv_detach(device_t dev) * * return 0 on success, positive on failure ************************************************************************/ -void -ixv_init_locked(struct adapter *adapter) +static void +ixv_if_init(if_ctx_t ctx) { - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifnet *ifp = iflib_get_ifp(ctx); + device_t dev = iflib_get_dev(ctx); struct ixgbe_hw *hw = &adapter->hw; int error = 0; - INIT_DEBUGOUT("ixv_init_locked: begin"); - mtx_assert(&adapter->core_mtx, MA_OWNED); + INIT_DEBUGOUT("ixv_if_init: begin"); hw->adapter_stopped = FALSE; hw->mac.ops.stop_adapter(hw); - callout_stop(&adapter->timer); /* reprogram the RAR[0] in case user changed it. */ hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ - bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, - IXGBE_ETH_LENGTH_OF_ADDRESS); + bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, 1); - /* Prepare transmit descriptors and buffers */ - if (ixgbe_setup_transmit_structures(adapter)) { - device_printf(dev, "Could not setup transmit structures\n"); - ixv_stop(adapter); - return; - } - /* Reset VF and renegotiate mailbox API version */ hw->mac.ops.reset_hw(hw); error = ixv_negotiate_api(adapter); if (error) { device_printf(dev, - "Mailbox API negotiation failed in init_locked!\n"); + "Mailbox API negotiation failed in if_init!\n"); return; } - ixv_initialize_transmit_units(adapter); + ixv_initialize_transmit_units(ctx); /* Setup Multicast table */ - ixv_set_multi(adapter); + ixv_if_multi_set(ctx); /* * Determine the correct mbuf pool @@ -605,29 +637,11 @@ ixv_init_locked(struct adapter *adapter) else adapter->rx_mbuf_sz = MCLBYTES; - /* Prepare receive descriptors and buffers */ - if (ixgbe_setup_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive 
structures\n"); - ixv_stop(adapter); - return; - } - /* Configure RX settings */ - ixv_initialize_receive_units(adapter); - - /* Set the various hardware offload abilities */ - ifp->if_hwassist = 0; - if (ifp->if_capenable & IFCAP_TSO4) - ifp->if_hwassist |= CSUM_TSO; - if (ifp->if_capenable & IFCAP_TXCSUM) { - ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); -#if __FreeBSD_version >= 800000 - ifp->if_hwassist |= CSUM_SCTP; -#endif - } + ixv_initialize_receive_units(ctx); /* Set up VLAN offload and filter */ - ixv_setup_vlan_support(adapter); + ixv_setup_vlan_support(ctx); /* Set up MSI-X routing */ ixv_configure_ivars(adapter); @@ -645,23 +659,19 @@ ixv_init_locked(struct adapter *adapter) hw->mac.ops.check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); - /* Start watchdog */ - callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); - /* And now turn on interrupts */ - ixv_enable_intr(adapter); + ixv_if_enable_intr(ctx); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; -} /* ixv_init_locked */ - -/* - * MSI-X Interrupt Handlers and Tasklets - */ +} /* ixv_if_init */ +/************************************************************************ + * ixv_enable_queue + ************************************************************************/ static inline void ixv_enable_queue(struct adapter *adapter, u32 vector) { @@ -673,6 +683,9 @@ ixv_enable_queue(struct adapter *adapter, u32 vector) IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); } /* ixv_enable_queue */ +/************************************************************************ + * ixv_disable_queue + ************************************************************************/ static inline void ixv_disable_queue(struct adapter *adapter, u32 vector) { @@ -684,103 +697,26 @@ ixv_disable_queue(struct adapter *adapter, u32 vector) IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, mask); } /* ixv_disable_queue */ -static inline void -ixv_rearm_queues(struct 
adapter *adapter, u64 queues) -{ - u32 mask = (IXGBE_EIMS_RTX_QUEUE & queues); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, mask); -} /* ixv_rearm_queues */ - /************************************************************************ - * ixv_msix_que - MSI Queue Interrupt Service routine + * ixv_msix_que - MSI-X Queue Interrupt Service routine ************************************************************************/ -void +static int ixv_msix_que(void *arg) { - struct ix_queue *que = arg; - struct adapter *adapter = que->adapter; - struct ifnet *ifp = adapter->ifp; - struct tx_ring *txr = que->txr; - struct rx_ring *rxr = que->rxr; - bool more; - u32 newitr = 0; + struct ix_rx_queue *que = arg; + struct adapter *adapter = que->adapter; ixv_disable_queue(adapter, que->msix); ++que->irqs; - more = ixgbe_rxeof(que); - - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); - /* - * Make certain that if the stack - * has anything queued the task gets - * scheduled to handle it. - */ - if (!ixv_ring_empty(adapter->ifp, txr->br)) - ixv_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); - - /* Do AIM now? */ - - if (ixv_enable_aim == FALSE) - goto no_calc; - /* - * Do Adaptive Interrupt Moderation: - * - Write out last calculated setting - * - Calculate based on average size over - * the last interval. 
- */ - if (que->eitr_setting) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEITR(que->msix), - que->eitr_setting); - - que->eitr_setting = 0; - - /* Idle, do nothing */ - if ((txr->bytes == 0) && (rxr->bytes == 0)) - goto no_calc; - - if ((txr->bytes) && (txr->packets)) - newitr = txr->bytes/txr->packets; - if ((rxr->bytes) && (rxr->packets)) - newitr = max(newitr, (rxr->bytes / rxr->packets)); - newitr += 24; /* account for hardware frame, crc */ - - /* set an upper boundary */ - newitr = min(newitr, 3000); - - /* Be nice to the mid range */ - if ((newitr > 300) && (newitr < 1200)) - newitr = (newitr / 3); - else - newitr = (newitr / 2); - - newitr |= newitr << 16; - - /* save for next interrupt */ - que->eitr_setting = newitr; - - /* Reset state */ - txr->bytes = 0; - txr->packets = 0; - rxr->bytes = 0; - rxr->packets = 0; - -no_calc: - if (more) - taskqueue_enqueue(que->tq, &que->que_task); - else /* Re-enable this interrupt */ - ixv_enable_queue(adapter, que->msix); - - return; + return (FILTER_SCHEDULE_THREAD); } /* ixv_msix_que */ /************************************************************************ * ixv_msix_mbx ************************************************************************/ -static void +static int ixv_msix_mbx(void *arg) { struct adapter *adapter = arg; @@ -796,11 +732,11 @@ ixv_msix_mbx(void *arg) /* Link status change */ if (reg & IXGBE_EICR_LSC) - taskqueue_enqueue(adapter->tq, &adapter->link_task); + iflib_admin_intr_deferred(adapter->ctx); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, IXGBE_EIMS_OTHER); - return; + return (FILTER_HANDLED); } /* ixv_msix_mbx */ /************************************************************************ @@ -810,21 +746,19 @@ ixv_msix_mbx(void *arg) * the interface using ifconfig. 
************************************************************************/ static void -ixv_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) +ixv_if_media_status(if_ctx_t ctx, struct ifmediareq * ifmr) { - struct adapter *adapter = ifp->if_softc; + struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("ixv_media_status: begin"); - IXGBE_CORE_LOCK(adapter); - ixv_update_link_status(adapter); + + iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; - if (!adapter->link_active) { - IXGBE_CORE_UNLOCK(adapter); + if (!adapter->link_active) return; - } ifmr->ifm_status |= IFM_ACTIVE; @@ -842,23 +776,19 @@ ixv_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) ifmr->ifm_active |= IFM_10_T | IFM_FDX; break; } - - IXGBE_CORE_UNLOCK(adapter); - - return; -} /* ixv_media_status */ +} /* ixv_if_media_status */ /************************************************************************ - * ixv_media_change - Media Ioctl callback + * ixv_if_media_change - Media Ioctl callback * * Called when the user changes speed/duplex using * media/mediopt option with ifconfig. ************************************************************************/ static int -ixv_media_change(struct ifnet *ifp) +ixv_if_media_change(if_ctx_t ctx) { - struct adapter *adapter = ifp->if_softc; - struct ifmedia *ifm = &adapter->media; + struct adapter *adapter = iflib_get_softc(ctx); + struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("ixv_media_change: begin"); @@ -874,7 +804,7 @@ ixv_media_change(struct ifnet *ifp) } return (0); -} /* ixv_media_change */ +} /* ixv_if_media_change */ /************************************************************************ @@ -903,26 +833,22 @@ ixv_negotiate_api(struct adapter *adapter) /************************************************************************ - * ixv_set_multi - Multicast Update + * ixv_if_multi_set - Multicast Update * * Called whenever multicast address list is updated. 
************************************************************************/ static void -ixv_set_multi(struct adapter *adapter) +ixv_if_multi_set(if_ctx_t ctx) { u8 mta[MAX_NUM_MULTICAST_ADDRESSES * IXGBE_ETH_LENGTH_OF_ADDRESS]; + struct adapter *adapter = iflib_get_softc(ctx); u8 *update_ptr; struct ifmultiaddr *ifma; - struct ifnet *ifp = adapter->ifp; + if_t ifp = iflib_get_ifp(ctx); int mcnt = 0; - IOCTL_DEBUGOUT("ixv_set_multi: begin"); + IOCTL_DEBUGOUT("ixv_if_multi_set: begin"); -#if __FreeBSD_version < 800000 - IF_ADDR_LOCK(ifp); -#else - if_maddr_rlock(ifp); -#endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; @@ -931,19 +857,12 @@ ixv_set_multi(struct adapter *adapter) IXGBE_ETH_LENGTH_OF_ADDRESS); mcnt++; } -#if __FreeBSD_version < 800000 - IF_ADDR_UNLOCK(ifp); -#else - if_maddr_runlock(ifp); -#endif update_ptr = mta; adapter->hw.mac.ops.update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixv_mc_array_itr, TRUE); - - return; -} /* ixv_set_multi */ +} /* ixv_if_multi_set */ /************************************************************************ * ixv_mc_array_itr @@ -957,6 +876,7 @@ ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { u8 *addr = *update_ptr; u8 *newptr; + *vmdq = 0; newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; @@ -966,157 +886,218 @@ ixv_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) } /* ixv_mc_array_itr */ /************************************************************************ - * ixv_local_timer - Timer routine + * ixv_if_local_timer - Timer routine * * Checks for link status, updates statistics, * and runs the watchdog check. 
************************************************************************/ static void -ixv_local_timer(void *arg) +ixv_if_local_timer(if_ctx_t ctx, uint16_t qid) { - struct adapter *adapter = arg; - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - u64 queues = 0; - int hung = 0; + if (qid != 0) + return; - mtx_assert(&adapter->core_mtx, MA_OWNED); - - ixv_check_link(adapter); - - /* Stats Update */ - ixv_update_stats(adapter); - - /* - * Check the TX queues status - * - mark hung queues so we don't schedule on them - * - watchdog only if all queues show hung - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - /* Keep track of queues with work for soft irq */ - if (que->txr->busy) - queues |= ((u64)1 << que->me); - /* - * Each time txeof runs without cleaning, but there - * are uncleaned descriptors it increments busy. If - * we get to the MAX we declare it hung. - */ - if (que->busy == IXGBE_QUEUE_HUNG) { - ++hung; - /* Mark the queue as inactive */ - adapter->active_queues &= ~((u64)1 << que->me); - continue; - } else { - /* Check if we've come back from hung */ - if ((adapter->active_queues & ((u64)1 << que->me)) == 0) - adapter->active_queues |= ((u64)1 << que->me); - } - if (que->busy >= IXGBE_MAX_TX_BUSY) { - device_printf(dev, - "Warning queue %d appears to be hung!\n", i); - que->txr->busy = IXGBE_QUEUE_HUNG; - ++hung; - } - - } - - /* Only truly watchdog if all queues show hung */ - if (hung == adapter->num_queues) - goto watchdog; - else if (queues != 0) { /* Force an IRQ on queues with work */ - ixv_rearm_queues(adapter, queues); - } - - callout_reset(&adapter->timer, hz, ixv_local_timer, adapter); - - return; - -watchdog: - - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - adapter->watchdog_events++; - ixv_init_locked(adapter); -} /* ixv_local_timer */ + /* Fire off the adminq task */ + iflib_admin_intr_deferred(ctx); +} /* ixv_if_local_timer */ 
/************************************************************************ - * ixv_update_link_status - Update OS on link state + * ixv_if_update_admin_status - Update OS on link state * * Note: Only updates the OS on the cached link state. * The real check of the hardware only happens with * a link interrupt. ************************************************************************/ static void -ixv_update_link_status(struct adapter *adapter) +ixv_if_update_admin_status(if_ctx_t ctx) { - struct ifnet *ifp = adapter->ifp; - device_t dev = adapter->dev; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + + adapter->hw.mac.get_link_status = TRUE; + ixgbe_check_link(&adapter->hw, &adapter->link_speed, &adapter->link_up, + FALSE); if (adapter->link_up) { if (adapter->link_active == FALSE) { if (bootverbose) - device_printf(dev,"Link is up %d Gbps %s \n", + device_printf(dev, "Link is up %d Gbps %s \n", ((adapter->link_speed == 128) ? 10 : 1), "Full Duplex"); adapter->link_active = TRUE; - if_link_state_change(ifp, LINK_STATE_UP); + iflib_link_state_change(ctx, LINK_STATE_UP, + IF_Gbps(10)); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) - device_printf(dev,"Link is Down\n"); - if_link_state_change(ifp, LINK_STATE_DOWN); + device_printf(dev, "Link is Down\n"); + iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); adapter->link_active = FALSE; } } - return; -} /* ixv_update_link_status */ + /* Stats Update */ + ixv_update_stats(adapter); +} /* ixv_if_update_admin_status */ /************************************************************************ - * ixv_stop - Stop the hardware + * ixv_if_stop - Stop the hardware * * Disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. 
************************************************************************/ static void -ixv_stop(void *arg) +ixv_if_stop(if_ctx_t ctx) { - struct ifnet *ifp; - struct adapter *adapter = arg; + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; - ifp = adapter->ifp; - - mtx_assert(&adapter->core_mtx, MA_OWNED); - INIT_DEBUGOUT("ixv_stop: begin\n"); - ixv_disable_intr(adapter); - /* Tell the stack that the interface is no longer active */ - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + ixv_if_disable_intr(ctx); hw->mac.ops.reset_hw(hw); adapter->hw.adapter_stopped = FALSE; hw->mac.ops.stop_adapter(hw); - callout_stop(&adapter->timer); + + /* Update the stack */ + adapter->link_up = FALSE; + ixv_if_update_admin_status(ctx); /* reprogram the RAR[0] in case user changed it. */ hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); +} /* ixv_if_stop */ - return; -} /* ixv_stop */ +/************************************************************************ + * ixv_identify_hardware - Determine hardware revision. 
+ ************************************************************************/ +static void +ixv_identify_hardware(if_ctx_t ctx) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct ixgbe_hw *hw = &adapter->hw; + + /* Save off the information about this board */ + hw->vendor_id = pci_get_vendor(dev); + hw->device_id = pci_get_device(dev); + hw->revision_id = pci_get_revid(dev); + hw->subsystem_vendor_id = pci_get_subvendor(dev); + hw->subsystem_device_id = pci_get_subdevice(dev); + + /* A subset of set_mac_type */ + switch (hw->device_id) { + case IXGBE_DEV_ID_82599_VF: + hw->mac.type = ixgbe_mac_82599_vf; + break; + case IXGBE_DEV_ID_X540_VF: + hw->mac.type = ixgbe_mac_X540_vf; + break; + case IXGBE_DEV_ID_X550_VF: + hw->mac.type = ixgbe_mac_X550_vf; + break; + case IXGBE_DEV_ID_X550EM_X_VF: + hw->mac.type = ixgbe_mac_X550EM_x_vf; + break; + case IXGBE_DEV_ID_X550EM_A_VF: + hw->mac.type = ixgbe_mac_X550EM_a_vf; + break; + default: + device_printf(dev, "unknown mac type\n"); + hw->mac.type = ixgbe_mac_unknown; + break; + } +} /* ixv_identify_hardware */ + +/************************************************************************ + * ixv_if_msix_intr_assign - Setup MSI-X Interrupt resources and handlers + ************************************************************************/ +static int +ixv_if_msix_intr_assign(if_ctx_t ctx, int msix) +{ + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + struct ix_rx_queue *rx_que = adapter->rx_queues; + struct ix_tx_queue *tx_que; + int error, rid, vector = 0; + char buf[16]; + + for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rx_que++) { + rid = vector + 1; + + snprintf(buf, sizeof(buf), "rxq%d", i); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, + IFLIB_INTR_RX, ixv_msix_que, rx_que, rx_que->rxr.me, buf); + + if (error) { + device_printf(iflib_get_dev(ctx), + "Failed to allocate que int %d err: %d", i, error); + 
adapter->num_rx_queues = i + 1; + goto fail; + } + + rx_que->msix = vector; + adapter->active_queues |= (u64)(1 << rx_que->msix); + + } + + for (int i = 0; i < adapter->num_tx_queues; i++) { + snprintf(buf, sizeof(buf), "txq%d", i); + tx_que = &adapter->tx_queues[i]; + tx_que->msix = i % adapter->num_rx_queues; + iflib_softirq_alloc_generic(ctx, + &adapter->rx_queues[tx_que->msix].que_irq, + IFLIB_INTR_TX, tx_que, tx_que->txr.me, buf); + } + rid = vector + 1; + error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, + IFLIB_INTR_ADMIN, ixv_msix_mbx, adapter, 0, "aq"); + if (error) { + device_printf(iflib_get_dev(ctx), + "Failed to register admin handler"); + return (error); + } + + adapter->vector = vector; + /* + * Due to a broken design QEMU will fail to properly + * enable the guest for MSIX unless the vectors in + * the table are all set up, so we must rewrite the + * ENABLE in the MSIX control register again at this + * point to cause it to successfully initialize us. + */ + if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { + int msix_ctrl; + pci_find_cap(dev, PCIY_MSIX, &rid); + rid += PCIR_MSIX_CTRL; + msix_ctrl = pci_read_config(dev, rid, 2); + msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; + pci_write_config(dev, rid, msix_ctrl, 2); + } + + return (0); + +fail: + iflib_irq_free(ctx, &adapter->irq); + rx_que = adapter->rx_queues; + for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) + iflib_irq_free(ctx, &rx_que->que_irq); + + return (error); +} /* ixv_if_msix_intr_assign */ /************************************************************************ * ixv_allocate_pci_resources ************************************************************************/ static int -ixv_allocate_pci_resources(struct adapter *adapter) +ixv_allocate_pci_resources(if_ctx_t ctx) { - device_t dev = adapter->dev; - int rid; + struct adapter *adapter = iflib_get_softc(ctx); + device_t dev = iflib_get_dev(ctx); + int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, 
SYS_RES_MEMORY, &rid, @@ -1132,9 +1113,6 @@ ixv_allocate_pci_resources(struct adapter *adapter) rman_get_bushandle(adapter->pci_mem); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; - /* Pick up the tuneable queues */ - adapter->num_queues = ixv_num_queues; - return (0); } /* ixv_allocate_pci_resources */ @@ -1142,61 +1120,26 @@ ixv_allocate_pci_resources(struct adapter *adapter) * ixv_free_pci_resources ************************************************************************/ static void -ixv_free_pci_resources(struct adapter * adapter) +ixv_free_pci_resources(if_ctx_t ctx) { - struct ix_queue *que = adapter->queues; - device_t dev = adapter->dev; - int rid, memrid; + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que = adapter->rx_queues; + device_t dev = iflib_get_dev(ctx); - memrid = PCIR_BAR(MSIX_82598_BAR); + /* Release all msix queue resources */ + if (adapter->intr_type == IFLIB_INTR_MSIX) + iflib_irq_free(ctx, &adapter->irq); - /* - * There is a slight possibility of a failure mode - * in attach that will result in entering this function - * before interrupt resources have been initialized, and - * in that case we do not want to execute the loops below - * We can detect this reliably by the state of the adapter - * res pointer. 
- */ - if (adapter->res == NULL) - goto mem; - - /* - * Release all msix queue resources: - */ - for (int i = 0; i < adapter->num_queues; i++, que++) { - rid = que->msix + 1; - if (que->tag != NULL) { - bus_teardown_intr(dev, que->res, que->tag); - que->tag = NULL; + if (que != NULL) { + for (int i = 0; i < adapter->num_rx_queues; i++, que++) { + iflib_irq_free(ctx, &que->que_irq); } - if (que->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } - - /* Clean the Mailbox interrupt last */ - rid = adapter->vector + 1; - - if (adapter->tag != NULL) { - bus_teardown_intr(dev, adapter->res, adapter->tag); - adapter->tag = NULL; - } - if (adapter->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); - -mem: - pci_release_msi(dev); - - if (adapter->msix_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, memrid, - adapter->msix_mem); - + /* Clean the Legacy or Link interrupt last */ if (adapter->pci_mem != NULL) - bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), - adapter->pci_mem); - - return; + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(0), adapter->pci_mem); } /* ixv_free_pci_resources */ /************************************************************************ @@ -1204,121 +1147,105 @@ mem: * * Setup networking device structure and register an interface. 
************************************************************************/ -static void -ixv_setup_interface(device_t dev, struct adapter *adapter) +static int +ixv_setup_interface(if_ctx_t ctx) { - struct ifnet *ifp; + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx = adapter->shared; + struct ifnet *ifp = iflib_get_ifp(ctx); INIT_DEBUGOUT("ixv_setup_interface: begin"); - ifp = adapter->ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) - panic("%s: can not if_alloc()\n", device_get_nameunit(dev)); - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_baudrate = 1000000000; - ifp->if_init = ixv_init; - ifp->if_softc = adapter; - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_ioctl = ixv_ioctl; - if_setgetcounterfn(ifp, ixv_get_counter); - /* TSO parameters */ - ifp->if_hw_tsomax = 65518; - ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER; - ifp->if_hw_tsomaxsegsize = 2048; - if (adapter->feat_en & IXGBE_FEATURE_LEGACY_TX) { - ifp->if_start = ixgbe_legacy_start; - ixv_start_locked = ixgbe_legacy_start_locked; - ixv_ring_empty = ixgbe_legacy_ring_empty; - } else { - ifp->if_transmit = ixgbe_mq_start; - ifp->if_qflush = ixgbe_qflush; - ixv_start_locked = ixgbe_mq_start_locked; - ixv_ring_empty = drbr_empty; - } - IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2); + if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); + if_setbaudrate(ifp, IF_Gbps(10)); + ifp->if_snd.ifq_maxlen = scctx->isc_ntxd[0] - 2; - ether_ifattach(ifp, adapter->hw.mac.addr); adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; + ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); - /* - * Tell the upper layer(s) we support long frames. 
- */ - ifp->if_hdrlen = sizeof(struct ether_vlan_header); - - /* Set capability flags */ - ifp->if_capabilities |= IFCAP_HWCSUM - | IFCAP_HWCSUM_IPV6 - | IFCAP_TSO - | IFCAP_LRO - | IFCAP_VLAN_HWTAGGING - | IFCAP_VLAN_HWTSO - | IFCAP_VLAN_HWCSUM - | IFCAP_JUMBO_MTU - | IFCAP_VLAN_MTU; - - /* Enable the above capabilities by default */ - ifp->if_capenable = ifp->if_capabilities; - - /* - * Specify the media types supported by this adapter and register - * callbacks to update media and link information - */ - ifmedia_init(&adapter->media, IFM_IMASK, ixv_media_change, - ixv_media_status); - ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); - - return; + return 0; } /* ixv_setup_interface */ +/************************************************************************ + * ixv_if_get_counter + ************************************************************************/ +static uint64_t +ixv_if_get_counter(if_ctx_t ctx, ift_counter cnt) +{ + struct adapter *adapter = iflib_get_softc(ctx); + if_t ifp = iflib_get_ifp(ctx); + + switch (cnt) { + case IFCOUNTER_IPACKETS: + return (adapter->ipackets); + case IFCOUNTER_OPACKETS: + return (adapter->opackets); + case IFCOUNTER_IBYTES: + return (adapter->ibytes); + case IFCOUNTER_OBYTES: + return (adapter->obytes); + case IFCOUNTER_IMCASTS: + return (adapter->imcasts); + default: + return (if_get_counter_default(ifp, cnt)); + } +} /* ixv_if_get_counter */ /************************************************************************ * ixv_initialize_transmit_units - Enable transmit unit. 
************************************************************************/ static void -ixv_initialize_transmit_units(struct adapter *adapter) +ixv_initialize_transmit_units(if_ctx_t ctx) { - struct tx_ring *txr = adapter->tx_rings; - struct ixgbe_hw *hw = &adapter->hw; + struct adapter *adapter = iflib_get_softc(ctx); + struct ixgbe_hw *hw = &adapter->hw; + if_softc_ctx_t scctx = adapter->shared; + struct ix_tx_queue *que = adapter->tx_queues; + int i; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - u64 tdba = txr->txdma.dma_paddr; - u32 txctrl, txdctl; + for (i = 0; i < adapter->num_tx_queues; i++, que++) { + struct tx_ring *txr = &que->txr; + u64 tdba = txr->tx_paddr; + u32 txctrl, txdctl; + int j = txr->me; /* Set WTHRESH to 8, burst writeback */ - txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); + txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); txdctl |= (8 << 16); - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); + IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); /* Set the HW Tx Head and Tail indices */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(i), 0); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(i), 0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDH(j), 0); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VFTDT(j), 0); /* Set Tx Tail register */ - txr->tail = IXGBE_VFTDT(i); + txr->tail = IXGBE_VFTDT(j); + + txr->tx_rs_cidx = txr->tx_rs_pidx = txr->tx_cidx_processed = 0; + for (int k = 0; k < scctx->isc_ntxd[0]; k++) + txr->tx_rsq[k] = QIDX_INVALID; /* Set Ring parameters */ - IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i), + IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(j), (tdba & 0x00000000ffffffffULL)); - IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i), (tdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i), - adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc)); - txctrl = IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(i)); + IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(j), (tdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(j), + scctx->isc_ntxd[0] * sizeof(struct ixgbe_legacy_tx_desc)); + txctrl = 
IXGBE_READ_REG(hw, IXGBE_VFDCA_TXCTRL(j)); txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i), txctrl); + IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(j), txctrl); /* Now enable */ - txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i)); + txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(j)); txdctl |= IXGBE_TXDCTL_ENABLE; - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl); + IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(j), txdctl); } return; } /* ixv_initialize_transmit_units */ - /************************************************************************ * ixv_initialize_rss_mapping ************************************************************************/ @@ -1345,17 +1272,17 @@ ixv_initialize_rss_mapping(struct adapter *adapter) /* Set up the redirection table */ for (i = 0, j = 0; i < 64; i++, j++) { - if (j == adapter->num_queues) + if (j == adapter->num_rx_queues) j = 0; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * Fetch the RSS bucket id for the given indirection * entry. Cap it at the number of configured buckets - * (which is num_queues.) + * (which is num_rx_queues.) */ queue_id = rss_get_indirection_to_bucket(i); - queue_id = queue_id % adapter->num_queues; + queue_id = queue_id % adapter->num_rx_queues; } else queue_id = j; @@ -1416,12 +1343,14 @@ ixv_initialize_rss_mapping(struct adapter *adapter) * ixv_initialize_receive_units - Setup receive registers and features. 
************************************************************************/ static void -ixv_initialize_receive_units(struct adapter *adapter) +ixv_initialize_receive_units(if_ctx_t ctx) { - struct rx_ring *rxr = adapter->rx_rings; - struct ixgbe_hw *hw = &adapter->hw; - struct ifnet *ifp = adapter->ifp; - u32 bufsz, rxcsum, psrtype; + struct adapter *adapter = iflib_get_softc(ctx); + if_softc_ctx_t scctx; + struct ixgbe_hw *hw = &adapter->hw; + struct ifnet *ifp = iflib_get_ifp(ctx); + struct ix_rx_queue *que = adapter->rx_queues; + u32 bufsz, psrtype; if (ifp->if_mtu > ETHERMTU) bufsz = 4096 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; @@ -1434,7 +1363,7 @@ ixv_initialize_receive_units(struct adapter *adapter) | IXGBE_PSRTYPE_IPV6HDR | IXGBE_PSRTYPE_L2HDR; - if (adapter->num_queues > 1) + if (adapter->num_rx_queues > 1) psrtype |= 1 << 29; IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype); @@ -1443,17 +1372,20 @@ ixv_initialize_receive_units(struct adapter *adapter) if (ixgbevf_rlpml_set_vf(hw, adapter->max_frame_size) != 0) { device_printf(adapter->dev, "There is a problem with the PF setup. 
It is likely the receive unit for this VF will not function correctly.\n"); } + scctx = adapter->shared; - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - u64 rdba = rxr->rxdma.dma_paddr; - u32 reg, rxdctl; + for (int i = 0; i < adapter->num_rx_queues; i++, que++) { + struct rx_ring *rxr = &que->rxr; + u64 rdba = rxr->rx_paddr; + u32 reg, rxdctl; + int j = rxr->me; /* Disable the queue */ - rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); + rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)); rxdctl &= ~IXGBE_RXDCTL_ENABLE; - IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); - for (int j = 0; j < 10; j++) { - if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl); + for (int k = 0; k < 10; k++) { + if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) & IXGBE_RXDCTL_ENABLE) msec_delay(1); else @@ -1461,32 +1393,32 @@ ixv_initialize_receive_units(struct adapter *adapter) } wmb(); /* Setup the Base and Length of the Rx Descriptor Ring */ - IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i), + IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(j), (rdba & 0x00000000ffffffffULL)); - IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i), (rdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i), - adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); + IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(j), (rdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(j), + scctx->isc_nrxd[0] * sizeof(union ixgbe_adv_rx_desc)); /* Reset the ring indices */ IXGBE_WRITE_REG(hw, IXGBE_VFRDH(rxr->me), 0); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), 0); /* Set up the SRRCTL register */ - reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(i)); + reg = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(j)); reg &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; reg &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; reg |= bufsz; reg |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), reg); + IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(j), reg); /* Capture Rx Tail index */ rxr->tail = IXGBE_VFRDT(rxr->me); /* Do the queue enabling last */ rxdctl |= IXGBE_RXDCTL_ENABLE | 
IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl); - for (int k = 0; k < 10; k++) { - if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)) & + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(j), rxdctl); + for (int l = 0; l < 10; l++) { + if (IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(j)) & IXGBE_RXDCTL_ENABLE) break; msec_delay(1); @@ -1494,6 +1426,7 @@ ixv_initialize_receive_units(struct adapter *adapter) wmb(); /* Set the Tail Pointer */ +#ifdef DEV_NETMAP /* * In netmap mode, we must preserve the buffers made * available to userspace before the if_init() @@ -1510,51 +1443,33 @@ ixv_initialize_receive_units(struct adapter *adapter) * RDT points to the last slot available for reception (?), * so RDT = num_rx_desc - 1 means the whole ring is available. */ -#ifdef DEV_NETMAP - if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && - (ifp->if_capenable & IFCAP_NETMAP)) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; + if (ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_adapter *na = NA(ifp); + struct netmap_kring *kring = &na->rx_rings[j]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), t); } else #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_VFRDT(rxr->me), - adapter->num_rx_desc - 1); + scctx->isc_nrxd[0] - 1); } - rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); - ixv_initialize_rss_mapping(adapter); - - if (adapter->num_queues > 1) { - /* RSS and RX IPP Checksum are mutually exclusive */ - rxcsum |= IXGBE_RXCSUM_PCSD; - } - - if (ifp->if_capenable & IFCAP_RXCSUM) - rxcsum |= IXGBE_RXCSUM_PCSD; - - if (!(rxcsum & IXGBE_RXCSUM_PCSD)) - rxcsum |= IXGBE_RXCSUM_IPPCSE; - - IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); - - return; } /* ixv_initialize_receive_units */ /************************************************************************ * ixv_setup_vlan_support ************************************************************************/ static void -ixv_setup_vlan_support(struct adapter 
*adapter) +ixv_setup_vlan_support(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; u32 ctrl, vid, vfta, retry; /* - * We get here thru init_locked, meaning + * We get here thru if_init, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. @@ -1563,7 +1478,7 @@ ixv_setup_vlan_support(struct adapter *adapter) return; /* Enable the queues */ - for (int i = 0; i < adapter->num_queues; i++) { + for (int i = 0; i < adapter->num_rx_queues; i++) { ctrl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), ctrl); @@ -1571,7 +1486,7 @@ ixv_setup_vlan_support(struct adapter *adapter) * Let Rx path know that it needs to store VLAN tag * as part of extra mbuf info. */ - adapter->rx_rings[i].vtag_strip = TRUE; + adapter->rx_queues[i].rxr.vtag_strip = TRUE; } /* @@ -1602,7 +1517,7 @@ ixv_setup_vlan_support(struct adapter *adapter) } /* ixv_setup_vlan_support */ /************************************************************************ - * ixv_register_vlan + * ixv_if_register_vlan * * Run via a vlan config EVENT, it enables us to use the * HW Filter table since we can get the vlan id. This just @@ -1610,92 +1525,83 @@ ixv_setup_vlan_support(struct adapter *adapter) * will repopulate the real table. 
************************************************************************/ static void -ixv_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) +ixv_if_register_vlan(if_ctx_t ctx, u16 vtag) { - struct adapter *adapter = ifp->if_softc; + struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; - if (ifp->if_softc != arg) /* Not our event */ - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; - /* Re-init to load the changes */ - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); -} /* ixv_register_vlan */ +} /* ixv_if_register_vlan */ /************************************************************************ - * ixv_unregister_vlan + * ixv_if_unregister_vlan * * Run via a vlan unconfig EVENT, remove our entry * in the soft vfta. ************************************************************************/ static void -ixv_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) +ixv_if_unregister_vlan(if_ctx_t ctx, u16 vtag) { - struct adapter *adapter = ifp->if_softc; + struct adapter *adapter = iflib_get_softc(ctx); u16 index, bit; - if (ifp->if_softc != arg) - return; - - if ((vtag == 0) || (vtag > 4095)) /* Invalid */ - return; - - IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; ixv_shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; - /* Re-init to load the changes */ - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); -} /* ixv_unregister_vlan */ +} /* ixv_if_unregister_vlan */ /************************************************************************ - * ixv_enable_intr + * ixv_if_enable_intr ************************************************************************/ static void -ixv_enable_intr(struct adapter *adapter) +ixv_if_enable_intr(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw = &adapter->hw; - struct ix_queue *que = 
adapter->queues; + struct ix_rx_queue *que = adapter->rx_queues; u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); - IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); mask = IXGBE_EIMS_ENABLE_MASK; mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, mask); - for (int i = 0; i < adapter->num_queues; i++, que++) + for (int i = 0; i < adapter->num_rx_queues; i++, que++) ixv_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); - - return; -} /* ixv_enable_intr */ +} /* ixv_if_enable_intr */ /************************************************************************ - * ixv_disable_intr + * ixv_if_disable_intr ************************************************************************/ static void -ixv_disable_intr(struct adapter *adapter) +ixv_if_disable_intr(if_ctx_t ctx) { + struct adapter *adapter = iflib_get_softc(ctx); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIAC, 0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEIMC, ~0); IXGBE_WRITE_FLUSH(&adapter->hw); +} /* ixv_if_disable_intr */ - return; -} /* ixv_disable_intr */ +/************************************************************************ + * ixv_if_rx_queue_intr_enable + ************************************************************************/ +static int +ixv_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) +{ + struct adapter *adapter = iflib_get_softc(ctx); + struct ix_rx_queue *que = &adapter->rx_queues[rxqid]; + + ixv_enable_queue(adapter, que->rxr.me); + + return (0); +} /* ixv_if_rx_queue_intr_enable */ /************************************************************************ * ixv_set_ivar @@ -1733,9 +1639,11 @@ ixv_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) static void ixv_configure_ivars(struct adapter *adapter) { - struct ix_queue *que = adapter->queues; + struct ix_rx_queue *que = adapter->rx_queues; - for (int i = 0; i < adapter->num_queues; i++, que++) { + MPASS(adapter->num_rx_queues == adapter->num_tx_queues); + + for (int i = 0; i < 
adapter->num_rx_queues; i++, que++) { /* First the RX queue entry */ ixv_set_ivar(adapter, i, que->msix, 0); /* ... and the TX */ @@ -1749,33 +1657,6 @@ ixv_configure_ivars(struct adapter *adapter) ixv_set_ivar(adapter, 1, adapter->vector, -1); } /* ixv_configure_ivars */ - -/************************************************************************ - * ixv_get_counter - ************************************************************************/ -static uint64_t -ixv_get_counter(struct ifnet *ifp, ift_counter cnt) -{ - struct adapter *adapter; - - adapter = if_getsoftc(ifp); - - switch (cnt) { - case IFCOUNTER_IPACKETS: - return (adapter->ipackets); - case IFCOUNTER_OPACKETS: - return (adapter->opackets); - case IFCOUNTER_IBYTES: - return (adapter->ibytes); - case IFCOUNTER_OBYTES: - return (adapter->obytes); - case IFCOUNTER_IMCASTS: - return (adapter->imcasts); - default: - return (if_get_counter_default(ifp, cnt)); - } -} /* ixv_get_counter */ - /************************************************************************ * ixv_save_stats * @@ -1859,15 +1740,15 @@ ixv_update_stats(struct adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct ixgbevf_hw_stats *stats = &adapter->stats.vf; - UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.vf.last_vfgprc, + UPDATE_STAT_32(IXGBE_VFGPRC, adapter->stats.vf.last_vfgprc, adapter->stats.vf.vfgprc); - UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.vf.last_vfgptc, + UPDATE_STAT_32(IXGBE_VFGPTC, adapter->stats.vf.last_vfgptc, adapter->stats.vf.vfgptc); - UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, + UPDATE_STAT_36(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB, adapter->stats.vf.last_vfgorc, adapter->stats.vf.vfgorc); - UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, + UPDATE_STAT_36(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB, adapter->stats.vf.last_vfgotc, adapter->stats.vf.vfgotc); - UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.vf.last_vfmprc, + UPDATE_STAT_32(IXGBE_VFMPRC, adapter->stats.vf.last_vfmprc, adapter->stats.vf.vfmprc); /* Fill out 
the OS statistics structure */ @@ -1885,8 +1766,8 @@ static void ixv_add_stats_sysctls(struct adapter *adapter) { device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + struct ix_tx_queue *tx_que = adapter->tx_queues; + struct ix_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); @@ -1898,42 +1779,33 @@ ixv_add_stats_sysctls(struct adapter *adapter) char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", - CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", - CTLFLAG_RD, &adapter->mbuf_defrag_failed, "m_defrag() failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); - for (int i = 0; i < adapter->num_queues; i++, txr++) { + for (int i = 0; i < adapter->num_tx_queues; i++, tx_que++) { + struct tx_ring *txr = &tx_que->txr; + snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); + queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, + CTLFLAG_RD, NULL, "Queue Name"); + queue_list = SYSCTL_CHILDREN(queue_node); + + SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx", + CTLFLAG_RD, &(txr->tso_tx), "TSO Packets"); + SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", + CTLFLAG_RD, &(txr->total_packets), "TX Packets"); + } + + for (int i = 0; i < adapter->num_rx_queues; i++, rx_que++) { + struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, 
"irqs", - CTLFLAG_RD, &(adapter->queues[i].irqs), "IRQs on queue"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_tx_dma_setup", - CTLFLAG_RD, &(txr->no_tx_dma_setup), - "Driver Tx DMA failure in Tx"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_no_desc", - CTLFLAG_RD, &(txr->no_desc_avail), - "Not-enough-descriptors count: TX"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", - CTLFLAG_RD, &(txr->total_packets), "TX Packets"); - SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "br_drops", - CTLFLAG_RD, &(txr->br->br_drops), - "Packets dropped in buf_ring"); - } - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); - queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, - CTLFLAG_RD, NULL, "Queue Name"); - queue_list = SYSCTL_CHILDREN(queue_node); - + CTLFLAG_RD, &(rx_que->irqs), "IRQs on queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "RX packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", @@ -1958,19 +1830,6 @@ ixv_add_stats_sysctls(struct adapter *adapter) CTLFLAG_RD, &stats->vfgotc, "Good Octets Transmitted"); } /* ixv_add_stats_sysctls */ -/************************************************************************ - * ixv_set_sysctl_value - ************************************************************************/ -static void -ixv_set_sysctl_value(struct adapter *adapter, const char *name, - const char *description, int *limit, int value) -{ - *limit = value; - SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), - OID_AUTO, name, CTLFLAG_RW, limit, value, description); -} /* ixv_set_sysctl_value */ - /************************************************************************ * ixv_print_debug_info * @@ -1983,34 +1842,10 @@ ixv_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; - struct ix_queue *que = 
adapter->queues; - struct rx_ring *rxr; - struct tx_ring *txr; - struct lro_ctrl *lro; device_printf(dev, "Error Byte Count = %u \n", IXGBE_READ_REG(hw, IXGBE_ERRBC)); - for (int i = 0; i < adapter->num_queues; i++, que++) { - txr = que->txr; - rxr = que->rxr; - lro = &rxr->lro; - device_printf(dev, "QUE(%d) IRQs Handled: %lu\n", - que->msix, (long)que->irqs); - device_printf(dev, "RX(%d) Packets Received: %lld\n", - rxr->me, (long long)rxr->rx_packets); - device_printf(dev, "RX(%d) Bytes Received: %lu\n", - rxr->me, (long)rxr->rx_bytes); - device_printf(dev, "RX(%d) LRO Queued= %lld\n", - rxr->me, (long long)lro->lro_queued); - device_printf(dev, "RX(%d) LRO Flushed= %lld\n", - rxr->me, (long long)lro->lro_flushed); - device_printf(dev, "TX(%d) Packets Sent: %lu\n", - txr->me, (long)txr->total_packets); - device_printf(dev, "TX(%d) NO Desc Avail: %lu\n", - txr->me, (long)txr->no_desc_avail); - } - device_printf(dev, "MBX IRQ Handled: %lu\n", (long)adapter->link_irq); } /* ixv_print_debug_info */ @@ -2076,365 +1911,5 @@ ixv_init_device_features(struct adapter *adapter) /* Needs advanced context descriptor regardless of offloads req'd */ if (adapter->feat_cap & IXGBE_FEATURE_NEEDS_CTXD) adapter->feat_en |= IXGBE_FEATURE_NEEDS_CTXD; - - /* Enabled via sysctl... 
*/ - /* Legacy (single queue) transmit */ - if ((adapter->feat_cap & IXGBE_FEATURE_LEGACY_TX) && - ixv_enable_legacy_tx) - adapter->feat_en |= IXGBE_FEATURE_LEGACY_TX; } /* ixv_init_device_features */ -/************************************************************************ - * ixv_shutdown - Shutdown entry point - ************************************************************************/ -static int -ixv_shutdown(device_t dev) -{ - struct adapter *adapter = device_get_softc(dev); - IXGBE_CORE_LOCK(adapter); - ixv_stop(adapter); - IXGBE_CORE_UNLOCK(adapter); - - return (0); -} /* ixv_shutdown */ - - -/************************************************************************ - * ixv_ioctl - Ioctl entry point - * - * Called when the user wants to configure the interface. - * - * return 0 on success, positive on failure - ************************************************************************/ -static int -ixv_ioctl(struct ifnet *ifp, u_long command, caddr_t data) -{ - struct adapter *adapter = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *)data; -#if defined(INET) || defined(INET6) - struct ifaddr *ifa = (struct ifaddr *)data; - bool avoid_reset = FALSE; -#endif - int error = 0; - - switch (command) { - - case SIOCSIFADDR: -#ifdef INET - if (ifa->ifa_addr->sa_family == AF_INET) - avoid_reset = TRUE; -#endif -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6) - avoid_reset = TRUE; -#endif -#if defined(INET) || defined(INET6) - /* - * Calling init results in link renegotiation, - * so we avoid doing it when possible. 
- */ - if (avoid_reset) { - ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) - ixv_init(adapter); - if (!(ifp->if_flags & IFF_NOARP)) - arp_ifinit(ifp, ifa); - } else - error = ether_ioctl(ifp, command, data); - break; -#endif - case SIOCSIFMTU: - IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); - if (ifr->ifr_mtu > IXGBE_MAX_MTU) { - error = EINVAL; - } else { - IXGBE_CORE_LOCK(adapter); - ifp->if_mtu = ifr->ifr_mtu; - adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFFLAGS: - IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); - IXGBE_CORE_LOCK(adapter); - if (ifp->if_flags & IFF_UP) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - ixv_init_locked(adapter); - } else - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - ixv_stop(adapter); - adapter->if_flags = ifp->if_flags; - IXGBE_CORE_UNLOCK(adapter); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_CORE_LOCK(adapter); - ixv_disable_intr(adapter); - ixv_set_multi(adapter); - ixv_enable_intr(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - break; - case SIOCSIFMEDIA: - case SIOCGIFMEDIA: - IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); - error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); - break; - case SIOCSIFCAP: - { - int mask = ifr->ifr_reqcap ^ ifp->if_capenable; - IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); - if (mask & IFCAP_HWCSUM) - ifp->if_capenable ^= IFCAP_HWCSUM; - if (mask & IFCAP_TSO4) - ifp->if_capenable ^= IFCAP_TSO4; - if (mask & IFCAP_LRO) - ifp->if_capenable ^= IFCAP_LRO; - if (mask & IFCAP_VLAN_HWTAGGING) - ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_CORE_LOCK(adapter); - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - } - 
VLAN_CAPABILITIES(ifp); - break; - } - - default: - IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); - error = ether_ioctl(ifp, command, data); - break; - } - - return (error); -} /* ixv_ioctl */ - -/************************************************************************ - * ixv_init - ************************************************************************/ -static void -ixv_init(void *arg) -{ - struct adapter *adapter = arg; - - IXGBE_CORE_LOCK(adapter); - ixv_init_locked(adapter); - IXGBE_CORE_UNLOCK(adapter); - - return; -} /* ixv_init */ - - -/************************************************************************ - * ixv_handle_que - ************************************************************************/ -static void -ixv_handle_que(void *context, int pending) -{ - struct ix_queue *que = context; - struct adapter *adapter = que->adapter; - struct tx_ring *txr = que->txr; - struct ifnet *ifp = adapter->ifp; - bool more; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - more = ixgbe_rxeof(que); - IXGBE_TX_LOCK(txr); - ixgbe_txeof(txr); - if (!ixv_ring_empty(ifp, txr->br)) - ixv_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); - if (more) { - taskqueue_enqueue(que->tq, &que->que_task); - return; - } - } - - /* Re-enable this interrupt */ - ixv_enable_queue(adapter, que->msix); - - return; -} /* ixv_handle_que */ - -/************************************************************************ - * ixv_allocate_msix - Setup MSI-X Interrupt resources and handlers - ************************************************************************/ -static int -ixv_allocate_msix(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que = adapter->queues; - struct tx_ring *txr = adapter->tx_rings; - int error, msix_ctrl, rid, vector = 0; - - for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { - rid = vector + 1; - que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, - RF_SHAREABLE | RF_ACTIVE); - if (que->res == NULL) { - 
device_printf(dev, "Unable to allocate bus resource: que interrupt [%d]\n", - vector); - return (ENXIO); - } - /* Set the handler function */ - error = bus_setup_intr(dev, que->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - ixv_msix_que, que, &que->tag); - if (error) { - que->res = NULL; - device_printf(dev, "Failed to register QUE handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, que->res, que->tag, "que %d", i); -#endif - que->msix = vector; - adapter->active_queues |= (u64)(1 << que->msix); - /* - * Bind the MSI-X vector, and thus the - * ring to the corresponding CPU. - */ - if (adapter->num_queues > 1) - bus_bind_intr(dev, que->res, i); - TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); - TASK_INIT(&que->que_task, 0, ixv_handle_que, que); - que->tq = taskqueue_create_fast("ixv_que", M_NOWAIT, - taskqueue_thread_enqueue, &que->tq); - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", - device_get_nameunit(adapter->dev)); - } - - /* and Mailbox */ - rid = vector + 1; - adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, - RF_SHAREABLE | RF_ACTIVE); - if (!adapter->res) { - device_printf(dev, - "Unable to allocate bus resource: MBX interrupt [%d]\n", - rid); - return (ENXIO); - } - /* Set the mbx handler function */ - error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, - NULL, ixv_msix_mbx, adapter, &adapter->tag); - if (error) { - adapter->res = NULL; - device_printf(dev, "Failed to register LINK handler"); - return (error); - } -#if __FreeBSD_version >= 800504 - bus_describe_intr(dev, adapter->res, adapter->tag, "mbx"); -#endif - adapter->vector = vector; - /* Tasklets for Mailbox */ - TASK_INIT(&adapter->link_task, 0, ixv_handle_link, adapter); - adapter->tq = taskqueue_create_fast("ixv_mbx", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s mbxq", - device_get_nameunit(adapter->dev)); - /* - * Due to a broken design QEMU 
will fail to properly - * enable the guest for MSI-X unless the vectors in - * the table are all set up, so we must rewrite the - * ENABLE in the MSI-X control register again at this - * point to cause it to successfully initialize us. - */ - if (adapter->hw.mac.type == ixgbe_mac_82599_vf) { - pci_find_cap(dev, PCIY_MSIX, &rid); - rid += PCIR_MSIX_CTRL; - msix_ctrl = pci_read_config(dev, rid, 2); - msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; - pci_write_config(dev, rid, msix_ctrl, 2); - } - - return (0); -} /* ixv_allocate_msix */ - -/************************************************************************ - * ixv_configure_interrupts - Setup MSI-X resources - * - * Note: The VF device MUST use MSI-X, there is no fallback. - ************************************************************************/ -static int -ixv_configure_interrupts(struct adapter *adapter) -{ - device_t dev = adapter->dev; - int rid, want, msgs; - - /* Must have at least 2 MSI-X vectors */ - msgs = pci_msix_count(dev); - if (msgs < 2) - goto out; - rid = PCIR_BAR(3); - adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, - RF_ACTIVE); - if (adapter->msix_mem == NULL) { - device_printf(adapter->dev, "Unable to map MSI-X table \n"); - goto out; - } - - /* - * Want vectors for the queues, - * plus an additional for mailbox. 
- */ - want = adapter->num_queues + 1; - if (want > msgs) { - want = msgs; - adapter->num_queues = msgs - 1; - } else - msgs = want; - if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { - device_printf(adapter->dev, - "Using MSI-X interrupts with %d vectors\n", want); - /* reflect correct sysctl value */ - ixv_num_queues = adapter->num_queues; - - return (0); - } - /* Release in case alloc was insufficient */ - pci_release_msi(dev); -out: - if (adapter->msix_mem != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, rid, - adapter->msix_mem); - adapter->msix_mem = NULL; - } - device_printf(adapter->dev, "MSI-X config error\n"); - - return (ENXIO); -} /* ixv_configure_interrupts */ - - -/************************************************************************ - * ixv_handle_link - Tasklet handler for MSI-X MBX interrupts - * - * Done outside of interrupt context since the driver might sleep - ************************************************************************/ -static void -ixv_handle_link(void *context, int pending) -{ - struct adapter *adapter = context; - - adapter->hw.mac.ops.check_link(&adapter->hw, &adapter->link_speed, - &adapter->link_up, FALSE); - ixv_update_link_status(adapter); -} /* ixv_handle_link */ - -/************************************************************************ - * ixv_check_link - Used in the local timer to poll for link changes - ************************************************************************/ -static void -ixv_check_link(struct adapter *adapter) -{ - adapter->hw.mac.get_link_status = TRUE; - - adapter->hw.mac.ops.check_link(&adapter->hw, &adapter->link_speed, - &adapter->link_up, FALSE); - ixv_update_link_status(adapter); -} /* ixv_check_link */ - diff --git a/sys/dev/ixgbe/if_sriov.c b/sys/dev/ixgbe/if_sriov.c index 8653fceec14..6d013e80d2f 100644 --- a/sys/dev/ixgbe/if_sriov.c +++ b/sys/dev/ixgbe/if_sriov.c @@ -33,6 +33,7 @@ /*$FreeBSD$*/ #include "ixgbe.h" +#include "ixgbe_sriov.h" #ifdef PCI_IOV @@ -80,10 
+81,14 @@ ixgbe_align_all_queue_indices(struct adapter *adapter) int i; int index; - for (i = 0; i < adapter->num_queues; i++) { + for (i = 0; i < adapter->num_rx_queues; i++) { index = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, i); - adapter->rx_rings[i].me = index; - adapter->tx_rings[i].me = index; + adapter->rx_queues[i].rxr.me = index; + } + + for (i = 0; i < adapter->num_tx_queues; i++) { + index = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, i); + adapter->tx_queues[i].txr.me = index; } } @@ -233,7 +238,7 @@ ixgbe_vf_set_default_vlan(struct adapter *adapter, struct ixgbe_vf *vf, if (tag == 0) { /* Accept non-vlan tagged traffic. */ - //vmolr |= IXGBE_VMOLR_AUPE; + vmolr |= IXGBE_VMOLR_AUPE; /* Allow VM to tag outgoing traffic; no default tag. */ vmvir = 0; @@ -269,7 +274,7 @@ ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf) * frames on either the PF or the VF. */ if (adapter->max_frame_size > ETHER_MAX_LEN || - vf->max_frame_size > ETHER_MAX_LEN) + vf->maximum_frame_size > ETHER_MAX_LEN) return (FALSE); return (TRUE); @@ -281,7 +286,7 @@ ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf) * 1.1 or later VF versions always work if they aren't using * jumbo frames. */ - if (vf->max_frame_size <= ETHER_MAX_LEN) + if (vf->maximum_frame_size <= ETHER_MAX_LEN) return (TRUE); /* @@ -292,7 +297,6 @@ ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf) return (TRUE); return (FALSE); - } } /* ixgbe_vf_frame_size_compatible */ @@ -451,7 +455,7 @@ ixgbe_vf_set_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) } /* It is illegal to enable vlan tag 0. 
*/ - if (tag == 0 && enable != 0){ + if (tag == 0 && enable != 0) { ixgbe_send_vf_nack(adapter, vf, msg[0]); return; } @@ -484,8 +488,8 @@ ixgbe_vf_set_lpe(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) return; } - vf->max_frame_size = vf_max_size; - ixgbe_update_max_frame(adapter, vf->max_frame_size); + vf->maximum_frame_size = vf_max_size; + ixgbe_update_max_frame(adapter, vf->maximum_frame_size); /* * We might have to disable reception to this VF if the frame size is @@ -565,8 +569,12 @@ ixgbe_vf_get_queues(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg) static void -ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf) +ixgbe_process_vf_msg(if_ctx_t ctx, struct ixgbe_vf *vf) { + struct adapter *adapter = iflib_get_softc(ctx); +#ifdef KTR + struct ifnet *ifp = iflib_get_ifp(ctx); +#endif struct ixgbe_hw *hw; uint32_t msg[IXGBE_VFMAILBOX_SIZE]; int error; @@ -578,8 +586,8 @@ ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf) if (error != 0) return; - CTR3(KTR_MALLOC, "%s: received msg %x from %d", - adapter->ifp->if_xname, msg[0], vf->pool); + CTR3(KTR_MALLOC, "%s: received msg %x from %d", ifp->if_xname, + msg[0], vf->pool); if (msg[0] == IXGBE_VF_RESET) { ixgbe_vf_reset_msg(adapter, vf, msg); return; @@ -620,17 +628,16 @@ ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf) /* Tasklet for handling VF -> PF mailbox messages */ void -ixgbe_handle_mbx(void *context, int pending) +ixgbe_handle_mbx(void *context) { - struct adapter *adapter; + if_ctx_t ctx = context; + struct adapter *adapter = iflib_get_softc(ctx); struct ixgbe_hw *hw; struct ixgbe_vf *vf; int i; - adapter = context; hw = &adapter->hw; - IXGBE_CORE_LOCK(adapter); for (i = 0; i < adapter->num_vfs; i++) { vf = &adapter->vfs[i]; @@ -639,22 +646,21 @@ ixgbe_handle_mbx(void *context, int pending) ixgbe_process_vf_reset(adapter, vf); if (hw->mbx.ops.check_for_msg(hw, vf->pool) == 0) - ixgbe_process_vf_msg(adapter, vf); + 
ixgbe_process_vf_msg(ctx, vf); if (hw->mbx.ops.check_for_ack(hw, vf->pool) == 0) ixgbe_process_vf_ack(adapter, vf); } } - IXGBE_CORE_UNLOCK(adapter); } /* ixgbe_handle_mbx */ int -ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config) +ixgbe_if_iov_init(if_ctx_t ctx, u16 num_vfs, const nvlist_t *config) { struct adapter *adapter; int retval = 0; - adapter = device_get_softc(dev); + adapter = iflib_get_softc(ctx); adapter->iov_mode = IXGBE_NO_VM; if (num_vfs == 0) { @@ -682,45 +688,38 @@ ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config) goto err_init_iov; } - IXGBE_CORE_LOCK(adapter); - adapter->vfs = malloc(sizeof(*adapter->vfs) * num_vfs, M_IXGBE_SRIOV, M_NOWAIT | M_ZERO); if (adapter->vfs == NULL) { retval = ENOMEM; - IXGBE_CORE_UNLOCK(adapter); goto err_init_iov; } adapter->num_vfs = num_vfs; - adapter->init_locked(adapter); + ixgbe_if_init(adapter->ctx); adapter->feat_en |= IXGBE_FEATURE_SRIOV; - IXGBE_CORE_UNLOCK(adapter); - - return retval; + return (retval); err_init_iov: adapter->num_vfs = 0; adapter->pool = 0; adapter->iov_mode = IXGBE_NO_VM; - return retval; -} /* ixgbe_init_iov */ + return (retval); +} /* ixgbe_if_iov_init */ void -ixgbe_uninit_iov(device_t dev) +ixgbe_if_iov_uninit(if_ctx_t ctx) { struct ixgbe_hw *hw; struct adapter *adapter; uint32_t pf_reg, vf_reg; - adapter = device_get_softc(dev); + adapter = iflib_get_softc(ctx); hw = &adapter->hw; - IXGBE_CORE_LOCK(adapter); - /* Enable rx/tx for the PF and disable it for all VFs. 
*/ pf_reg = IXGBE_VF_INDEX(adapter->pool); IXGBE_WRITE_REG(hw, IXGBE_VFRE(pf_reg), IXGBE_VF_BIT(adapter->pool)); @@ -739,9 +738,7 @@ ixgbe_uninit_iov(device_t dev) adapter->vfs = NULL; adapter->num_vfs = 0; adapter->feat_en &= ~IXGBE_FEATURE_SRIOV; - - IXGBE_CORE_UNLOCK(adapter); -} /* ixgbe_uninit_iov */ +} /* ixgbe_if_iov_uninit */ static void ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf) @@ -749,8 +746,6 @@ ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf) struct ixgbe_hw *hw; uint32_t vf_index, pfmbimr; - IXGBE_CORE_LOCK_ASSERT(adapter); - hw = &adapter->hw; if (!(vf->flags & IXGBE_VF_ACTIVE)) @@ -786,8 +781,6 @@ ixgbe_initialize_iov(struct adapter *adapter) if (adapter->iov_mode == IXGBE_NO_VM) return; - IXGBE_CORE_LOCK_ASSERT(adapter); - /* RMW appropriate registers based on IOV mode */ /* Read... */ mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC); @@ -844,36 +837,33 @@ ixgbe_recalculate_max_frame(struct adapter *adapter) { struct ixgbe_vf *vf; - IXGBE_CORE_LOCK_ASSERT(adapter); - for (int i = 0; i < adapter->num_vfs; i++) { vf = &adapter->vfs[i]; if (vf->flags & IXGBE_VF_ACTIVE) - ixgbe_update_max_frame(adapter, vf->max_frame_size); + ixgbe_update_max_frame(adapter, vf->maximum_frame_size); } } /* ixgbe_recalculate_max_frame */ int -ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config) +ixgbe_if_iov_vf_add(if_ctx_t ctx, u16 vfnum, const nvlist_t *config) { struct adapter *adapter; struct ixgbe_vf *vf; const void *mac; - adapter = device_get_softc(dev); + adapter = iflib_get_softc(ctx); KASSERT(vfnum < adapter->num_vfs, ("VF index %d is out of range %d", vfnum, adapter->num_vfs)); - IXGBE_CORE_LOCK(adapter); vf = &adapter->vfs[vfnum]; vf->pool= vfnum; /* RAR[0] is used by the PF so use vfnum + 1 for VF RAR. 
*/ vf->rar_index = vfnum + 1; vf->default_vlan = 0; - vf->max_frame_size = ETHER_MAX_LEN; - ixgbe_update_max_frame(adapter, vf->max_frame_size); + vf->maximum_frame_size = ETHER_MAX_LEN; + ixgbe_update_max_frame(adapter, vf->maximum_frame_size); if (nvlist_exists_binary(config, "mac-addr")) { mac = nvlist_get_binary(config, "mac-addr", NULL); @@ -890,25 +880,16 @@ ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config) vf->flags |= IXGBE_VF_ACTIVE; ixgbe_init_vf(adapter, vf); - IXGBE_CORE_UNLOCK(adapter); return (0); -} /* ixgbe_add_vf */ +} /* ixgbe_if_iov_vf_add */ #else void -ixgbe_handle_mbx(void *context, int pending) +ixgbe_handle_mbx(void *context) { - UNREFERENCED_2PARAMETER(context, pending); + UNREFERENCED_PARAMETER(context); } /* ixgbe_handle_mbx */ -inline int -ixgbe_vf_que_index(int mode, int vfnum, int num) -{ - UNREFERENCED_2PARAMETER(mode, vfnum); - - return num; -} /* ixgbe_vf_que_index */ - #endif diff --git a/sys/dev/ixgbe/ix_txrx.c b/sys/dev/ixgbe/ix_txrx.c index c634afb62e3..cea3cbd9884 100644 --- a/sys/dev/ixgbe/ix_txrx.c +++ b/sys/dev/ixgbe/ix_txrx.c @@ -41,1878 +41,436 @@ #include "ixgbe.h" -/* - * HW RSC control: - * this feature only works with - * IPv4, and only on 82599 and later. - * Also this will cause IP forwarding to - * fail and that can't be controlled by - * the stack as LRO can. For all these - * reasons I've deemed it best to leave - * this off and not bother with a tuneable - * interface, this would need to be compiled - * to enable. - */ -static bool ixgbe_rsc_enable = FALSE; - -/* - * For Flow Director: this is the - * number of TX packets we sample - * for the filter pool, this means - * every 20th packet will be probed. - * - * This feature can be disabled by - * setting this to 0. 
- */ -static int atr_sample_rate = 20; /************************************************************************ - * Local Function prototypes + * Local Function prototypes ************************************************************************/ -static void ixgbe_setup_transmit_ring(struct tx_ring *); -static void ixgbe_free_transmit_buffers(struct tx_ring *); -static int ixgbe_setup_receive_ring(struct rx_ring *); -static void ixgbe_free_receive_buffers(struct rx_ring *); -static void ixgbe_rx_checksum(u32, struct mbuf *, u32); -static void ixgbe_refresh_mbufs(struct rx_ring *, int); -static int ixgbe_xmit(struct tx_ring *, struct mbuf **); -static int ixgbe_tx_ctx_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -static int ixgbe_tso_setup(struct tx_ring *, - struct mbuf *, u32 *, u32 *); -static __inline void ixgbe_rx_discard(struct rx_ring *, int); -static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, - struct mbuf *, u32); -static int ixgbe_dma_malloc(struct adapter *, bus_size_t, - struct ixgbe_dma_alloc *, int); -static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *); - -/************************************************************************ - * ixgbe_legacy_start_locked - Transmit entry point - * - * Called by the stack to initiate a transmit. - * The driver will remain in this routine as long as there are - * packets to transmit and transmit resources are available. - * In case resources are not available, the stack is notified - * and the packet is requeued. 
- ************************************************************************/ -int -ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr) -{ - struct mbuf *m_head; - struct adapter *adapter = txr->adapter; - - IXGBE_TX_LOCK_ASSERT(txr); - - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - return (ENETDOWN); - if (!adapter->link_active) - return (ENETDOWN); - - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) - break; - - IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); - if (m_head == NULL) - break; - - if (ixgbe_xmit(txr, &m_head)) { - if (m_head != NULL) - IFQ_DRV_PREPEND(&ifp->if_snd, m_head); - break; - } - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, m_head); - } - - return IXGBE_SUCCESS; -} /* ixgbe_legacy_start_locked */ - -/************************************************************************ - * ixgbe_legacy_start - * - * Called by the stack, this always uses the first tx ring, - * and should not be used with multiqueue tx enabled. - ************************************************************************/ -void -ixgbe_legacy_start(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - IXGBE_TX_LOCK(txr); - ixgbe_legacy_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); - } -} /* ixgbe_legacy_start */ - -/************************************************************************ - * ixgbe_mq_start - Multiqueue Transmit Entry Point - * - * (if_transmit function) - ************************************************************************/ -int -ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) -{ - struct adapter *adapter = ifp->if_softc; - struct ix_queue *que; - struct tx_ring *txr; - int i, err = 0; - uint32_t bucket_id; - - /* - * When doing RSS, map it to the same outbound queue - * as the incoming flow would be mapped to. 
- * - * If everything is setup correctly, it should be the - * same bucket that the current CPU we're on is. - */ - if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { - if ((adapter->feat_en & IXGBE_FEATURE_RSS) && - (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), - &bucket_id) == 0)) { - i = bucket_id % adapter->num_queues; -#ifdef IXGBE_DEBUG - if (bucket_id > adapter->num_queues) - if_printf(ifp, - "bucket_id (%d) > num_queues (%d)\n", - bucket_id, adapter->num_queues); -#endif - } else - i = m->m_pkthdr.flowid % adapter->num_queues; - } else - i = curcpu % adapter->num_queues; - - /* Check for a hung queue and pick alternative */ - if (((1 << i) & adapter->active_queues) == 0) - i = ffsl(adapter->active_queues); - - txr = &adapter->tx_rings[i]; - que = &adapter->queues[i]; - - err = drbr_enqueue(ifp, txr->br, m); - if (err) - return (err); - if (IXGBE_TX_TRYLOCK(txr)) { - ixgbe_mq_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); - } else - taskqueue_enqueue(que->tq, &txr->txq_task); - - return (0); -} /* ixgbe_mq_start */ - -/************************************************************************ - * ixgbe_mq_start_locked - ************************************************************************/ -int -ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) -{ - struct mbuf *next; - int enqueued = 0, err = 0; - - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - return (ENETDOWN); - if (txr->adapter->link_active == 0) - return (ENETDOWN); - - /* Process the queue */ -#if __FreeBSD_version < 901504 - next = drbr_dequeue(ifp, txr->br); - while (next != NULL) { - if ((err = ixgbe_xmit(txr, &next)) != 0) { - if (next != NULL) - err = drbr_enqueue(ifp, txr->br, next); -#else - while ((next = drbr_peek(ifp, txr->br)) != NULL) { - err = ixgbe_xmit(txr, &next); - if (err != 0) { - if (next == NULL) - drbr_advance(ifp, txr->br); - else - drbr_putback(ifp, txr->br, next); -#endif - break; - } -#if __FreeBSD_version >= 901504 - drbr_advance(ifp, txr->br); -#endif - 
enqueued++; -#if __FreeBSD_version >= 1100036 - /* - * Since we're looking at the tx ring, we can check - * to see if we're a VF by examing our tail register - * address. - */ - if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) && - (next->m_flags & M_MCAST)) - if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); -#endif - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, next); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; -#if __FreeBSD_version < 901504 - next = drbr_dequeue(ifp, txr->br); -#endif - } - - if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter)) - ixgbe_txeof(txr); - - return (err); -} /* ixgbe_mq_start_locked */ - -/************************************************************************ - * ixgbe_deferred_mq_start - * - * Called from a taskqueue to drain queued transmit packets. - ************************************************************************/ -void -ixgbe_deferred_mq_start(void *arg, int pending) -{ - struct tx_ring *txr = arg; - struct adapter *adapter = txr->adapter; - struct ifnet *ifp = adapter->ifp; - - IXGBE_TX_LOCK(txr); - if (!drbr_empty(ifp, txr->br)) - ixgbe_mq_start_locked(ifp, txr); - IXGBE_TX_UNLOCK(txr); -} /* ixgbe_deferred_mq_start */ - -/************************************************************************ - * ixgbe_qflush - Flush all ring buffers - ************************************************************************/ -void -ixgbe_qflush(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - struct mbuf *m; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IXGBE_TX_LOCK(txr); - while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) - m_freem(m); - IXGBE_TX_UNLOCK(txr); - } - if_qflush(ifp); -} /* ixgbe_qflush */ - - -/************************************************************************ - * ixgbe_xmit - * - * Maps the mbufs to tx descriptors, allowing the - * TX engine to transmit the packets. 
- * - * Return 0 on success, positive on failure - ************************************************************************/ -static int -ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) -{ - struct adapter *adapter = txr->adapter; - struct ixgbe_tx_buf *txbuf; - union ixgbe_adv_tx_desc *txd = NULL; - struct mbuf *m_head; - int i, j, error, nsegs; - int first; - u32 olinfo_status = 0, cmd_type_len; - bool remap = TRUE; - bus_dma_segment_t segs[adapter->num_segs]; - bus_dmamap_t map; - - m_head = *m_headp; - - /* Basic descriptor defines */ - cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); - - if (m_head->m_flags & M_VLANTAG) - cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; - - /* - * Important to capture the first descriptor - * used because it will contain the index of - * the one we tell the hardware to report back - */ - first = txr->next_avail_desc; - txbuf = &txr->tx_buffers[first]; - map = txbuf->map; - - /* - * Map the packet for DMA. - */ -retry: - error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, - &nsegs, BUS_DMA_NOWAIT); - - if (__predict_false(error)) { - struct mbuf *m; - - switch (error) { - case EFBIG: - /* Try it again? 
- one try */ - if (remap == TRUE) { - remap = FALSE; - /* - * XXX: m_defrag will choke on - * non-MCLBYTES-sized clusters - */ - m = m_defrag(*m_headp, M_NOWAIT); - if (m == NULL) { - adapter->mbuf_defrag_failed++; - m_freem(*m_headp); - *m_headp = NULL; - return (ENOBUFS); - } - *m_headp = m; - goto retry; - } else - return (error); - case ENOMEM: - txr->no_tx_dma_setup++; - return (error); - default: - txr->no_tx_dma_setup++; - m_freem(*m_headp); - *m_headp = NULL; - return (error); - } - } - - /* Make certain there are enough descriptors */ - if (txr->tx_avail < (nsegs + 2)) { - txr->no_desc_avail++; - bus_dmamap_unload(txr->txtag, map); - return (ENOBUFS); - } - m_head = *m_headp; - - /* - * Set up the appropriate offload context - * this will consume the first descriptor - */ - error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); - if (__predict_false(error)) { - if (error == ENOBUFS) - *m_headp = NULL; - return (error); - } - - /* Do the flow director magic */ - if ((adapter->feat_en & IXGBE_FEATURE_FDIR) && - (txr->atr_sample) && (!adapter->fdir_reinit)) { - ++txr->atr_count; - if (txr->atr_count >= atr_sample_rate) { - ixgbe_atr(txr, m_head); - txr->atr_count = 0; - } - } - - olinfo_status |= IXGBE_ADVTXD_CC; - i = txr->next_avail_desc; - for (j = 0; j < nsegs; j++) { - bus_size_t seglen; - bus_addr_t segaddr; - - txbuf = &txr->tx_buffers[i]; - txd = &txr->tx_base[i]; - seglen = segs[j].ds_len; - segaddr = htole64(segs[j].ds_addr); - - txd->read.buffer_addr = segaddr; - txd->read.cmd_type_len = htole32(txr->txd_cmd | - cmd_type_len | seglen); - txd->read.olinfo_status = htole32(olinfo_status); - - if (++i == txr->num_desc) - i = 0; - } - - txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); - txr->tx_avail -= nsegs; - txr->next_avail_desc = i; - - txbuf->m_head = m_head; - /* - * Here we swap the map so the last descriptor, - * which gets the completion interrupt has the - * real map, and the first descriptor gets the 
- * unused map from this descriptor. - */ - txr->tx_buffers[first].map = txbuf->map; - txbuf->map = map; - bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); - - /* Set the EOP descriptor that will be marked done */ - txbuf = &txr->tx_buffers[first]; - txbuf->eop = txd; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * Advance the Transmit Descriptor Tail (Tdt), this tells the - * hardware that this frame is available to transmit. - */ - ++txr->total_packets; - IXGBE_WRITE_REG(&adapter->hw, txr->tail, i); - - /* Mark queue as having work */ - if (txr->busy == 0) - txr->busy = 1; - - return (0); -} /* ixgbe_xmit */ - - -/************************************************************************ - * ixgbe_allocate_transmit_buffers - * - * Allocate memory for tx_buffer structures. The tx_buffer stores all - * the information needed to transmit a packet on the wire. This is - * called only once at attach, setup is done every reset. - ************************************************************************/ -static int -ixgbe_allocate_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - device_t dev = adapter->dev; - struct ixgbe_tx_buf *txbuf; - int error, i; - - /* - * Setup DMA descriptor areas. 
- */ - error = bus_dma_tag_create( - /* parent */ bus_get_dma_tag(adapter->dev), - /* alignment */ 1, - /* bounds */ 0, - /* lowaddr */ BUS_SPACE_MAXADDR, - /* highaddr */ BUS_SPACE_MAXADDR, - /* filter */ NULL, - /* filterarg */ NULL, - /* maxsize */ IXGBE_TSO_SIZE, - /* nsegments */ adapter->num_segs, - /* maxsegsize */ PAGE_SIZE, - /* flags */ 0, - /* lockfunc */ NULL, - /* lockfuncarg */ NULL, - &txr->txtag); - if (error != 0) { - device_printf(dev, "Unable to allocate TX DMA tag\n"); - goto fail; - } - - txr->tx_buffers = - (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) * - adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); - if (txr->tx_buffers == NULL) { - device_printf(dev, "Unable to allocate tx_buffer memory\n"); - error = ENOMEM; - goto fail; - } - - /* Create the descriptor buffer dma maps */ - txbuf = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { - error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); - if (error != 0) { - device_printf(dev, "Unable to create TX DMA map\n"); - goto fail; - } - } - - return 0; -fail: - /* We free all, it handles case where we are in the middle */ - ixgbe_free_transmit_structures(adapter); - - return (error); -} /* ixgbe_allocate_transmit_buffers */ - -/************************************************************************ - * ixgbe_setup_transmit_ring - Initialize a transmit ring. - ************************************************************************/ -static void -ixgbe_setup_transmit_ring(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct ixgbe_tx_buf *txbuf; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - - /* Clear the old ring contents */ - IXGBE_TX_LOCK(txr); - -#ifdef DEV_NETMAP - if (adapter->feat_en & IXGBE_FEATURE_NETMAP) { - /* - * (under lock): if in netmap mode, do some consistency - * checks and set slot to entry 0 of the netmap ring. 
- */ - slot = netmap_reset(na, NR_TX, txr->me, 0); - } -#endif /* DEV_NETMAP */ - - bzero((void *)txr->tx_base, - (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); - /* Reset indices */ - txr->next_avail_desc = 0; - txr->next_to_clean = 0; - - /* Free any existing tx buffers. */ - txbuf = txr->tx_buffers; - for (int i = 0; i < txr->num_desc; i++, txbuf++) { - if (txbuf->m_head != NULL) { - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, txbuf->map); - m_freem(txbuf->m_head); - txbuf->m_head = NULL; - } - -#ifdef DEV_NETMAP - /* - * In netmap mode, set the map for the packet buffer. - * NOTE: Some drivers (not this one) also need to set - * the physical buffer address in the NIC ring. - * Slots in the netmap ring (indexed by "si") are - * kring->nkr_hwofs positions "ahead" wrt the - * corresponding slot in the NIC ring. In some drivers - * (not here) nkr_hwofs can be negative. Function - * netmap_idx_n2k() handles wraparounds properly. - */ - if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) { - int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); - netmap_load_map(na, txr->txtag, - txbuf->map, NMB(na, slot + si)); - } -#endif /* DEV_NETMAP */ - - /* Clear the EOP descriptor pointer */ - txbuf->eop = NULL; - } - - /* Set the rate at which we sample packets */ - if (adapter->feat_en & IXGBE_FEATURE_FDIR) - txr->atr_sample = atr_sample_rate; - - /* Set number of descriptors available */ - txr->tx_avail = adapter->num_tx_desc; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - IXGBE_TX_UNLOCK(txr); -} /* ixgbe_setup_transmit_ring */ - -/************************************************************************ - * ixgbe_setup_transmit_structures - Initialize all transmit rings. 
- ************************************************************************/ -int -ixgbe_setup_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) - ixgbe_setup_transmit_ring(txr); - - return (0); -} /* ixgbe_setup_transmit_structures */ - -/************************************************************************ - * ixgbe_free_transmit_structures - Free all transmit rings. - ************************************************************************/ -void -ixgbe_free_transmit_structures(struct adapter *adapter) -{ - struct tx_ring *txr = adapter->tx_rings; - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - IXGBE_TX_LOCK(txr); - ixgbe_free_transmit_buffers(txr); - ixgbe_dma_free(adapter, &txr->txdma); - IXGBE_TX_UNLOCK(txr); - IXGBE_TX_LOCK_DESTROY(txr); - } - free(adapter->tx_rings, M_DEVBUF); -} /* ixgbe_free_transmit_structures */ - -/************************************************************************ - * ixgbe_free_transmit_buffers - * - * Free transmit ring related data structures. 
- ************************************************************************/ -static void -ixgbe_free_transmit_buffers(struct tx_ring *txr) -{ - struct adapter *adapter = txr->adapter; - struct ixgbe_tx_buf *tx_buffer; - int i; - - INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin"); - - if (txr->tx_buffers == NULL) - return; - - tx_buffer = txr->tx_buffers; - for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { - if (tx_buffer->m_head != NULL) { - bus_dmamap_sync(txr->txtag, tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - if (tx_buffer->map != NULL) { - bus_dmamap_destroy(txr->txtag, tx_buffer->map); - tx_buffer->map = NULL; - } - } else if (tx_buffer->map != NULL) { - bus_dmamap_unload(txr->txtag, tx_buffer->map); - bus_dmamap_destroy(txr->txtag, tx_buffer->map); - tx_buffer->map = NULL; - } - } - if (txr->br != NULL) - buf_ring_free(txr->br, M_DEVBUF); - if (txr->tx_buffers != NULL) { - free(txr->tx_buffers, M_DEVBUF); - txr->tx_buffers = NULL; - } - if (txr->txtag != NULL) { - bus_dma_tag_destroy(txr->txtag); - txr->txtag = NULL; - } -} /* ixgbe_free_transmit_buffers */ +static int ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi); +static void ixgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx); +static int ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear); + +static void ixgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru); +static void ixgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx __unused, qidx_t pidx); +static int ixgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t pidx, + qidx_t budget); +static int ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri); + +static void ixgbe_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype); +static int ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *, if_pkt_info_t); + +extern void ixgbe_if_enable_intr(if_ctx_t ctx); +static int ixgbe_determine_rsstype(u16 pkt_info); + 
+struct if_txrx ixgbe_txrx = { + ixgbe_isc_txd_encap, + ixgbe_isc_txd_flush, + ixgbe_isc_txd_credits_update, + ixgbe_isc_rxd_available, + ixgbe_isc_rxd_pkt_get, + ixgbe_isc_rxd_refill, + ixgbe_isc_rxd_flush, + NULL +}; + +extern if_shared_ctx_t ixgbe_sctx; /************************************************************************ * ixgbe_tx_ctx_setup * * Advanced Context Descriptor setup for VLAN, CSUM or TSO + * ************************************************************************/ static int -ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, - u32 *cmd_type_len, u32 *olinfo_status) +ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *TXD, if_pkt_info_t pi) { - struct ixgbe_adv_tx_context_desc *TXD; - struct ether_vlan_header *eh; -#ifdef INET - struct ip *ip; -#endif -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - int ehdrlen, ip_hlen = 0; - int offload = TRUE; - int ctxd = txr->next_avail_desc; - u32 vlan_macip_lens = 0; - u32 type_tucmd_mlhl = 0; - u16 vtag = 0; - u16 etype; - u8 ipproto = 0; - caddr_t l3d; + u32 vlan_macip_lens, type_tucmd_mlhl; + u32 olinfo_status, mss_l4len_idx, pktlen, offload; + u8 ehdrlen; - - /* First check if TSO is to be used */ - if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) - return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status)); - - if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) - offload = FALSE; - - /* Indicate the whole packet as payload when not doing TSO */ - *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; - - /* Now ready a context descriptor */ - TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd]; + offload = TRUE; + olinfo_status = mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; + /* VLAN MACLEN IPLEN */ + vlan_macip_lens |= (htole16(pi->ipi_vtag) << IXGBE_ADVTXD_VLAN_SHIFT); /* - * In advanced descriptors the vlan tag must - * be placed into the context descriptor. Hence - * we need to make one even if not doing offloads. 
+ * Some of our VF devices need a context descriptor for every + * packet. That means the ehdrlen needs to be non-zero in order + * for the host driver not to flag a malicious event. The stack + * will most likely populate this for all other reasons of why + * this function was called. */ - if (mp->m_flags & M_VLANTAG) { - vtag = htole16(mp->m_pkthdr.ether_vtag); - vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); - } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) && - (offload == FALSE)) - return (0); - - /* - * Determine where frame payload starts. - * Jump over vlan headers if already present, - * helpful for QinQ too. - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - etype = ntohs(eh->evl_proto); - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - } else { - etype = ntohs(eh->evl_encap_proto); + if (pi->ipi_ehdrlen == 0) { ehdrlen = ETHER_HDR_LEN; - } - - /* Set the ether header length */ + ehdrlen += (pi->ipi_vtag != 0) ? 
ETHER_VLAN_ENCAP_LEN : 0; + } else + ehdrlen = pi->ipi_ehdrlen; vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; - if (offload == FALSE) - goto no_offloads; + pktlen = pi->ipi_len; + /* First check if TSO is to be used */ + if (pi->ipi_csum_flags & CSUM_TSO) { + /* This is used in the transmit desc in encap */ + pktlen = pi->ipi_len - ehdrlen - pi->ipi_ip_hlen - pi->ipi_tcp_hlen; + mss_l4len_idx |= (pi->ipi_tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); + mss_l4len_idx |= (pi->ipi_tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); + } - /* - * If the first mbuf only includes the ethernet header, - * jump to the next one - * XXX: This assumes the stack splits mbufs containing headers - * on header boundaries - * XXX: And assumes the entire IP header is contained in one mbuf - */ - if (mp->m_len == ehdrlen && mp->m_next) - l3d = mtod(mp->m_next, caddr_t); + olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT; + + if (pi->ipi_flags & IPI_TX_IPV4) { + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; + /* Tell transmit desc to also do IPv4 checksum. 
*/ + if (pi->ipi_csum_flags & (CSUM_IP|CSUM_TSO)) + olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; + } else if (pi->ipi_flags & IPI_TX_IPV6) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; else - l3d = mtod(mp, caddr_t) + ehdrlen; + offload = FALSE; - switch (etype) { -#ifdef INET - case ETHERTYPE_IP: - ip = (struct ip *)(l3d); - ip_hlen = ip->ip_hl << 2; - ipproto = ip->ip_p; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - /* Insert IPv4 checksum into data descriptors */ - if (mp->m_pkthdr.csum_flags & CSUM_IP) { - ip->ip_sum = 0; - *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; - } - break; -#endif -#ifdef INET6 - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(l3d); - ip_hlen = sizeof(struct ip6_hdr); - ipproto = ip6->ip6_nxt; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; -#endif - default: + vlan_macip_lens |= pi->ipi_ip_hlen; + + switch (pi->ipi_ipproto) { + case IPPROTO_TCP: + if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; + else offload = FALSE; - break; + break; + case IPPROTO_UDP: + if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; + else + offload = FALSE; + break; + case IPPROTO_SCTP: + if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) + type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; + else + offload = FALSE; + break; + default: + offload = FALSE; + break; } +/* Insert L4 checksum into data descriptors */ + if (offload) + olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; - vlan_macip_lens |= ip_hlen; - - /* No support for offloads for non-L4 next headers */ - switch (ipproto) { - case IPPROTO_TCP: - if (mp->m_pkthdr.csum_flags & - (CSUM_IP_TCP | CSUM_IP6_TCP)) - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; - else - offload = false; - break; - case IPPROTO_UDP: - if (mp->m_pkthdr.csum_flags & - (CSUM_IP_UDP | CSUM_IP6_UDP)) - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; - else - offload = false; - break; - case IPPROTO_SCTP: - if 
(mp->m_pkthdr.csum_flags & - (CSUM_IP_SCTP | CSUM_IP6_SCTP)) - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP; - else - offload = false; - break; - default: - offload = false; - break; - } - - if (offload) /* Insert L4 checksum into data descriptors */ - *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; - -no_offloads: type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; /* Now copy bits into descriptor */ TXD->vlan_macip_lens = htole32(vlan_macip_lens); TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); TXD->seqnum_seed = htole32(0); - TXD->mss_l4len_idx = htole32(0); + TXD->mss_l4len_idx = htole32(mss_l4len_idx); - /* We've consumed the first desc, adjust counters */ - if (++ctxd == txr->num_desc) - ctxd = 0; - txr->next_avail_desc = ctxd; - --txr->tx_avail; - - return (0); + return (olinfo_status); } /* ixgbe_tx_ctx_setup */ /************************************************************************ - * ixgbe_tso_setup - * - * Setup work for hardware segmentation offload (TSO) on - * adapters using advanced tx descriptors + * ixgbe_isc_txd_encap ************************************************************************/ static int -ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len, - u32 *olinfo_status) +ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi) { + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct ix_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx]; + struct tx_ring *txr = &que->txr; + int nsegs = pi->ipi_nsegs; + bus_dma_segment_t *segs = pi->ipi_segs; + union ixgbe_adv_tx_desc *txd = NULL; struct ixgbe_adv_tx_context_desc *TXD; - struct ether_vlan_header *eh; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif -#ifdef INET - struct ip *ip; -#endif - struct tcphdr *th; - int ctxd, ehdrlen, ip_hlen, tcp_hlen; - u32 vlan_macip_lens = 0; - u32 type_tucmd_mlhl = 0; - u32 mss_l4len_idx = 0, paylen; - u16 vtag = 0, eh_type; + int i, j, first, pidx_last; + u32 olinfo_status, cmd, flags; + qidx_t ntxd; - /* - * Determine 
where frame payload starts. - * Jump over vlan headers if already present - */ - eh = mtod(mp, struct ether_vlan_header *); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - eh_type = eh->evl_proto; + cmd = (IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); + + if (pi->ipi_mflags & M_VLANTAG) + cmd |= IXGBE_ADVTXD_DCMD_VLE; + + i = first = pi->ipi_pidx; + flags = (pi->ipi_flags & IPI_TX_INTR) ? IXGBE_TXD_CMD_RS : 0; + ntxd = scctx->isc_ntxd[0]; + + TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[first]; + if ((pi->ipi_csum_flags & CSUM_OFFLOAD) || + (sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) || + pi->ipi_vtag) { + /********************************************* + * Set up the appropriate offload context + * this will consume the first descriptor + *********************************************/ + olinfo_status = ixgbe_tx_ctx_setup(TXD, pi); + if (pi->ipi_csum_flags & CSUM_TSO) { + cmd |= IXGBE_ADVTXD_DCMD_TSE; + ++txr->tso_tx; + } + + if (++i == scctx->isc_ntxd[0]) + i = 0; } else { - ehdrlen = ETHER_HDR_LEN; - eh_type = eh->evl_encap_proto; + /* Indicate the whole packet as payload when not doing TSO */ + olinfo_status = pi->ipi_len << IXGBE_ADVTXD_PAYLEN_SHIFT; } - switch (ntohs(eh_type)) { -#ifdef INET - case ETHERTYPE_IP: - ip = (struct ip *)(mp->m_data + ehdrlen); - if (ip->ip_p != IPPROTO_TCP) - return (ENXIO); - ip->ip_sum = 0; - ip_hlen = ip->ip_hl << 2; - th = (struct tcphdr *)((caddr_t)ip + ip_hlen); - th->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htons(IPPROTO_TCP)); - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; - /* Tell transmit desc to also do IPv4 checksum. */ - *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; - break; -#endif -#ifdef INET6 - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); - /* XXX-BZ For now we do not pretend to support ext. hdrs. 
*/ - if (ip6->ip6_nxt != IPPROTO_TCP) - return (ENXIO); - ip_hlen = sizeof(struct ip6_hdr); - th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); - th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; - break; -#endif - default: - panic("%s: CSUM_TSO but no supported IP version (0x%04x)", - __func__, ntohs(eh_type)); - break; + olinfo_status |= IXGBE_ADVTXD_CC; + for (j = 0; j < nsegs; j++) { + bus_size_t seglen; + + txd = &txr->tx_base[i]; + seglen = segs[j].ds_len; + + txd->read.buffer_addr = htole64(segs[j].ds_addr); + txd->read.cmd_type_len = htole32(cmd | seglen); + txd->read.olinfo_status = htole32(olinfo_status); + + pidx_last = i; + if (++i == scctx->isc_ntxd[0]) { + i = 0; + } } - ctxd = txr->next_avail_desc; - TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd]; - - tcp_hlen = th->th_off << 2; - - /* This is used in the transmit desc in encap */ - paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; - - /* VLAN MACLEN IPLEN */ - if (mp->m_flags & M_VLANTAG) { - vtag = htole16(mp->m_pkthdr.ether_vtag); - vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); + if (flags) { + txr->tx_rsq[txr->tx_rs_pidx] = pidx_last; + txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1); } + txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | flags); - vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= ip_hlen; - TXD->vlan_macip_lens = htole32(vlan_macip_lens); + txr->bytes += pi->ipi_len; + pi->ipi_new_pidx = i; - /* ADV DTYPE TUCMD */ - type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; - type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; - TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); - - /* MSS L4LEN IDX */ - mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); - mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); - TXD->mss_l4len_idx = htole32(mss_l4len_idx); - - TXD->seqnum_seed = htole32(0); - - if (++ctxd == txr->num_desc) - ctxd = 0; - - 
txr->tx_avail--; - txr->next_avail_desc = ctxd; - *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; - *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; - *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; - ++txr->tso_tx; + ++txr->total_packets; return (0); -} /* ixgbe_tso_setup */ - +} /* ixgbe_isc_txd_encap */ /************************************************************************ - * ixgbe_txeof - * - * Examine each tx_buffer in the used queue. If the hardware is done - * processing the packet then free associated resources. The - * tx_buffer is put back on the free queue. + * ixgbe_isc_txd_flush ************************************************************************/ -void -ixgbe_txeof(struct tx_ring *txr) +static void +ixgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx) { - struct adapter *adapter = txr->adapter; - struct ixgbe_tx_buf *buf; - union ixgbe_adv_tx_desc *txd; - u32 work, processed = 0; - u32 limit = adapter->tx_process_limit; + struct adapter *sc = arg; + struct ix_tx_queue *que = &sc->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; - mtx_assert(&txr->tx_mtx, MA_OWNED); + IXGBE_WRITE_REG(&sc->hw, txr->tail, pidx); +} /* ixgbe_isc_txd_flush */ -#ifdef DEV_NETMAP - if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && - (adapter->ifp->if_capenable & IFCAP_NETMAP)) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->tx_rings[txr->me]; - txd = txr->tx_base; - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - /* - * In netmap mode, all the work is done in the context - * of the client thread. Interrupt handlers only wake up - * clients, which may be sleeping on individual rings - * or on a global resource for all rings. - * To implement tx interrupt mitigation, we wake up the client - * thread roughly every half ring, even if the NIC interrupts - * more frequently. 
This is implemented as follows: - * - ixgbe_txsync() sets kring->nr_kflags with the index of - * the slot that should wake up the thread (nkr_num_slots - * means the user thread should not be woken up); - * - the driver ignores tx interrupts unless netmap_mitigate=0 - * or the slot has the DD bit set. - */ - if (!netmap_mitigate || - (kring->nr_kflags < kring->nkr_num_slots && - txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { - netmap_tx_irq(adapter->ifp, txr->me); - } - return; - } -#endif /* DEV_NETMAP */ +/************************************************************************ + * ixgbe_isc_txd_credits_update + ************************************************************************/ +static int +ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) +{ + struct adapter *sc = arg; + if_softc_ctx_t scctx = sc->shared; + struct ix_tx_queue *que = &sc->tx_queues[txqid]; + struct tx_ring *txr = &que->txr; + qidx_t processed = 0; + int updated; + qidx_t cur, prev, ntxd, rs_cidx; + int32_t delta; + uint8_t status; - if (txr->tx_avail == txr->num_desc) { - txr->busy = 0; - return; - } + rs_cidx = txr->tx_rs_cidx; + if (rs_cidx == txr->tx_rs_pidx) + return (0); - /* Get work starting point */ - work = txr->next_to_clean; - buf = &txr->tx_buffers[work]; - txd = &txr->tx_base[work]; - work -= txr->num_desc; /* The distance to ring end */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); + cur = txr->tx_rsq[rs_cidx]; + status = txr->tx_base[cur].wb.status; + updated = !!(status & IXGBE_TXD_STAT_DD); + if (clear == false || updated == 0) + return (updated); + + prev = txr->tx_cidx_processed; + ntxd = scctx->isc_ntxd[0]; do { - union ixgbe_adv_tx_desc *eop = buf->eop; - if (eop == NULL) /* No work */ + delta = (int32_t)cur - (int32_t)prev; + if (delta < 0) + delta += ntxd; + + processed += delta; + prev = cur; + rs_cidx = (rs_cidx + 1) & (ntxd - 1); + if (rs_cidx == txr->tx_rs_pidx) break; - if ((eop->wb.status & 
IXGBE_TXD_STAT_DD) == 0) - break; /* I/O not complete */ + cur = txr->tx_rsq[rs_cidx]; + status = txr->tx_base[cur].wb.status; + } while ((status & IXGBE_TXD_STAT_DD)); - if (buf->m_head) { - txr->bytes += buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag, buf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - } - buf->eop = NULL; - ++txr->tx_avail; + txr->tx_rs_cidx = rs_cidx; + txr->tx_cidx_processed = prev; - /* We clean the range if multi segment */ - while (txd != eop) { - ++txd; - ++buf; - ++work; - /* wrap the ring? */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - if (buf->m_head) { - txr->bytes += buf->m_head->m_pkthdr.len; - bus_dmamap_sync(txr->txtag, buf->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(txr->txtag, buf->map); - m_freem(buf->m_head); - buf->m_head = NULL; - } - ++txr->tx_avail; - buf->eop = NULL; - - } - ++txr->packets; - ++processed; - - /* Try the next packet */ - ++txd; - ++buf; - ++work; - /* reset with a wrap */ - if (__predict_false(!work)) { - work -= txr->num_desc; - buf = txr->tx_buffers; - txd = txr->tx_base; - } - prefetch(txd); - } while (__predict_true(--limit)); - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - work += txr->num_desc; - txr->next_to_clean = work; - - /* - * Queue Hang detection, we know there's - * work outstanding or the first return - * would have been taken, so increment busy - * if nothing managed to get cleaned, then - * in local_timer it will be checked and - * marked as HUNG if it exceeds a MAX attempt. - */ - if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG)) - ++txr->busy; - /* - * If anything gets cleaned we reset state to 1, - * note this will turn off HUNG if its set. 
- */ - if (processed) - txr->busy = 1; - - if (txr->tx_avail == txr->num_desc) - txr->busy = 0; - - return; -} /* ixgbe_txeof */ + return (processed); +} /* ixgbe_isc_txd_credits_update */ /************************************************************************ - * ixgbe_rsc_count - * - * Used to detect a descriptor that has been merged by Hardware RSC. - ************************************************************************/ -static inline u32 -ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) -{ - return (le32toh(rx->wb.lower.lo_dword.data) & - IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; -} /* ixgbe_rsc_count */ - -/************************************************************************ - * ixgbe_setup_hw_rsc - * - * Initialize Hardware RSC (LRO) feature on 82599 - * for an RX ring, this is toggled by the LRO capability - * even though it is transparent to the stack. - * - * NOTE: Since this HW feature only works with IPv4 and - * testing has shown soft LRO to be as effective, - * this feature will be disabled by default. 
+ * ixgbe_isc_rxd_refill ************************************************************************/ static void -ixgbe_setup_hw_rsc(struct rx_ring *rxr) +ixgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru) { - struct adapter *adapter = rxr->adapter; - struct ixgbe_hw *hw = &adapter->hw; - u32 rscctrl, rdrxctl; + struct adapter *sc = arg; + struct ix_rx_queue *que = &sc->rx_queues[iru->iru_qsidx]; + struct rx_ring *rxr = &que->rxr; + uint64_t *paddrs; + int i; + uint32_t next_pidx, pidx; + uint16_t count; - /* If turning LRO/RSC off we need to disable it */ - if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { - rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); - rscctrl &= ~IXGBE_RSCCTL_RSCEN; - return; + paddrs = iru->iru_paddrs; + pidx = iru->iru_pidx; + count = iru->iru_count; + + for (i = 0, next_pidx = pidx; i < count; i++) { + rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]); + if (++next_pidx == sc->shared->isc_nrxd[0]) + next_pidx = 0; } - - rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; -#ifdef DEV_NETMAP - /* Always strip CRC unless Netmap disabled it */ - if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) || - !(adapter->ifp->if_capenable & IFCAP_NETMAP) || - ix_crcstrip) -#endif /* DEV_NETMAP */ - rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; - rdrxctl |= IXGBE_RDRXCTL_RSCACKC; - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); - - rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); - rscctrl |= IXGBE_RSCCTL_RSCEN; - /* - * Limit the total number of descriptors that - * can be combined, so it does not exceed 64K - */ - if (rxr->mbuf_sz == MCLBYTES) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; - else if (rxr->mbuf_sz == MJUMPAGESIZE) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; - else if (rxr->mbuf_sz == MJUM9BYTES) - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; - else /* Using 16K cluster */ - rscctrl |= IXGBE_RSCCTL_MAXDESC_1; - - IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); - - /* Enable TCP header recognition */ - 
IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), - (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR)); - - /* Disable RSC for ACK packets */ - IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, - (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); - - rxr->hw_rsc = TRUE; -} /* ixgbe_setup_hw_rsc */ +} /* ixgbe_isc_rxd_refill */ /************************************************************************ - * ixgbe_refresh_mbufs - * - * Refresh mbuf buffers for RX descriptor rings - * - now keeps its own state so discards due to resource - * exhaustion are unnecessary, if an mbuf cannot be obtained - * it just returns, keeping its placeholder, thus it can simply - * be recalled to try again. + * ixgbe_isc_rxd_flush ************************************************************************/ static void -ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) +ixgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx __unused, qidx_t pidx) { - struct adapter *adapter = rxr->adapter; - struct ixgbe_rx_buf *rxbuf; - struct mbuf *mp; - bus_dma_segment_t seg[1]; - int i, j, nsegs, error; - bool refreshed = FALSE; + struct adapter *sc = arg; + struct ix_rx_queue *que = &sc->rx_queues[qsidx]; + struct rx_ring *rxr = &que->rxr; - i = j = rxr->next_to_refresh; - /* Control the loop with one beyond */ - if (++j == rxr->num_desc) - j = 0; - - while (j != limit) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->buf == NULL) { - mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, - rxr->mbuf_sz); - if (mp == NULL) - goto update; - if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) - m_adj(mp, ETHER_ALIGN); - } else - mp = rxbuf->buf; - - mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; - - /* If we're dealing with an mbuf that was copied rather - * than replaced, there's no need to go through busdma. 
- */ - if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { - /* Get the memory mapping */ - bus_dmamap_unload(rxr->ptag, rxbuf->pmap); - error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, - mp, seg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: payload dmamap load failure - %d\n", error); - m_free(mp); - rxbuf->buf = NULL; - goto update; - } - rxbuf->buf = mp; - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - rxbuf->addr = rxr->rx_base[i].read.pkt_addr = - htole64(seg[0].ds_addr); - } else { - rxr->rx_base[i].read.pkt_addr = rxbuf->addr; - rxbuf->flags &= ~IXGBE_RX_COPY; - } - - refreshed = TRUE; - /* Next is precalculated */ - i = j; - rxr->next_to_refresh = i; - if (++j == rxr->num_desc) - j = 0; - } - -update: - if (refreshed) /* Update hardware tail index */ - IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh); - - return; -} /* ixgbe_refresh_mbufs */ + IXGBE_WRITE_REG(&sc->hw, rxr->tail, pidx); +} /* ixgbe_isc_rxd_flush */ /************************************************************************ - * ixgbe_allocate_receive_buffers - * - * Allocate memory for rx_buffer structures. Since we use one - * rx_buffer per received packet, the maximum number of rx_buffer's - * that we'll need is equal to the number of receive descriptors - * that we've allocated. 
+ * ixgbe_isc_rxd_available ************************************************************************/ static int -ixgbe_allocate_receive_buffers(struct rx_ring *rxr) +ixgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t pidx, qidx_t budget) { - struct adapter *adapter = rxr->adapter; - device_t dev = adapter->dev; - struct ixgbe_rx_buf *rxbuf; - int bsize, error; + struct adapter *sc = arg; + struct ix_rx_queue *que = &sc->rx_queues[qsidx]; + struct rx_ring *rxr = &que->rxr; + union ixgbe_adv_rx_desc *rxd; + u32 staterr; + int cnt, i, nrxd; - bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; - rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF, - M_NOWAIT | M_ZERO); - if (rxr->rx_buffers == NULL) { - device_printf(dev, "Unable to allocate rx_buffer memory\n"); - error = ENOMEM; - goto fail; + if (budget == 1) { + rxd = &rxr->rx_base[pidx]; + staterr = le32toh(rxd->wb.upper.status_error); + + return (staterr & IXGBE_RXD_STAT_DD); } - error = bus_dma_tag_create( - /* parent */ bus_get_dma_tag(dev), - /* alignment */ 1, - /* bounds */ 0, - /* lowaddr */ BUS_SPACE_MAXADDR, - /* highaddr */ BUS_SPACE_MAXADDR, - /* filter */ NULL, - /* filterarg */ NULL, - /* maxsize */ MJUM16BYTES, - /* nsegments */ 1, - /* maxsegsize */ MJUM16BYTES, - /* flags */ 0, - /* lockfunc */ NULL, - /* lockfuncarg */ NULL, - &rxr->ptag); - if (error != 0) { - device_printf(dev, "Unable to create RX DMA tag\n"); - goto fail; - } - - for (int i = 0; i < rxr->num_desc; i++, rxbuf++) { - rxbuf = &rxr->rx_buffers[i]; - error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); - if (error) { - device_printf(dev, "Unable to create RX dma map\n"); - goto fail; - } - } - - return (0); - -fail: - /* Frees all, but can handle partial completion */ - ixgbe_free_receive_structures(adapter); - - return (error); -} /* ixgbe_allocate_receive_buffers */ - -/************************************************************************ - * ixgbe_free_receive_ring - 
************************************************************************/ -static void -ixgbe_free_receive_ring(struct rx_ring *rxr) -{ - for (int i = 0; i < rxr->num_desc; i++) { - ixgbe_rx_discard(rxr, i); - } -} /* ixgbe_free_receive_ring */ - -/************************************************************************ - * ixgbe_setup_receive_ring - * - * Initialize a receive ring and its buffers. - ************************************************************************/ -static int -ixgbe_setup_receive_ring(struct rx_ring *rxr) -{ - struct adapter *adapter; - struct ifnet *ifp; - device_t dev; - struct ixgbe_rx_buf *rxbuf; - struct lro_ctrl *lro = &rxr->lro; -#ifdef DEV_NETMAP - struct netmap_adapter *na = NA(rxr->adapter->ifp); - struct netmap_slot *slot; -#endif /* DEV_NETMAP */ - bus_dma_segment_t seg[1]; - int rsize, nsegs, error = 0; - - adapter = rxr->adapter; - ifp = adapter->ifp; - dev = adapter->dev; - - /* Clear the ring contents */ - IXGBE_RX_LOCK(rxr); - -#ifdef DEV_NETMAP - if (adapter->feat_en & IXGBE_FEATURE_NETMAP) - slot = netmap_reset(na, NR_RX, rxr->me, 0); -#endif /* DEV_NETMAP */ - - rsize = roundup2(adapter->num_rx_desc * - sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); - bzero((void *)rxr->rx_base, rsize); - /* Cache the size */ - rxr->mbuf_sz = adapter->rx_mbuf_sz; - - /* Free current RX buffer structs and their mbufs */ - ixgbe_free_receive_ring(rxr); - - /* Now replenish the mbufs */ - for (int j = 0; j != rxr->num_desc; ++j) { - struct mbuf *mp; - - rxbuf = &rxr->rx_buffers[j]; - -#ifdef DEV_NETMAP - /* - * In netmap mode, fill the map and set the buffer - * address in the NIC ring, considering the offset - * between the netmap and NIC rings (see comment in - * ixgbe_setup_transmit_ring() ). 
No need to allocate - * an mbuf, so end the block with a continue; - */ - if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) { - int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); - uint64_t paddr; - void *addr; - - addr = PNMB(na, slot + sj, &paddr); - netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); - /* Update descriptor and the cached value */ - rxr->rx_base[j].read.pkt_addr = htole64(paddr); - rxbuf->addr = htole64(paddr); - continue; - } -#endif /* DEV_NETMAP */ - - rxbuf->flags = 0; - rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, - adapter->rx_mbuf_sz); - if (rxbuf->buf == NULL) { - error = ENOBUFS; - goto fail; - } - mp = rxbuf->buf; - mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg, - &nsegs, BUS_DMA_NOWAIT); - if (error != 0) - goto fail; - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD); - /* Update the descriptor and the cached value */ - rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr); - rxbuf->addr = htole64(seg[0].ds_addr); - } - - - /* Setup our descriptor indices */ - rxr->next_to_check = 0; - rxr->next_to_refresh = 0; - rxr->lro_enabled = FALSE; - rxr->rx_copies = 0; - rxr->rx_bytes = 0; - rxr->vtag_strip = FALSE; - - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* - * Now set up the LRO interface - */ - if (ixgbe_rsc_enable) - ixgbe_setup_hw_rsc(rxr); - else if (ifp->if_capenable & IFCAP_LRO) { - int err = tcp_lro_init(lro); - if (err) { - device_printf(dev, "LRO Initialization failed!\n"); - goto fail; - } - INIT_DEBUGOUT("RX Soft LRO Initialized\n"); - rxr->lro_enabled = TRUE; - lro->ifp = adapter->ifp; - } - - IXGBE_RX_UNLOCK(rxr); - - return (0); - -fail: - ixgbe_free_receive_ring(rxr); - IXGBE_RX_UNLOCK(rxr); - - return (error); -} /* ixgbe_setup_receive_ring */ - -/************************************************************************ - * 
ixgbe_setup_receive_structures - Initialize all receive rings. - ************************************************************************/ -int -ixgbe_setup_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - int j; - - for (j = 0; j < adapter->num_queues; j++, rxr++) - if (ixgbe_setup_receive_ring(rxr)) - goto fail; - - return (0); -fail: - /* - * Free RX buffers allocated so far, we will only handle - * the rings that completed, the failing case will have - * cleaned up for itself. 'j' failed, so its the terminus. - */ - for (int i = 0; i < j; ++i) { - rxr = &adapter->rx_rings[i]; - IXGBE_RX_LOCK(rxr); - ixgbe_free_receive_ring(rxr); - IXGBE_RX_UNLOCK(rxr); - } - - return (ENOBUFS); -} /* ixgbe_setup_receive_structures */ - - -/************************************************************************ - * ixgbe_free_receive_structures - Free all receive rings. - ************************************************************************/ -void -ixgbe_free_receive_structures(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - - INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - ixgbe_free_receive_buffers(rxr); - /* Free LRO memory */ - tcp_lro_free(&rxr->lro); - /* Free the ring memory as well */ - ixgbe_dma_free(adapter, &rxr->rxdma); - } - - free(adapter->rx_rings, M_DEVBUF); -} /* ixgbe_free_receive_structures */ - - -/************************************************************************ - * ixgbe_free_receive_buffers - Free receive ring data structures - ************************************************************************/ -static void -ixgbe_free_receive_buffers(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - struct ixgbe_rx_buf *rxbuf; - - INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); - - /* Cleanup any existing buffers */ - if (rxr->rx_buffers != NULL) { - for (int i = 0; i < adapter->num_rx_desc; i++) { - 
rxbuf = &rxr->rx_buffers[i]; - ixgbe_rx_discard(rxr, i); - if (rxbuf->pmap != NULL) { - bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); - rxbuf->pmap = NULL; - } - } - if (rxr->rx_buffers != NULL) { - free(rxr->rx_buffers, M_DEVBUF); - rxr->rx_buffers = NULL; - } - } - - if (rxr->ptag != NULL) { - bus_dma_tag_destroy(rxr->ptag); - rxr->ptag = NULL; - } - - return; -} /* ixgbe_free_receive_buffers */ - -/************************************************************************ - * ixgbe_rx_input - ************************************************************************/ -static __inline void -ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, - u32 ptype) -{ - /* - * ATM LRO is only for IP/TCP packets and TCP checksum of the packet - * should be computed by hardware. Also it should not have VLAN tag in - * ethernet header. In case of IPv6 we do not yet support ext. hdrs. - */ - if (rxr->lro_enabled && - (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && - (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && - ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == - (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) || - (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) == - (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) && - (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == - (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { - /* - * Send to the stack if: - * - LRO not enabled, or - * - no LRO resources, or - * - lro enqueue fails - */ - if (rxr->lro.lro_cnt != 0) - if (tcp_lro_rx(&rxr->lro, m, 0) == 0) - return; - } - (*ifp->if_input)(ifp, m); -} /* ixgbe_rx_input */ - -/************************************************************************ - * ixgbe_rx_discard - ************************************************************************/ -static __inline void -ixgbe_rx_discard(struct rx_ring *rxr, int i) -{ - struct ixgbe_rx_buf *rbuf; - - rbuf = &rxr->rx_buffers[i]; - - /* - * With advanced descriptors the 
writeback - * clobbers the buffer addrs, so its easier - * to just free the existing mbufs and take - * the normal refresh path to get new buffers - * and mapping. - */ - - if (rbuf->fmp != NULL) {/* Partial chain ? */ - bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD); - m_freem(rbuf->fmp); - rbuf->fmp = NULL; - rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */ - } else if (rbuf->buf) { - bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD); - m_free(rbuf->buf); - rbuf->buf = NULL; - } - bus_dmamap_unload(rxr->ptag, rbuf->pmap); - - rbuf->flags = 0; - - return; -} /* ixgbe_rx_discard */ - - -/************************************************************************ - * ixgbe_rxeof - * - * Executes in interrupt context. It replenishes the - * mbufs in the descriptor and sends data which has - * been dma'ed into host memory to upper layer. - * - * Return TRUE for more work, FALSE for all clean. - ************************************************************************/ -bool -ixgbe_rxeof(struct ix_queue *que) -{ - struct adapter *adapter = que->adapter; - struct rx_ring *rxr = que->rxr; - struct ifnet *ifp = adapter->ifp; - struct lro_ctrl *lro = &rxr->lro; - union ixgbe_adv_rx_desc *cur; - struct ixgbe_rx_buf *rbuf, *nbuf; - int i, nextp, processed = 0; - u32 staterr = 0; - u32 count = adapter->rx_process_limit; - u16 pkt_info; - - IXGBE_RX_LOCK(rxr); - -#ifdef DEV_NETMAP - if (adapter->feat_en & IXGBE_FEATURE_NETMAP) { - /* Same as the txeof routine: wakeup clients on intr. */ - if (netmap_rx_irq(ifp, rxr->me, &processed)) { - IXGBE_RX_UNLOCK(rxr); - return (FALSE); - } - } -#endif /* DEV_NETMAP */ - - for (i = rxr->next_to_check; count != 0;) { - struct mbuf *sendmp, *mp; - u32 rsc, ptype; - u16 len; - u16 vtag = 0; - bool eop; - - /* Sync the ring. 
*/ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - cur = &rxr->rx_base[i]; - staterr = le32toh(cur->wb.upper.status_error); - pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); + nrxd = sc->shared->isc_nrxd[0]; + // em has cnt < nrxd. off by 1 here or there? +// for (cnt = 0, i = pidx; cnt < nrxd && cnt <= budget;) { + for (cnt = 0, i = pidx; cnt < nrxd-1 && cnt <= budget;) { + rxd = &rxr->rx_base[i]; + staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; + if (++i == nrxd) + i = 0; + if (staterr & IXGBE_RXD_STAT_EOP) + cnt++; + } - count--; - sendmp = NULL; - nbuf = NULL; - rsc = 0; - cur->wb.upper.status_error = 0; - rbuf = &rxr->rx_buffers[i]; - mp = rbuf->buf; + return (cnt); +} /* ixgbe_isc_rxd_available */ - len = le16toh(cur->wb.upper.length); - ptype = le32toh(cur->wb.lower.lo_dword.data) & - IXGBE_RXDADV_PKTTYPE_MASK; +/************************************************************************ + * ixgbe_isc_rxd_pkt_get + * + * Routine sends data which has been dma'ed into host memory + * to upper layer. Initialize ri structure. 
+ * + * Returns 0 upon success, errno on failure + ************************************************************************/ + +static int +ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) +{ + struct adapter *adapter = arg; + struct ix_rx_queue *que = &adapter->rx_queues[ri->iri_qsidx]; + struct rx_ring *rxr = &que->rxr; + struct ifnet *ifp = iflib_get_ifp(adapter->ctx); + union ixgbe_adv_rx_desc *rxd; + + u16 pkt_info, len, cidx, i; + u16 vtag = 0; + u32 ptype; + u32 staterr = 0; + bool eop; + + i = 0; + cidx = ri->iri_cidx; + do { + rxd = &rxr->rx_base[cidx]; + staterr = le32toh(rxd->wb.upper.status_error); + pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info); + + /* Error Checking then decrement count */ + MPASS ((staterr & IXGBE_RXD_STAT_DD) != 0); + + len = le16toh(rxd->wb.upper.length); + ptype = le32toh(rxd->wb.lower.lo_dword.data) & + IXGBE_RXDADV_PKTTYPE_MASK; + + ri->iri_len += len; + rxr->bytes += len; + + rxd->wb.upper.status_error = 0; eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); + if (staterr & IXGBE_RXD_STAT_VP) { + vtag = le16toh(rxd->wb.upper.vlan); + } else { + vtag = 0; + } /* Make sure bad packets are discarded */ if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { + #if __FreeBSD_version >= 1100036 if (adapter->feat_en & IXGBE_FEATURE_VF) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); #endif + rxr->rx_discarded++; - ixgbe_rx_discard(rxr, i); - goto next_desc; + return (EBADMSG); } + ri->iri_frags[i].irf_flid = 0; + ri->iri_frags[i].irf_idx = cidx; + ri->iri_frags[i].irf_len = len; + if (++cidx == adapter->shared->isc_nrxd[0]) + cidx = 0; + i++; + /* even a 16K packet shouldn't consume more than 8 clusters */ + MPASS(i < 9); + } while (!eop); - bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD); + rxr->rx_packets++; + rxr->packets++; + rxr->rx_bytes += ri->iri_len; - /* - * On 82599 which supports a hardware - * LRO (called HW RSC), packets need - * not be fragmented across sequential - * descriptors, rather the next 
descriptor - * is indicated in bits of the descriptor. - * This also means that we might proceses - * more than one packet at a time, something - * that has never been true before, it - * required eliminating global chain pointers - * in favor of what we are doing here. -jfv - */ - if (!eop) { - /* - * Figure out the next descriptor - * of this frame. - */ - if (rxr->hw_rsc == TRUE) { - rsc = ixgbe_rsc_count(cur); - rxr->rsc_num += (rsc - 1); - } - if (rsc) { /* Get hardware index */ - nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >> - IXGBE_RXDADV_NEXTP_SHIFT); - } else { /* Just sequential */ - nextp = i + 1; - if (nextp == adapter->num_rx_desc) - nextp = 0; - } - nbuf = &rxr->rx_buffers[nextp]; - prefetch(nbuf); - } - /* - * Rather than using the fmp/lmp global pointers - * we now keep the head of a packet chain in the - * buffer struct and pass this along from one - * descriptor to the next, until we get EOP. - */ - mp->m_len = len; - /* - * See if there is a stored head - * that determines what we are - */ - sendmp = rbuf->fmp; - if (sendmp != NULL) { /* secondary frag */ - rbuf->buf = rbuf->fmp = NULL; - mp->m_flags &= ~M_PKTHDR; - sendmp->m_pkthdr.len += mp->m_len; - } else { - /* - * Optimize. This might be a small packet, - * maybe just a TCP ACK. Do a fast copy that - * is cache aligned into a new mbuf, and - * leave the old mbuf+cluster for re-use. 
- */ - if (eop && len <= IXGBE_RX_COPY_LEN) { - sendmp = m_gethdr(M_NOWAIT, MT_DATA); - if (sendmp != NULL) { - sendmp->m_data += IXGBE_RX_COPY_ALIGN; - ixgbe_bcopy(mp->m_data, sendmp->m_data, - len); - sendmp->m_len = len; - rxr->rx_copies++; - rbuf->flags |= IXGBE_RX_COPY; - } - } - if (sendmp == NULL) { - rbuf->buf = rbuf->fmp = NULL; - sendmp = mp; - } - - /* first desc of a non-ps chain */ - sendmp->m_flags |= M_PKTHDR; - sendmp->m_pkthdr.len = mp->m_len; - } - ++processed; - - /* Pass the head pointer on */ - if (eop == 0) { - nbuf->fmp = sendmp; - sendmp = NULL; - mp->m_next = nbuf->buf; - } else { /* Sending this frame */ - sendmp->m_pkthdr.rcvif = ifp; - rxr->rx_packets++; - /* capture data for AIM */ - rxr->bytes += sendmp->m_pkthdr.len; - rxr->rx_bytes += sendmp->m_pkthdr.len; - /* Process vlan info */ - if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP)) - vtag = le16toh(cur->wb.upper.vlan); - if (vtag) { - sendmp->m_pkthdr.ether_vtag = vtag; - sendmp->m_flags |= M_VLANTAG; - } - if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) - ixgbe_rx_checksum(staterr, sendmp, ptype); - - /* - * In case of multiqueue, we have RXCSUM.PCSD bit set - * and never cleared. This means we have RSS hash - * available to be used. 
- */ - if (adapter->num_queues > 1) { - sendmp->m_pkthdr.flowid = - le32toh(cur->wb.lower.hi_dword.rss); - switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { - case IXGBE_RXDADV_RSSTYPE_IPV4: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_TCP_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_TCP_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_EX: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_IPV6_EX); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_TCP_IPV6_EX); - break; -#if __FreeBSD_version > 1100000 - case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_UDP_IPV4); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_UDP_IPV6); - break; - case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_RSS_UDP_IPV6_EX); - break; -#endif - default: - M_HASHTYPE_SET(sendmp, - M_HASHTYPE_OPAQUE_HASH); - } - } else { - sendmp->m_pkthdr.flowid = que->msix; - M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE); - } - } -next_desc: - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* Advance our pointers to the next descriptor. 
*/ - if (++i == rxr->num_desc) - i = 0; - - /* Now send to the stack or do LRO */ - if (sendmp != NULL) { - rxr->next_to_check = i; - IXGBE_RX_UNLOCK(rxr); - ixgbe_rx_input(rxr, ifp, sendmp, ptype); - IXGBE_RX_LOCK(rxr); - i = rxr->next_to_check; - } - - /* Every 8 descriptors we go to refresh mbufs */ - if (processed == 8) { - ixgbe_refresh_mbufs(rxr, i); - processed = 0; - } - } - - /* Refresh any remaining buf structs */ - if (ixgbe_rx_unrefreshed(rxr)) - ixgbe_refresh_mbufs(rxr, i); - - rxr->next_to_check = i; - - IXGBE_RX_UNLOCK(rxr); - - /* - * Flush any outstanding LRO work - */ - tcp_lro_flush_all(lro); - - /* - * Still have cleaning to do? - */ - if ((staterr & IXGBE_RXD_STAT_DD) != 0) - return (TRUE); - - return (FALSE); -} /* ixgbe_rxeof */ + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + ixgbe_rx_checksum(staterr, ri, ptype); + ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss); + ri->iri_rsstype = ixgbe_determine_rsstype(pkt_info); + ri->iri_vtag = vtag; + ri->iri_nfrags = i; + if (vtag) + ri->iri_flags |= M_VLANTAG; + return (0); +} /* ixgbe_isc_rxd_pkt_get */ /************************************************************************ * ixgbe_rx_checksum @@ -1922,7 +480,7 @@ next_desc: * doesn't spend time verifying the checksum. 
************************************************************************/ static void -ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype) +ixgbe_rx_checksum(u32 staterr, if_rxd_info_t ri, u32 ptype) { u16 status = (u16)staterr; u8 errors = (u8)(staterr >> 24); @@ -1930,270 +488,59 @@ ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype) if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0) - sctp = true; + sctp = TRUE; /* IPv4 checksum */ if (status & IXGBE_RXD_STAT_IPCS) { - mp->m_pkthdr.csum_flags |= CSUM_L3_CALC; - /* IP Checksum Good */ - if (!(errors & IXGBE_RXD_ERR_IPE)) - mp->m_pkthdr.csum_flags |= CSUM_L3_VALID; + if (!(errors & IXGBE_RXD_ERR_IPE)) { + /* IP Checksum Good */ + ri->iri_csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID; + } else + ri->iri_csum_flags = 0; } /* TCP/UDP/SCTP checksum */ if (status & IXGBE_RXD_STAT_L4CS) { - mp->m_pkthdr.csum_flags |= CSUM_L4_CALC; + u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); +#if __FreeBSD_version >= 800000 + if (sctp) + type = CSUM_SCTP_VALID; +#endif if (!(errors & IXGBE_RXD_ERR_TCPE)) { - mp->m_pkthdr.csum_flags |= CSUM_L4_VALID; + ri->iri_csum_flags |= type; if (!sctp) - mp->m_pkthdr.csum_data = htons(0xffff); + ri->iri_csum_data = htons(0xffff); } } } /* ixgbe_rx_checksum */ /************************************************************************ - * ixgbe_dmamap_cb - Manage DMA'able memory. 
- ************************************************************************/ -static void -ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error) -{ - if (error) - return; - *(bus_addr_t *)arg = segs->ds_addr; - - return; -} /* ixgbe_dmamap_cb */ - -/************************************************************************ - * ixgbe_dma_malloc + * ixgbe_determine_rsstype + * + * Parse the packet type to determine the appropriate hash ************************************************************************/ static int -ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size, - struct ixgbe_dma_alloc *dma, int mapflags) +ixgbe_determine_rsstype(u16 pkt_info) { - device_t dev = adapter->dev; - int r; - - r = bus_dma_tag_create( - /* parent */ bus_get_dma_tag(adapter->dev), - /* alignment */ DBA_ALIGN, - /* bounds */ 0, - /* lowaddr */ BUS_SPACE_MAXADDR, - /* highaddr */ BUS_SPACE_MAXADDR, - /* filter */ NULL, - /* filterarg */ NULL, - /* maxsize */ size, - /* nsegments */ 1, - /* maxsegsize */ size, - /* flags */ BUS_DMA_ALLOCNOW, - /* lockfunc */ NULL, - /* lockfuncarg */ NULL, - &dma->dma_tag); - if (r != 0) { - device_printf(dev, - "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n", - r); - goto fail_0; + switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) { + case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: + return M_HASHTYPE_RSS_TCP_IPV4; + case IXGBE_RXDADV_RSSTYPE_IPV4: + return M_HASHTYPE_RSS_IPV4; + case IXGBE_RXDADV_RSSTYPE_IPV6_TCP: + return M_HASHTYPE_RSS_TCP_IPV6; + case IXGBE_RXDADV_RSSTYPE_IPV6_EX: + return M_HASHTYPE_RSS_IPV6_EX; + case IXGBE_RXDADV_RSSTYPE_IPV6: + return M_HASHTYPE_RSS_IPV6; + case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX: + return M_HASHTYPE_RSS_TCP_IPV6_EX; + case IXGBE_RXDADV_RSSTYPE_IPV4_UDP: + return M_HASHTYPE_RSS_UDP_IPV4; + case IXGBE_RXDADV_RSSTYPE_IPV6_UDP: + return M_HASHTYPE_RSS_UDP_IPV6; + case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX: + return M_HASHTYPE_RSS_UDP_IPV6_EX; + default: + return M_HASHTYPE_OPAQUE; } - r = 
bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr, - BUS_DMA_NOWAIT, &dma->dma_map); - if (r != 0) { - device_printf(dev, - "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r); - goto fail_1; - } - r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, - ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); - if (r != 0) { - device_printf(dev, - "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r); - goto fail_2; - } - dma->dma_size = size; - - return (0); -fail_2: - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); -fail_1: - bus_dma_tag_destroy(dma->dma_tag); -fail_0: - dma->dma_tag = NULL; - - return (r); -} /* ixgbe_dma_malloc */ - -/************************************************************************ - * ixgbe_dma_free - ************************************************************************/ -static void -ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma) -{ - bus_dmamap_sync(dma->dma_tag, dma->dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(dma->dma_tag, dma->dma_map); - bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); - bus_dma_tag_destroy(dma->dma_tag); -} /* ixgbe_dma_free */ - - -/************************************************************************ - * ixgbe_allocate_queues - * - * Allocate memory for the transmit and receive rings, and then - * the descriptors associated with each, called only once at attach. 
- ************************************************************************/ -int -ixgbe_allocate_queues(struct adapter *adapter) -{ - device_t dev = adapter->dev; - struct ix_queue *que; - struct tx_ring *txr; - struct rx_ring *rxr; - int rsize, tsize, error = IXGBE_SUCCESS; - int txconf = 0, rxconf = 0; - - /* First, allocate the top level queue structs */ - adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->queues == NULL) { - device_printf(dev, "Unable to allocate queue memory\n"); - error = ENOMEM; - goto fail; - } - - /* Second, allocate the TX ring struct memory */ - adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->tx_rings == NULL) { - device_printf(dev, "Unable to allocate TX ring memory\n"); - error = ENOMEM; - goto tx_fail; - } - - /* Third, allocate the RX ring */ - adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) * - adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->rx_rings == NULL) { - device_printf(dev, "Unable to allocate RX ring memory\n"); - error = ENOMEM; - goto rx_fail; - } - - /* For the ring itself */ - tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc), - DBA_ALIGN); - - /* - * Now set up the TX queues, txconf is needed to handle the - * possibility that things fail midcourse and we need to - * undo memory gracefully - */ - for (int i = 0; i < adapter->num_queues; i++, txconf++) { - /* Set up some basics */ - txr = &adapter->tx_rings[i]; - txr->adapter = adapter; - txr->br = NULL; - /* In case SR-IOV is enabled, align the index properly */ - txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, - i); - txr->num_desc = adapter->num_tx_desc; - - /* Initialize the TX side lock */ - snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", - device_get_nameunit(dev), txr->me); - mtx_init(&txr->tx_mtx, 
txr->mtx_name, NULL, MTX_DEF); - - if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, - BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate TX Descriptor memory\n"); - error = ENOMEM; - goto err_tx_desc; - } - txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr; - bzero((void *)txr->tx_base, tsize); - - /* Now allocate transmit buffers for the ring */ - if (ixgbe_allocate_transmit_buffers(txr)) { - device_printf(dev, - "Critical Failure setting up transmit buffers\n"); - error = ENOMEM; - goto err_tx_desc; - } - if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) { - /* Allocate a buf ring */ - txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, - M_WAITOK, &txr->tx_mtx); - if (txr->br == NULL) { - device_printf(dev, - "Critical Failure setting up buf ring\n"); - error = ENOMEM; - goto err_tx_desc; - } - } - } - - /* - * Next the RX queues... - */ - rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc), - DBA_ALIGN); - for (int i = 0; i < adapter->num_queues; i++, rxconf++) { - rxr = &adapter->rx_rings[i]; - /* Set up some basics */ - rxr->adapter = adapter; - /* In case SR-IOV is enabled, align the index properly */ - rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool, - i); - rxr->num_desc = adapter->num_rx_desc; - - /* Initialize the RX side lock */ - snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", - device_get_nameunit(dev), rxr->me); - mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); - - if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma, - BUS_DMA_NOWAIT)) { - device_printf(dev, - "Unable to allocate RxDescriptor memory\n"); - error = ENOMEM; - goto err_rx_desc; - } - rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr; - bzero((void *)rxr->rx_base, rsize); - - /* Allocate receive buffers for the ring */ - if (ixgbe_allocate_receive_buffers(rxr)) { - device_printf(dev, - "Critical Failure setting up receive buffers\n"); - error = ENOMEM; - goto err_rx_desc; - } - } - - /* - * Finally set 
up the queue holding structs - */ - for (int i = 0; i < adapter->num_queues; i++) { - que = &adapter->queues[i]; - que->adapter = adapter; - que->me = i; - que->txr = &adapter->tx_rings[i]; - que->rxr = &adapter->rx_rings[i]; - } - - return (0); - -err_rx_desc: - for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) - ixgbe_dma_free(adapter, &rxr->rxdma); -err_tx_desc: - for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) - ixgbe_dma_free(adapter, &txr->txdma); - free(adapter->rx_rings, M_DEVBUF); -rx_fail: - free(adapter->tx_rings, M_DEVBUF); -tx_fail: - free(adapter->queues, M_DEVBUF); -fail: - return (error); -} /* ixgbe_allocate_queues */ +} /* ixgbe_determine_rsstype */ diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h index 969b3a55112..e41f2db1f88 100644 --- a/sys/dev/ixgbe/ixgbe.h +++ b/sys/dev/ixgbe/ixgbe.h @@ -58,20 +58,13 @@ #include #include -#include #include #include +#include #include #include #include -#include -#include -#include -#include -#include - -#include #include #include @@ -85,7 +78,7 @@ #include #include #include -#include +#include #include #include #include @@ -106,7 +99,7 @@ * bytes. Performance tests have show the 2K value to be optimal for top * performance. */ -#define DEFAULT_TXD 1024 +#define DEFAULT_TXD 2048 #define PERFORM_TXD 2048 #define MAX_TXD 4096 #define MIN_TXD 64 @@ -121,7 +114,7 @@ * against the system mbuf pool limit, you can tune nmbclusters * to adjust for this. 
*/ -#define DEFAULT_RXD 1024 +#define DEFAULT_RXD 2048 #define PERFORM_RXD 2048 #define MAX_RXD 4096 #define MIN_RXD 64 @@ -219,6 +212,11 @@ #define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) #endif +#define IXGBE_CAPS (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_TSO | \ + IFCAP_LRO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | \ + IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU | \ + IFCAP_HWSTATS | IFCAP_VLAN_HWFILTER | IFCAP_WOL) + /* Backward compatibility items for very old versions */ #ifndef pci_find_cap #define pci_find_cap pci_find_extcap @@ -241,7 +239,6 @@ IXGBE_EITR_ITR_INT_MASK) - /************************************************************************ * vendor_info_array * @@ -262,23 +259,8 @@ struct ixgbe_bp_data { u32 log; }; -struct ixgbe_tx_buf { - union ixgbe_adv_tx_desc *eop; - struct mbuf *m_head; - bus_dmamap_t map; -}; - -struct ixgbe_rx_buf { - struct mbuf *buf; - struct mbuf *fmp; - bus_dmamap_t pmap; - u_int flags; -#define IXGBE_RX_COPY 0x01 - uint64_t addr; -}; /* - * Bus dma allocation structure used by ixgbe_dma_malloc and ixgbe_dma_free */ struct ixgbe_dma_alloc { bus_addr_t dma_paddr; @@ -295,47 +277,19 @@ struct ixgbe_mc_addr { u32 vmdq; }; -/* - * Driver queue struct: this is the interrupt container - * for the associated tx and rx ring. 
- */ -struct ix_queue { - struct adapter *adapter; - u32 msix; /* This queue's MSI-X vector */ - u32 eims; /* This queue's EIMS bit */ - u32 eitr_setting; - u32 me; - struct resource *res; - void *tag; - int busy; - struct tx_ring *txr; - struct rx_ring *rxr; - struct task que_task; - struct taskqueue *tq; - u64 irqs; -}; - /* * The transmit ring, one per queue */ struct tx_ring { struct adapter *adapter; - struct mtx tx_mtx; - u32 me; - u32 tail; - int busy; union ixgbe_adv_tx_desc *tx_base; - struct ixgbe_tx_buf *tx_buffers; - struct ixgbe_dma_alloc txdma; - volatile u16 tx_avail; - u16 next_avail_desc; - u16 next_to_clean; - u16 num_desc; - u32 txd_cmd; - bus_dma_tag_t txtag; - char mtx_name[16]; - struct buf_ring *br; - struct task txq_task; + uint64_t tx_paddr; + u32 tail; + qidx_t *tx_rsq; + qidx_t tx_rs_cidx; + qidx_t tx_rs_pidx; + qidx_t tx_cidx_processed; + uint8_t me; /* Flow Director */ u16 atr_sample; @@ -345,9 +299,6 @@ struct tx_ring { u32 packets; /* Soft Stats */ u64 tso_tx; - u64 no_tx_map_avail; - u64 no_tx_dma_setup; - u64 no_desc_avail; u64 total_packets; }; @@ -356,22 +307,14 @@ struct tx_ring { * The Receive ring, one per rx queue */ struct rx_ring { + struct ix_rx_queue *que; struct adapter *adapter; - struct mtx rx_mtx; u32 me; u32 tail; union ixgbe_adv_rx_desc *rx_base; - struct ixgbe_dma_alloc rxdma; - struct lro_ctrl lro; - bool lro_enabled; bool hw_rsc; bool vtag_strip; - u16 next_to_refresh; - u16 next_to_check; - u16 num_desc; - u16 mbuf_sz; - char mtx_name[16]; - struct ixgbe_rx_buf *rx_buffers; + uint64_t rx_paddr; bus_dma_tag_t ptag; u32 bytes; /* Used for AIM calc */ @@ -389,12 +332,35 @@ struct rx_ring { u64 flm; }; +/* + * Driver queue struct: this is the interrupt container + * for the associated tx and rx ring. 
+ */ +struct ix_rx_queue { + struct adapter *adapter; + u32 msix; /* This queue's MSIX vector */ + u32 eims; /* This queue's EIMS bit */ + u32 eitr_setting; + struct resource *res; + void *tag; + int busy; + struct rx_ring rxr; + struct if_irq que_irq; + u64 irqs; +}; + +struct ix_tx_queue { + struct adapter *adapter; + u32 msix; /* This queue's MSIX vector */ + struct tx_ring txr; +}; + #define IXGBE_MAX_VF_MC 30 /* Max number of multicast entries */ struct ixgbe_vf { u_int pool; u_int rar_index; - u_int max_frame_size; + u_int maximum_frame_size; uint32_t flags; uint8_t ether_addr[ETHER_ADDR_LEN]; uint16_t mc_hash[IXGBE_MAX_VF_MC]; @@ -408,33 +374,32 @@ struct ixgbe_vf { struct adapter { struct ixgbe_hw hw; struct ixgbe_osdep osdep; + if_ctx_t ctx; + if_softc_ctx_t shared; +#define num_tx_queues shared->isc_ntxqsets +#define num_rx_queues shared->isc_nrxqsets +#define max_frame_size shared->isc_max_frame_size +#define intr_type shared->isc_intr device_t dev; struct ifnet *ifp; struct resource *pci_mem; - struct resource *msix_mem; /* * Interrupt resources: this set is * either used for legacy, or for Link * when doing MSI-X */ + struct if_irq irq; void *tag; struct resource *res; - struct ifmedia media; - struct callout timer; - int link_rid; + struct ifmedia *media; int if_flags; - - struct mtx core_mtx; - - eventhandler_tag vlan_attach; - eventhandler_tag vlan_detach; + int msix; u16 num_vlans; - u16 num_queues; /* * Shadow VFTA table, this is needed because @@ -446,9 +411,7 @@ struct adapter { /* Info about the interface */ int advertise; /* link speeds */ - int enable_aim; /* adaptive interrupt moderation */ bool link_active; - u16 max_frame_size; u16 num_segs; u32 link_speed; bool link_up; @@ -465,17 +428,16 @@ struct adapter { /* Support for pluggable optics */ bool sfp_probe; - struct task link_task; /* Link tasklet */ - struct task mod_task; /* SFP tasklet */ - struct task msf_task; /* Multispeed Fiber */ - struct task mbx_task; /* VF -> PF mailbox 
interrupt */ + struct grouptask mod_task; /* SFP tasklet */ + struct grouptask msf_task; /* Multispeed Fiber */ + struct grouptask mbx_task; /* VF -> PF mailbox interrupt */ + int sfp_reinit; /* Flow Director */ int fdir_reinit; - struct task fdir_task; + struct grouptask fdir_task; - struct task phy_task; /* PHY intr tasklet */ - struct taskqueue *tq; + struct grouptask phy_task; /* PHY intr tasklet */ /* * Queues: @@ -483,24 +445,9 @@ struct adapter { * and RX/TX pair or rings associated * with it. */ - struct ix_queue *queues; - - /* - * Transmit rings - * Allocated at run time, an array of rings - */ - struct tx_ring *tx_rings; - u32 num_tx_desc; - u32 tx_process_limit; - - /* - * Receive rings - * Allocated at run time, an array of rings - */ - struct rx_ring *rx_rings; - u64 active_queues; - u32 num_rx_desc; - u32 rx_process_limit; + struct ix_tx_queue *tx_queues; + struct ix_rx_queue *rx_queues; + u64 active_queues; /* Multicast array memory */ struct ixgbe_mc_addr *mta; @@ -514,13 +461,8 @@ struct adapter { /* Bypass */ struct ixgbe_bp_data bypass; - /* Netmap */ - void (*init_locked)(struct adapter *); - void (*stop_locked)(void *); - /* Misc stats maintained by the driver */ unsigned long dropped_pkts; - unsigned long mbuf_defrag_failed; unsigned long mbuf_header_failed; unsigned long mbuf_packet_failed; unsigned long watchdog_events; @@ -547,29 +489,12 @@ struct adapter { u32 feat_en; }; - /* Precision Time Sync (IEEE 1588) defines */ #define ETHERTYPE_IEEE1588 0x88F7 #define PICOSECS_PER_TICK 20833 #define TSYNC_UDP_PORT 319 /* UDP port for the protocol */ #define IXGBE_ADVTXD_TSTAMP 0x00080000 - -#define IXGBE_CORE_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->core_mtx, _name, "IXGBE Core Lock", MTX_DEF) -#define IXGBE_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) -#define IXGBE_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) -#define IXGBE_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) -#define IXGBE_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) 
-#define IXGBE_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) -#define IXGBE_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) -#define IXGBE_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) -#define IXGBE_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) -#define IXGBE_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) -#define IXGBE_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) -#define IXGBE_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) -#define IXGBE_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) - /* For backward compatibility */ #if !defined(PCIER_LINK_STA) #define PCIER_LINK_STA PCIR_EXPRESS_LINK_STA @@ -626,34 +551,13 @@ static __inline int drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br) { #ifdef ALTQ - if (ALTQ_IS_ENABLED(&ifp->if_snd)) - return (1); + if (ALTQ_IS_ENABLED(&ifp->if_snd)) + return (1); #endif - return (!buf_ring_empty(br)); + return (!buf_ring_empty(br)); } #endif -/* - * Find the number of unrefreshed RX descriptors - */ -static inline u16 -ixgbe_rx_unrefreshed(struct rx_ring *rxr) -{ - if (rxr->next_to_check > rxr->next_to_refresh) - return (rxr->next_to_check - rxr->next_to_refresh - 1); - else - return ((rxr->num_desc + rxr->next_to_check) - - rxr->next_to_refresh - 1); -} - -static inline int -ixgbe_legacy_ring_empty(struct ifnet *ifp, struct buf_ring *dummy) -{ - UNREFERENCED_1PARAMETER(dummy); - - return IFQ_DRV_IS_EMPTY(&ifp->if_snd); -} - /* * This checks for a zero mac addr, something that will be likely * unless the Admin on the Host has created one. 
@@ -671,25 +575,16 @@ ixv_check_ether_addr(u8 *addr) } /* Shared Prototypes */ -void ixgbe_legacy_start(struct ifnet *); -int ixgbe_legacy_start_locked(struct ifnet *, struct tx_ring *); -int ixgbe_mq_start(struct ifnet *, struct mbuf *); -int ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *); -void ixgbe_qflush(struct ifnet *); -void ixgbe_deferred_mq_start(void *, int); int ixgbe_allocate_queues(struct adapter *); int ixgbe_setup_transmit_structures(struct adapter *); void ixgbe_free_transmit_structures(struct adapter *); int ixgbe_setup_receive_structures(struct adapter *); void ixgbe_free_receive_structures(struct adapter *); -void ixgbe_txeof(struct tx_ring *); -bool ixgbe_rxeof(struct ix_queue *); +int ixgbe_get_regs(SYSCTL_HANDLER_ARGS); #include "ixgbe_bypass.h" -#include "ixgbe_sriov.h" #include "ixgbe_fdir.h" #include "ixgbe_rss.h" -#include "ixgbe_netmap.h" #endif /* _IXGBE_H_ */ diff --git a/sys/dev/ixgbe/ixgbe_common.c b/sys/dev/ixgbe/ixgbe_common.c index 6c20c0758b5..3bb8f4943aa 100644 --- a/sys/dev/ixgbe/ixgbe_common.c +++ b/sys/dev/ixgbe/ixgbe_common.c @@ -223,6 +223,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) ERROR_REPORT2(IXGBE_ERROR_UNSUPPORTED, "Device %x does not support flow control autoneg", hw->device_id); + return supported; } @@ -2001,7 +2002,7 @@ static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw) usec_delay(5); ixgbe_standby_eeprom(hw); - }; + } /* * On some parts, SPI write time could vary from 0-20mSec on 3.3V @@ -2087,7 +2088,7 @@ static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data, * EEPROM */ mask = mask >> 1; - }; + } /* We leave the "DI" bit set to "0" when we leave this routine. 
*/ eec &= ~IXGBE_EEC_DI; @@ -3518,7 +3519,6 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) if (index > 3) return IXGBE_ERR_PARAM; - ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg); if (ret_val != IXGBE_SUCCESS) goto out; @@ -3715,7 +3715,7 @@ u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw) * @vmdq: VMDq pool to assign * * Puts an ethernet address into a receive address register, or - * finds the rar that it is aleady in; adds to the pool list + * finds the rar that it is already in; adds to the pool list **/ s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) { @@ -4126,7 +4126,7 @@ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) { IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0); IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0); - IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2 + 1), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset * 2) + 1), 0); } return IXGBE_SUCCESS; diff --git a/sys/dev/ixgbe/ixgbe_fdir.h b/sys/dev/ixgbe/ixgbe_fdir.h index 1b106eef6d1..f5e1593072a 100644 --- a/sys/dev/ixgbe/ixgbe_fdir.h +++ b/sys/dev/ixgbe/ixgbe_fdir.h @@ -52,7 +52,7 @@ void ixgbe_init_fdir(struct adapter *); #endif -void ixgbe_reinit_fdir(void *, int); +void ixgbe_reinit_fdir(void *); void ixgbe_atr(struct tx_ring *, struct mbuf *); #endif /* _IXGBE_FDIR_H_ */ diff --git a/sys/dev/ixgbe/ixgbe_netmap.c b/sys/dev/ixgbe/ixgbe_netmap.c deleted file mode 100644 index fd37ba8f7fb..00000000000 --- a/sys/dev/ixgbe/ixgbe_netmap.c +++ /dev/null @@ -1,521 +0,0 @@ -/****************************************************************************** - - Copyright (c) 2001-2017, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. 
Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ -/*$FreeBSD$*/ - -/* - * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $FreeBSD$ - * - * netmap support for: ixgbe - * - * This file is meant to be a reference on how to implement - * netmap support for a network driver. - * This file contains code but only static or inline functions used - * by a single driver. To avoid replication of code we just #include - * it near the beginning of the standard driver. - */ - -#ifdef DEV_NETMAP -/* - * Some drivers may need the following headers. Others - * already include them by default - -#include -#include - - */ -#include "ixgbe.h" - -/* - * device-specific sysctl variables: - * - * ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. - * During regular operations the CRC is stripped, but on some - * hardware reception of frames not multiple of 64 is slower, - * so using crcstrip=0 helps in benchmarks. - * - * ix_rx_miss, ix_rx_miss_bufs: - * count packets that might be missed due to lost interrupts. 
- */ -SYSCTL_DECL(_dev_netmap); -static int ix_rx_miss, ix_rx_miss_bufs; -int ix_crcstrip; -SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip, - CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames"); -SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss, - CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr"); -SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs, - CTLFLAG_RW, &ix_rx_miss_bufs, 0, "potentially missed rx intr bufs"); - - -static void -set_crcstrip(struct ixgbe_hw *hw, int onoff) -{ - /* crc stripping is set in two places: - * IXGBE_HLREG0 (modified on init_locked and hw reset) - * IXGBE_RDRXCTL (set by the original driver in - * ixgbe_setup_hw_rsc() called in init_locked. - * We disable the setting when netmap is compiled in). - * We update the values here, but also in ixgbe.c because - * init_locked sometimes is called outside our control. - */ - uint32_t hl, rxc; - - hl = IXGBE_READ_REG(hw, IXGBE_HLREG0); - rxc = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - if (netmap_verbose) - D("%s read HLREG 0x%x rxc 0x%x", - onoff ? "enter" : "exit", hl, rxc); - /* hw requirements ... */ - rxc &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; - rxc |= IXGBE_RDRXCTL_RSCACKC; - if (onoff && !ix_crcstrip) { - /* keep the crc. Fast rx */ - hl &= ~IXGBE_HLREG0_RXCRCSTRP; - rxc &= ~IXGBE_RDRXCTL_CRCSTRIP; - } else { - /* reset default mode */ - hl |= IXGBE_HLREG0_RXCRCSTRP; - rxc |= IXGBE_RDRXCTL_CRCSTRIP; - } - if (netmap_verbose) - D("%s write HLREG 0x%x rxc 0x%x", - onoff ? "enter" : "exit", hl, rxc); - IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hl); - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc); -} - - -/* - * Register/unregister. We are already under netmap lock. - * Only called on the first register or the last unregister. 
- */ -static int -ixgbe_netmap_reg(struct netmap_adapter *na, int onoff) -{ - struct ifnet *ifp = na->ifp; - struct adapter *adapter = ifp->if_softc; - - IXGBE_CORE_LOCK(adapter); - adapter->stop_locked(adapter); - - set_crcstrip(&adapter->hw, onoff); - /* enable or disable flags and callbacks in na and ifp */ - if (onoff) { - nm_set_native_flags(na); - } else { - nm_clear_native_flags(na); - } - adapter->init_locked(adapter); /* also enables intr */ - set_crcstrip(&adapter->hw, onoff); // XXX why twice ? - IXGBE_CORE_UNLOCK(adapter); - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); -} - - -/* - * Reconcile kernel and user view of the transmit ring. - * - * All information is in the kring. - * Userspace wants to send packets up to the one before kring->rhead, - * kernel knows kring->nr_hwcur is the first unsent packet. - * - * Here we push packets out (as many as possible), and possibly - * reclaim buffers from previously completed transmission. - * - * The caller (netmap) guarantees that there is only one instance - * running at any time. Any interference with other driver - * methods should be handled by the individual drivers. - */ -static int -ixgbe_netmap_txsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - /* - * interrupts on every tx packet are expensive so request - * them every half ring, or where NS_REPORT is set - */ - u_int report_frequency = kring->nkr_num_slots >> 1; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = &adapter->tx_rings[kring->ring_id]; - int reclaim_tx; - - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_POSTREAD); - - /* - * First part: process new packets to send. 
- * nm_i is the current index in the netmap ring, - * nic_i is the corresponding index in the NIC ring. - * The two numbers differ because upon a *_init() we reset - * the NIC ring but leave the netmap ring unchanged. - * For the transmit ring, we have - * - * nm_i = kring->nr_hwcur - * nic_i = IXGBE_TDT (not tracked in the driver) - * and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - * - * In this driver kring->nkr_hwofs >= 0, but for other - * drivers it might be negative as well. - */ - - /* - * If we have packets to send (kring->nr_hwcur != kring->rhead) - * iterate over the netmap ring, fetch length and update - * the corresponding slot in the NIC ring. Some drivers also - * need to update the buffer's physical address in the NIC slot - * even NS_BUF_CHANGED is not set (PNMB computes the addresses). - * - * The netmap_reload_map() calls is especially expensive, - * even when (as in this case) the tag is 0, so do only - * when the buffer has actually changed. - * - * If possible do not set the report/intr bit on all slots, - * but only a few times per ring or when NS_REPORT is set. - * - * Finally, on 10G and faster drivers, it might be useful - * to prefetch the next slot and txr entry. - */ - - nm_i = kring->nr_hwcur; - if (nm_i != head) { /* we have new packets to send */ - nic_i = netmap_idx_k2n(kring, nm_i); - - __builtin_prefetch(&ring->slot[nm_i]); - __builtin_prefetch(&txr->tx_buffers[nic_i]); - - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - u_int len = slot->len; - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - /* device-specific */ - union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i]; - struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i]; - int flags = (slot->flags & NS_REPORT || - nic_i == 0 || nic_i == report_frequency) ? 
- IXGBE_TXD_CMD_RS : 0; - - /* prefetch for next round */ - __builtin_prefetch(&ring->slot[nm_i + 1]); - __builtin_prefetch(&txr->tx_buffers[nic_i + 1]); - - NM_CHECK_ADDR_LEN(na, addr, len); - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(na, txr->txtag, txbuf->map, addr); - } - slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); - - /* Fill the slot in the NIC ring. */ - /* Use legacy descriptor, they are faster? */ - curr->read.buffer_addr = htole64(paddr); - curr->read.olinfo_status = 0; - curr->read.cmd_type_len = htole32(len | flags | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP); - - /* make sure changes to the buffer are synced */ - bus_dmamap_sync(txr->txtag, txbuf->map, - BUS_DMASYNC_PREWRITE); - - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - /* synchronize the NIC ring */ - bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - - /* (re)start the tx unit up to slot nic_i (excluded) */ - IXGBE_WRITE_REG(&adapter->hw, txr->tail, nic_i); - } - - /* - * Second part: reclaim buffers for completed transmissions. - * Because this is expensive (we read a NIC register etc.) - * we only do it in specific cases (see below). - */ - if (flags & NAF_FORCE_RECLAIM) { - reclaim_tx = 1; /* forced reclaim */ - } else if (!nm_kr_txempty(kring)) { - reclaim_tx = 0; /* have buffers, no reclaim */ - } else { - /* - * No buffers available. Locate previous slot with - * REPORT_STATUS set. - * If the slot has DD set, we can reclaim space, - * otherwise wait for the next interrupt. - * This enables interrupt moderation on the tx - * side though it might reduce throughput. 
- */ - struct ixgbe_legacy_tx_desc *txd = - (struct ixgbe_legacy_tx_desc *)txr->tx_base; - - nic_i = txr->next_to_clean + report_frequency; - if (nic_i > lim) - nic_i -= lim + 1; - // round to the closest with dd set - nic_i = (nic_i < kring->nkr_num_slots / 4 || - nic_i >= kring->nkr_num_slots*3/4) ? - 0 : report_frequency; - reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ? - } - if (reclaim_tx) { - /* - * Record completed transmissions. - * We (re)use the driver's txr->next_to_clean to keep - * track of the most recently completed transmission. - * - * The datasheet discourages the use of TDH to find - * out the number of sent packets, but we only set - * REPORT_STATUS in a few slots so TDH is the only - * good way. - */ - nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id)); - if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */ - D("TDH wrap %d", nic_i); - nic_i -= kring->nkr_num_slots; - } - if (nic_i != txr->next_to_clean) { - /* some tx completed, increment avail */ - txr->next_to_clean = nic_i; - kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); - } - } - - return 0; -} - - -/* - * Reconcile kernel and user view of the receive ring. - * Same as for the txsync, this routine must be efficient. - * The caller guarantees a single invocations, but races against - * the rest of the driver should be handled here. - * - * On call, kring->rhead is the first packet that userspace wants - * to keep, and kring->rcur is the wakeup point. - * The kernel has previously reported packets up to kring->rtail. - * - * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective - * of whether or not we received an interrupt. 
- */ -static int -ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct ifnet *ifp = na->ifp; - struct netmap_ring *ring = kring->ring; - u_int nm_i; /* index into the netmap ring */ - u_int nic_i; /* index into the NIC ring */ - u_int n; - u_int const lim = kring->nkr_num_slots - 1; - u_int const head = kring->rhead; - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - - /* device-specific */ - struct adapter *adapter = ifp->if_softc; - struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id]; - - if (head > lim) - return netmap_ring_reinit(kring); - - /* XXX check sync modes */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - - /* - * First part: import newly received packets. - * - * nm_i is the index of the next free slot in the netmap ring, - * nic_i is the index of the next received packet in the NIC ring, - * and they may differ in case if_init() has been called while - * in netmap mode. For the receive ring we have - * - * nic_i = rxr->next_to_check; - * nm_i = kring->nr_hwtail (previous) - * and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - * - * rxr->next_to_check is set to 0 on a ring reinit - */ - if (netmap_no_pendintr || force_update) { - int crclen = (ix_crcstrip) ? 
0 : 4; - uint16_t slot_flags = kring->nkr_slot_flags; - - nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail) - nm_i = netmap_idx_n2k(kring, nic_i); - - for (n = 0; ; n++) { - union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i]; - uint32_t staterr = le32toh(curr->wb.upper.status_error); - - if ((staterr & IXGBE_RXD_STAT_DD) == 0) - break; - ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen; - ring->slot[nm_i].flags = slot_flags; - bus_dmamap_sync(rxr->ptag, - rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - if (n) { /* update the state variables */ - if (netmap_no_pendintr && !force_update) { - /* diagnostics */ - ix_rx_miss ++; - ix_rx_miss_bufs += n; - } - rxr->next_to_check = nic_i; - kring->nr_hwtail = nm_i; - } - kring->nr_kflags &= ~NKR_PENDINTR; - } - - /* - * Second part: skip past packets that userspace has released. - * (kring->nr_hwcur to kring->rhead excluded), - * and make the buffers available for reception. 
- * As usual nm_i is the index in the netmap ring, - * nic_i is the index in the NIC ring, and - * nm_i == (nic_i + kring->nkr_hwofs) % ring_size - */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - nic_i = netmap_idx_k2n(kring, nm_i); - for (n = 0; nm_i != head; n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - uint64_t paddr; - void *addr = PNMB(na, slot, &paddr); - - union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i]; - struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i]; - - if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ - goto ring_reset; - - if (slot->flags & NS_BUF_CHANGED) { - /* buffer has changed, reload map */ - netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr); - slot->flags &= ~NS_BUF_CHANGED; - } - curr->wb.upper.status_error = 0; - curr->read.pkt_addr = htole64(paddr); - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - nm_i = nm_next(nm_i, lim); - nic_i = nm_next(nic_i, lim); - } - kring->nr_hwcur = head; - - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - IXGBE_WRITE_REG(&adapter->hw, rxr->tail, nic_i); - } - - return 0; - -ring_reset: - return netmap_ring_reinit(kring); -} - - -/* - * The attach routine, called near the end of ixgbe_attach(), - * fills the parameters for netmap_attach() and calls it. - * It cannot fail, in the worst case (such as no memory) - * netmap mode will be disabled and the driver will only - * operate in standard mode. 
- */ -void -ixgbe_netmap_attach(struct adapter *adapter) -{ - struct netmap_adapter na; - - bzero(&na, sizeof(na)); - - na.ifp = adapter->ifp; - na.na_flags = NAF_BDG_MAYSLEEP; - na.num_tx_desc = adapter->num_tx_desc; - na.num_rx_desc = adapter->num_rx_desc; - na.nm_txsync = ixgbe_netmap_txsync; - na.nm_rxsync = ixgbe_netmap_rxsync; - na.nm_register = ixgbe_netmap_reg; - na.num_tx_rings = na.num_rx_rings = adapter->num_queues; - netmap_attach(&na); -} - -#endif /* DEV_NETMAP */ - -/* end of file */ diff --git a/sys/dev/ixgbe/ixgbe_netmap.h b/sys/dev/ixgbe/ixgbe_netmap.h deleted file mode 100644 index 014bb5f7e74..00000000000 --- a/sys/dev/ixgbe/ixgbe_netmap.h +++ /dev/null @@ -1,59 +0,0 @@ -/****************************************************************************** - - Copyright (c) 2001-2017, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -******************************************************************************/ -/*$FreeBSD$*/ - - -#ifndef _IXGBE_NETMAP_H_ -#define _IXGBE_NETMAP_H_ - -#ifdef DEV_NETMAP - -#include -#include -#include - -extern int ix_crcstrip; - -/* - * ixgbe_netmap.c contains functions for netmap - * support that extend the standard driver. See additional - * comments in ixgbe_netmap.c. - */ -void ixgbe_netmap_attach(struct adapter *adapter); - -#else -#define ixgbe_netmap_attach(a) -#define netmap_detach(a) -#endif /* DEV_NETMAP */ - -#endif /* _IXGBE_NETMAP_H_ */ diff --git a/sys/dev/ixgbe/ixgbe_osdep.c b/sys/dev/ixgbe/ixgbe_osdep.c index 4584718b261..e45531ea8c3 100644 --- a/sys/dev/ixgbe/ixgbe_osdep.c +++ b/sys/dev/ixgbe/ixgbe_osdep.c @@ -1,31 +1,31 @@ /****************************************************************************** - Copyright (c) 2001-2017, Intel Corporation + Copyright (c) 2001-2017, Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from this software without specific prior written permission. - + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/sys/dev/ixgbe/ixgbe_osdep.h b/sys/dev/ixgbe/ixgbe_osdep.h index 9d643f31840..e1b96440098 100644 --- a/sys/dev/ixgbe/ixgbe_osdep.h +++ b/sys/dev/ixgbe/ixgbe_osdep.h @@ -1,32 +1,32 @@ /****************************************************************************** SPDX-License-Identifier: BSD-3-Clause - Copyright (c) 2001-2017, Intel Corporation + Copyright (c) 2001-2017, Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from + + 3. 
Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from this software without specific prior written permission. - + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/sys/dev/ixgbe/ixgbe_phy.c b/sys/dev/ixgbe/ixgbe_phy.c index 2b758a751cd..28692fe0c2e 100644 --- a/sys/dev/ixgbe/ixgbe_phy.c +++ b/sys/dev/ixgbe/ixgbe_phy.c @@ -1491,21 +1491,18 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) hw->phy.type = ixgbe_phy_sfp_intel; break; default: - if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) - hw->phy.type = - ixgbe_phy_sfp_passive_unknown; - else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) - hw->phy.type = - ixgbe_phy_sfp_active_unknown; - else - hw->phy.type = ixgbe_phy_sfp_unknown; + hw->phy.type = ixgbe_phy_sfp_unknown; break; } } /* Allow any DA cable vendor */ if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE | - IXGBE_SFF_DA_ACTIVE_CABLE)) { + IXGBE_SFF_DA_ACTIVE_CABLE)) { + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) + hw->phy.type = ixgbe_phy_sfp_passive_unknown; + else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) + hw->phy.type = ixgbe_phy_sfp_active_unknown; status = IXGBE_SUCCESS; goto out; } diff --git a/sys/dev/ixgbe/ixgbe_sriov.h b/sys/dev/ixgbe/ixgbe_sriov.h index bfe58d4dd24..36ca7cacc85 100644 --- a/sys/dev/ixgbe/ixgbe_sriov.h +++ b/sys/dev/ixgbe/ixgbe_sriov.h @@ -41,6 +41,7 @@ #include #include #include +#include #include "ixgbe_mbx.h" #define IXGBE_VF_CTS (1 << 0) /* VF is clear to send. */ @@ -57,7 +58,7 @@ #define IXGBE_VF_GET_QUEUES_RESP_LEN 5 -#define IXGBE_API_VER_1_0 0 +#define IXGBE_API_VER_1_0 0 #define IXGBE_API_VER_2_0 1 /* Solaris API. Not supported. 
*/ #define IXGBE_API_VER_1_1 2 #define IXGBE_API_VER_UNKNOWN UINT16_MAX @@ -66,15 +67,16 @@ #define IXGBE_32_VM 32 #define IXGBE_64_VM 64 -int ixgbe_add_vf(device_t, u16, const nvlist_t *); -int ixgbe_init_iov(device_t, u16, const nvlist_t *); -void ixgbe_uninit_iov(device_t); +int ixgbe_if_iov_vf_add(if_ctx_t, u16, const nvlist_t *); +int ixgbe_if_iov_init(if_ctx_t, u16, const nvlist_t *); +void ixgbe_if_iov_uninit(if_ctx_t); void ixgbe_initialize_iov(struct adapter *); void ixgbe_recalculate_max_frame(struct adapter *); void ixgbe_ping_all_vfs(struct adapter *); int ixgbe_pci_iov_detach(device_t); void ixgbe_define_iov_schemas(device_t, int *); void ixgbe_align_all_queue_indices(struct adapter *); +int ixgbe_vf_que_index(int, int, int); u32 ixgbe_get_mtqc(int); u32 ixgbe_get_mrqc(int); @@ -91,12 +93,13 @@ u32 ixgbe_get_mrqc(int); #define ixgbe_pci_iov_detach(_a) 0 #define ixgbe_define_iov_schemas(_a,_b) #define ixgbe_align_all_queue_indices(_a) +#define ixgbe_vf_que_index(_a, _b, _c) (_c) #define ixgbe_get_mtqc(_a) IXGBE_MTQC_64Q_1PB #define ixgbe_get_mrqc(_a) 0 #endif /* PCI_IOV */ -void ixgbe_handle_mbx(void *, int); -int ixgbe_vf_que_index(int, int, int); +void ixgbe_if_init(if_ctx_t ctx); +void ixgbe_handle_mbx(void *); #endif diff --git a/sys/dev/ixgbe/ixgbe_type.h b/sys/dev/ixgbe/ixgbe_type.h index c9eac6ed989..68a97feb55c 100644 --- a/sys/dev/ixgbe/ixgbe_type.h +++ b/sys/dev/ixgbe/ixgbe_type.h @@ -1563,7 +1563,7 @@ struct ixgbe_dmac_config { #define IXGBE_MDIO_GLOBAL_ALARM_1 0xCC00 /* Global alarm 1 */ #define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT 0x0010 /* device fault */ #define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL 0x4000 /* high temp failure */ -#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* Global Fault Message */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* Global Fault Message */ #define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP 0x8007 /* high temp failure */ #define IXGBE_MDIO_GLOBAL_INT_MASK 0xD400 /* Global int mask */ #define 
IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN 0x1000 /* autoneg vendor alarm int enable */ diff --git a/sys/dev/ixgbe/ixgbe_vf.h b/sys/dev/ixgbe/ixgbe_vf.h index 320e977a980..cdc59704dca 100644 --- a/sys/dev/ixgbe/ixgbe_vf.h +++ b/sys/dev/ixgbe/ixgbe_vf.h @@ -1,32 +1,32 @@ /****************************************************************************** SPDX-License-Identifier: BSD-3-Clause - Copyright (c) 2001-2017, Intel Corporation + Copyright (c) 2001-2017, Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, + + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from this software without specific prior written permission. - + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/sys/modules/ix/Makefile b/sys/modules/ix/Makefile index 4cf4e856b0a..421ca104a2e 100644 --- a/sys/modules/ix/Makefile +++ b/sys/modules/ix/Makefile @@ -3,10 +3,9 @@ .PATH: ${SRCTOP}/sys/dev/ixgbe KMOD = if_ix -SRCS = device_if.h bus_if.h pci_if.h pci_iov_if.h +SRCS = device_if.h bus_if.h pci_if.h pci_iov_if.h ifdi_if.h SRCS += opt_inet.h opt_inet6.h opt_rss.h SRCS += if_ix.c if_bypass.c if_fdir.c if_sriov.c ix_txrx.c ixgbe_osdep.c -SRCS += ixgbe_netmap.c # Shared source SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c diff --git a/sys/modules/ixv/Makefile b/sys/modules/ixv/Makefile index 26bde06a5d4..573af918951 100644 --- a/sys/modules/ixv/Makefile +++ b/sys/modules/ixv/Makefile @@ -3,9 +3,9 @@ .PATH: ${SRCTOP}/sys/dev/ixgbe KMOD = if_ixv -SRCS = device_if.h bus_if.h pci_if.h pci_iov_if.h +SRCS = device_if.h bus_if.h pci_if.h pci_iov_if.h ifdi_if.h SRCS += opt_inet.h opt_inet6.h opt_rss.h -SRCS += if_ixv.c if_fdir.c if_sriov.c ix_txrx.c ixgbe_osdep.c ixgbe_netmap.c +SRCS += if_ixv.c if_fdir.c ix_txrx.c ixgbe_osdep.c # Shared source SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c ixgbe_mbx.c ixgbe_vf.c SRCS += ixgbe_dcb.c ixgbe_dcb_82598.c ixgbe_dcb_82599.c From 5cf10fb96a24a673f0ddbbcda322e0eb7af14dae Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Wed, 20 Dec 2017 18:23:22 +0000 Subject: [PATCH 054/115] Add a new kernel config option, MD_ROOT_READONLY, which forces on the MD_READONLY flag for the md device automatically instantiated during kernel init for an mdroot filesystem. Note that there is specifically and by design no tunable or sysctl control over this feature. Without this option, you already have control over whether the mdroot fs is writeable using vfs.root.mountfrom.options from loader(8), the root_rw_mount rcvar, and by using "mount -u[rw] /" or equivalent on the fly.
This option is being added to provide a way to make the mdroot fs truly immutable before userland code begins running. Differential Revision: https://reviews.freebsd.org/D13411 --- share/man/man4/md.4 | 5 +++-- sys/conf/NOTES | 3 +++ sys/conf/options | 1 + sys/dev/md/md.c | 10 ++++++++-- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/share/man/man4/md.4 b/share/man/man4/md.4 index f13aa4d2f67..ec734a913c1 100644 --- a/share/man/man4/md.4 +++ b/share/man/man4/md.4 @@ -7,7 +7,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 5, 2017 +.Dd December 7, 2017 .Dt MD 4 .Os .Sh NAME @@ -79,7 +79,8 @@ To create a kernel with a ramdisk or MD file system, your kernel config needs the following options: .Bd -literal -offset indent options MD_ROOT # MD is a potential root device -options MD_ROOT_SIZE=8192 # 8MB ram disk +options MD_ROOT_READONLY # disallow mounting root writeable +options MD_ROOT_SIZE=8192 # 8MB ram disk makeoptions MFS_IMAGE=/h/foo/ARM-MD options ROOTDEVNAME=\\"ufs:md0\\" .Ed diff --git a/sys/conf/NOTES b/sys/conf/NOTES index be9572b5ee5..9d1ec64895b 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1101,6 +1101,9 @@ options MD_ROOT_SIZE=10 # images of type mfs_root or md_root. options MD_ROOT +# Write-protect the md root device so that it may not be mounted writeable. +options MD_ROOT_READONLY + # Disk quotas are supported when this option is enabled. 
options QUOTA #enable disk quotas diff --git a/sys/conf/options b/sys/conf/options index 6c109924342..b83926f18a9 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -165,6 +165,7 @@ MAC_STUB opt_dontuse.h MAC_TEST opt_dontuse.h MD_ROOT opt_md.h MD_ROOT_FSTYPE opt_md.h +MD_ROOT_READONLY opt_md.h MD_ROOT_SIZE opt_md.h MFI_DEBUG opt_mfi.h MFI_DECODE_LOG opt_mfi.h diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index ee76b4f54ba..3c4075094a1 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -1791,9 +1791,15 @@ md_preloaded(u_char *image, size_t length, const char *name) sc->start = mdstart_preload; if (name != NULL) strlcpy(sc->file, name, sizeof(sc->file)); -#if defined(MD_ROOT) && !defined(ROOTDEVNAME) - if (sc->unit == 0) +#ifdef MD_ROOT + if (sc->unit == 0) { +#ifndef ROOTDEVNAME rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0"; +#endif +#ifdef MD_ROOT_READONLY + sc->flags |= MD_READONLY; +#endif + } #endif mdinit(sc); if (name != NULL) { From 4484c8f5d2985ba3b9a03f178c73dcb7342b3ae1 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 20 Dec 2017 19:13:55 +0000 Subject: [PATCH 055/115] Return domain, bus, slot, and function for the transport settings in PATH_INQ requests for nvme. 
Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D13546 --- sys/cam/cam_ccb.h | 5 +++++ sys/dev/nvme/nvme_sim.c | 8 +++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sys/cam/cam_ccb.h b/sys/cam/cam_ccb.h index 982e2130cac..0942d90124a 100644 --- a/sys/cam/cam_ccb.h +++ b/sys/cam/cam_ccb.h @@ -633,6 +633,11 @@ struct ccb_pathinq_settings_sas { struct ccb_pathinq_settings_nvme { uint32_t nsid; /* Namespace ID for this path */ + uint32_t domain; + uint8_t bus; + uint8_t slot; + uint8_t function; + uint8_t extra; }; #define PATHINQ_SETTINGS_SIZE 128 diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c index 775a4827821..de6c79b879e 100644 --- a/sys/dev/nvme/nvme_sim.c +++ b/sys/dev/nvme/nvme_sim.c @@ -182,7 +182,8 @@ nvme_sim_action(struct cam_sim *sim, union ccb *ccb) break; case XPT_PATH_INQ: /* Path routing inquiry */ { - struct ccb_pathinq *cpi = &ccb->cpi; + struct ccb_pathinq *cpi = &ccb->cpi; + device_t dev = ctrlr->dev; /* * NVMe may have multiple LUNs on the same path. Current generation @@ -210,6 +211,11 @@ nvme_sim_action(struct cam_sim *sim, union ccb *ccb) cpi->protocol = PROTO_NVME; cpi->protocol_version = nvme_mmio_read_4(ctrlr, vs); cpi->xport_specific.nvme.nsid = ns->id; + cpi->xport_specific.nvme.domain = pci_get_domain(dev); + cpi->xport_specific.nvme.bus = pci_get_bus(dev); + cpi->xport_specific.nvme.slot = pci_get_slot(dev); + cpi->xport_specific.nvme.function = pci_get_function(dev); + cpi->xport_specific.nvme.extra = 0; cpi->ccb_h.status = CAM_REQ_CMP; break; } From bb0107830db786c368abe1c6bd11e31caea5e735 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 20 Dec 2017 19:14:05 +0000 Subject: [PATCH 056/115] Add device location wiring to the pci bus. This allows one to specify, for example, that if there's an igb card in bus 12, slot 0, function 0, it should be assigned igb5. If there isn't, or there's one in a different slot, normal numbering rules apply (hinted units are skipped). 
Adding 'hint.igb.5.at="pci12:0:0"' or 'hint.igb.5.at="pci0:12:0:0"' to /boot/device.hints will accomplish this. The double quotes are important. The kernel only accepts the strings (in shell notation): pci$d:$b:$s:$f and pci$b:$s:$f where $d is the pci domain, $b is the pci bus number, $s is the slot number and $f is the function number. A string compare is done with the current device to avoid another string parser in the kernel. All numbers are unsigned decimal without leading zeros. Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D13546 --- sys/dev/pci/pci.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 79321945ce8..42cb89547c7 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -123,6 +123,8 @@ static void pci_resume_msi(device_t dev); static void pci_resume_msix(device_t dev); static int pci_remap_intr_method(device_t bus, device_t dev, u_int irq); +static void pci_hint_device_unit(device_t acdev, device_t child, + const char *name, int *unitp); static int pci_get_id_method(device_t dev, device_t child, enum pci_id_type type, uintptr_t *rid); @@ -162,6 +164,7 @@ static device_method_t pci_methods[] = { DEVMETHOD(bus_child_detached, pci_child_detached), DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method), DEVMETHOD(bus_child_location_str, pci_child_location_str_method), + DEVMETHOD(bus_hint_device_unit, pci_hint_device_unit), DEVMETHOD(bus_remap_intr, pci_remap_intr_method), DEVMETHOD(bus_suspend_child, pci_suspend_child), DEVMETHOD(bus_resume_child, pci_resume_child), @@ -4220,6 +4223,31 @@ pci_detach(device_t dev) return (device_delete_children(dev)); } +static void +pci_hint_device_unit(device_t dev, device_t child, const char *name, int *unitp) +{ + int line, unit; + const char *at; + char me1[24], me2[32]; + uint8_t b, s, f; + uint32_t d; + + d = pci_get_domain(child); + b = pci_get_bus(child); + s = pci_get_slot(child); + f = 
pci_get_function(child); + snprintf(me1, sizeof(me1), "pci%u:%u:%u", b, s, f); + snprintf(me2, sizeof(me2), "pci%u:%u:%u:%u", d, b, s, f); + line = 0; + while (resource_find_dev(&line, name, &unit, "at", NULL) == 0) { + resource_string_value(name, unit, "at", &at); + if (strcmp(at, me1) != 0 && strcmp(at, me2) != 0) + continue; /* No match, try next candidate */ + *unitp = unit; + return; + } +} + static void pci_set_power_child(device_t dev, device_t child, int state) { From 9b94cc2482c41ae7bfebbe0091762f044652ea03 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 20 Dec 2017 19:14:11 +0000 Subject: [PATCH 057/115] Document new pci device hints wiring. Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D13546 --- share/man/man4/pci.4 | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/share/man/man4/pci.4 b/share/man/man4/pci.4 index 7623a9a907d..2fcf4d846aa 100644 --- a/share/man/man4/pci.4 +++ b/share/man/man4/pci.4 @@ -492,6 +492,59 @@ The value of the tunable is the raw IRQ value to use for the INTx interrupt pin identified by the tunable name. Mapping of IRQ values to platform interrupt sources is machine dependent. .El +.Sh DEVICE WIRING +You can wire the device unit at a given location with device.hints. +Entries of the form +.Va hint.<name>.<unit>.at="pci<B>:<S>:<F>" +or +.Va hint.<name>.<unit>.at="pci<D>:<B>:<S>:<F>" +will force the driver +.Va name +to probe and attach at unit +.Va unit +for any PCI device found to match the specification, where: +.Bl -tag -width -indent +.It <D> +The domain +.Pq or segment +of the PCI device in decimal. +Defaults to 0 if unspecified. +.It <B> +The bus address of the PCI device in decimal. +.It <S> +The slot of the PCI device in decimal. +.It <F> +The function of the PCI device in decimal. +.El +.Pp +The code to do the matching requires an exact string match. +Do not specify the angle brackets +.Pq < > +in the hints file. +Wiring multiple devices to the same +.Va name +and +.Va unit +produces undefined results.
+.Ss Examples +Given the following lines in +.Pa /boot/device.hints : +.Cd hint.nvme.3.at="pci6:0:0" +.Cd hint.igb.8.at="pci14:0:0" +If there is a device that supports +.Xr igb 4 +at PCI bus 14 slot 0 function 0, +then it will be assigned igb8 for probe and attach. +Likewise, if there is an +.Xr nvme 4 +card at PCI bus 6 slot 0 function 0, +then it will be assigned nvme3 for probe and attach. +If another type of card is in either of these locations, the name and +unit of that card will be the default names and will be unaffected by +these hints. +If other igb or nvme cards are located elsewhere, they will be +assigned their unit numbers sequentially, skipping the unit numbers +that have 'at' hints. .Sh FILES .Bl -tag -width /dev/pci -compact .It Pa /dev/pci From 4571c92f4b8f44bc746b39d44aa1323c0e55f867 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 20 Dec 2017 19:14:16 +0000 Subject: [PATCH 058/115] Simplify the code a bit. Replace clumsy for(;;) { if (foo) break; ...} with simpler while (!foo) { ... }. Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D13546 --- sys/dev/acpica/acpi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sys/dev/acpica/acpi.c b/sys/dev/acpica/acpi.c index cb57f13d6f0..ab907e92274 100644 --- a/sys/dev/acpica/acpi.c +++ b/sys/dev/acpica/acpi.c @@ -1014,10 +1014,7 @@ acpi_hint_device_unit(device_t acdev, device_t child, const char *name, * name to see if one's resources are a subset of this device. */ line = 0; - for (;;) { - if (resource_find_dev(&line, name, &unit, "at", NULL) != 0) - break; - + while (resource_find_dev(&line, name, &unit, "at", NULL) == 0) { /* Must have an "at" for acpi or isa. 
*/ resource_string_value(name, unit, "at", &s); if (!(strcmp(s, "acpi0") == 0 || strcmp(s, "acpi") == 0 || From 5ee9f0508595903f73a5f91f5958feffc63f6261 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 20 Dec 2017 19:14:20 +0000 Subject: [PATCH 059/115] Add nvme wiring example Add NVMe and MMC/SD card support info. Sponsored by: Netflix --- share/man/man4/scsi.4 | 49 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/share/man/man4/scsi.4 b/share/man/man4/scsi.4 index c4b9cbe9a30..a250fb72a8c 100644 --- a/share/man/man4/scsi.4 +++ b/share/man/man4/scsi.4 @@ -54,13 +54,17 @@ The .Nm subsystem provides a uniform and modular system for the implementation of drivers to control various -.Tn SCSI +.Tn SCSI , +.Tn ATA , +.Tn NVMe , and -.Tn ATA +.Tn MMC / SD devices, and to utilize different -.Tn SCSI +.Tn SCSI , +.Tn ATA , +.Tn NVMe , and -.Tn ATA +.Tn MMC / SD host adapters through host adapter drivers. When the system probes buses, it attaches any devices it finds to the appropriate drivers. @@ -213,6 +217,31 @@ and use the first respective counted instances. These examples can be combined together to allow a peripheral device to be wired to any particular controller, bus, target, and/or unit instance. .Pp +This also works with +.Xr nvme 4 +drives as well. +.Bd -literal -offset indent +hint.nvme.4.at="pci7:0:0" +hint.scbus.10.at="nvme4" +hint.nda.10.at="scbus10" +hint.nda.10.target="1" +hint.nda.10.unit="12" +hint.nda.11.at="scbus10" +hint.nda.11.target="1" +hint.nda.11.unit="2" +.Ed +.Pp +This assigns the NVMe card living at PCI bus 7 to scbus 10 (in PCIe, +slot and function are rarely used and usually 0). +The target for +.Xr nda 4 +devices is always 1. +The unit is the namespace identifier from the drive. +The namespace id 1 is exported as +.Tn nda10 +and namespace id 2 is exported as +.Tn nda11 .
+.Pp When you have a mixture of wired down and counted devices then the counting begins with the first non-wired down unit for a particular type. @@ -225,9 +254,11 @@ The system allows common device drivers to work through many different types of adapters. The adapters take requests from the upper layers and do all IO between the -.Tn SCSI +.Tn SCSI , +.Tn ATA , +.Tn NVMe , or -.Tn ATA +.Tn MMC / SD bus and the system. The maximum size of a transfer is governed by the adapter. @@ -322,6 +353,7 @@ for details. .Xr cd 4 , .Xr ch 4 , .Xr da 4 , +.Xr nvme 4 , .Xr pass 4 , .Xr pt 4 , .Xr sa 4 , @@ -351,3 +383,8 @@ The .Tn ATA support was added by .An Alexander Motin Aq Mt mav@FreeBSD.org . +The +.Nm +.Tn NVMe +support was added by +.An Warner Losh Aq Mt imp@FreeBSD.org . From 33d72c30f1d883cd9c9574e4732f2fc48a565bcc Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Wed, 20 Dec 2017 20:25:28 +0000 Subject: [PATCH 060/115] Revert r327005 - SPDX tags for license similar to BSD-2-Clause. After consultation with SPDX experts and their matching guidelines[1], the licensing doesn't exactly match the BSD-2-Clause. It yet remains to be determined if they are equivalent or if there is a recognized license that matches but it is safer to just revert the tags. Let this also be a reminder that on FreeBSD, SPDX tags are only advisory and have no legal value (but IANAL). 
Pointyhat to: pfg Thanks to: Rodney Grimes, Gary O'Neall [1] https://spdx.org/spdx-license-list/matching-guidelines --- sbin/ipfw/altq.c | 2 -- sbin/ipfw/dummynet.c | 4 +--- sbin/ipfw/ipfw2.c | 2 -- sbin/ipfw/ipfw2.h | 2 -- sbin/ipfw/ipv6.c | 2 -- sbin/ipfw/main.c | 2 -- sbin/ipfw/nat.c | 2 -- sys/sys/msg.h | 2 -- sys/sys/snoop.h | 2 -- usr.sbin/watch/watch.c | 2 -- 10 files changed, 1 insertion(+), 21 deletions(-) diff --git a/sbin/ipfw/altq.c b/sbin/ipfw/altq.c index a78852cfd63..85f6d719197 100644 --- a/sbin/ipfw/altq.c +++ b/sbin/ipfw/altq.c @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c index e3a04b389eb..6be21cfb230 100644 --- a/sbin/ipfw/dummynet.c +++ b/sbin/ipfw/dummynet.c @@ -1,6 +1,4 @@ -/*. - * SPDX-License-Identifier: BSD-2-Clause - * +/*- * Codel/FQ_Codel and PIE/FQ_PIE Code: * Copyright (C) 2016 Centre for Advanced Internet Architectures, * Swinburne University of Technology, Melbourne, Australia. 
diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index 455669763bf..82cb23a9a5c 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h index 274c0008a95..3fe15e93431 100644 --- a/sbin/ipfw/ipfw2.h +++ b/sbin/ipfw/ipfw2.h @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/ipv6.c b/sbin/ipfw/ipv6.c index 5c643ca911e..103e7438c8c 100644 --- a/sbin/ipfw/ipv6.c +++ b/sbin/ipfw/ipv6.c @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c index 0ea0bb7cb63..a012c4de822 100644 --- a/sbin/ipfw/main.c +++ b/sbin/ipfw/main.c @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright (c) 2002-2003,2010 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sbin/ipfw/nat.c b/sbin/ipfw/nat.c index 70a50de947c..aced8ea10ec 100644 --- a/sbin/ipfw/nat.c +++ b/sbin/ipfw/nat.c @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright (c) 2002-2003 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich diff --git a/sys/sys/msg.h b/sys/sys/msg.h index d2e9f756d5d..c2bb2d25440 100644 --- a/sys/sys/msg.h +++ b/sys/sys/msg.h @@ -6,8 +6,6 @@ * * Author: Daniel Boulet * - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright 1993 Daniel Boulet and RTMX Inc. 
* * This system call was implemented by Daniel Boulet under contract from RTMX. diff --git a/sys/sys/snoop.h b/sys/sys/snoop.h index 1a5e117976f..b25aa494ff8 100644 --- a/sys/sys/snoop.h +++ b/sys/sys/snoop.h @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright (c) 1995 Ugen J.S.Antsilevich * * Redistribution and use in source forms, with and without modification, diff --git a/usr.sbin/watch/watch.c b/usr.sbin/watch/watch.c index 3585d4555b1..85583882ea4 100644 --- a/usr.sbin/watch/watch.c +++ b/usr.sbin/watch/watch.c @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause - * * Copyright (c) 1995 Ugen J.S.Antsilevich * * Redistribution and use in source forms, with and without modification, From 5d83601f1fc1946c1c5969283f3f20dd2eac39bf Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Wed, 20 Dec 2017 20:41:51 +0000 Subject: [PATCH 061/115] Remove arm-specific implementations of atomic_load/store_xxx() now that they are provided by sys/atomic_common.h. --- sys/arm/include/atomic.h | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/sys/arm/include/atomic.h b/sys/arm/include/atomic.h index 3c7b145813a..25c663bc251 100644 --- a/sys/arm/include/atomic.h +++ b/sys/arm/include/atomic.h @@ -55,32 +55,6 @@ #include #endif /* Arch >= v6 */ -static __inline int -atomic_load_32(volatile uint32_t *v) -{ - - return (*v); -} - -static __inline void -atomic_store_32(volatile uint32_t *dst, uint32_t src) -{ - *dst = src; -} - -static __inline int -atomic_load_long(volatile u_long *v) -{ - - return (*v); -} - -static __inline void -atomic_store_long(volatile u_long *dst, u_long src) -{ - *dst = src; -} - #define atomic_clear_ptr atomic_clear_32 #define atomic_clear_acq_ptr atomic_clear_acq_32 #define atomic_clear_rel_ptr atomic_clear_rel_32 @@ -94,7 +68,6 @@ atomic_store_long(volatile u_long *dst, u_long src) #define atomic_cmpset_acq_ptr atomic_cmpset_acq_32 #define atomic_cmpset_rel_ptr atomic_cmpset_rel_32 #define 
atomic_load_acq_ptr atomic_load_acq_32 -#define atomic_store_ptr atomic_store_32 #define atomic_store_rel_ptr atomic_store_rel_32 #define atomic_swap_ptr atomic_swap_32 #define atomic_readandclear_ptr atomic_readandclear_32 From df1e0a51ec335f79ae983766c65dc0871e18be1b Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Wed, 20 Dec 2017 20:46:12 +0000 Subject: [PATCH 062/115] Restore the ability to use EARLY_PRINTF support during most of initarm(). The real kernel page tables are set up much earlier in initarm() now than they were when early printf support was first added, and they end up undoing the mapping made in locore.S for early printf support. This re-adds the mapping after switching to the new/real kernel page tables, making early printf work again right after switching to them. --- sys/arm/arm/machdep.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index 8d1584cd8e7..d1aa26c24b4 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -1128,6 +1128,19 @@ initarm(struct arm_boot_params *abp) pmap_set_tex(); pmap_bootstrap_prepare(lastaddr); + /* + * If EARLY_PRINTF support is enabled, we need to re-establish the + * mapping after pmap_bootstrap_prepare() switches to new page tables. + * Note that we can only do the remapping if the VA is outside the + * kernel, now that we have real virtual (not VA=PA) mappings in effect. + * Early printf does not work between the time pmap_set_tex() does + * cp15_prrr_set() and this code remaps the VA. + */ +#if defined(EARLY_PRINTF) && defined(SOCDEV_PA) && defined(SOCDEV_VA) && SOCDEV_VA < KERNBASE + pmap_preboot_map_attr(SOCDEV_PA, SOCDEV_VA, 1024 * 1024, + VM_PROT_READ | VM_PROT_WRITE, VM_MEMATTR_DEVICE); +#endif + /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. 
From e3842da22f28b41d10913e5899ff9312d8cc0fe4 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Wed, 20 Dec 2017 22:17:27 +0000 Subject: [PATCH 063/115] Allow pmap_kremove() to remove 1MB section mappings as well as 4K pages. This will allow it to undo temporary device mappings such as those made with pmap_preboot_map_attr(). Reviewed by: cognet --- sys/arm/arm/pmap-v6.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 423c7dbba34..cc21d2f2c2e 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -1310,10 +1310,16 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa) PMAP_INLINE void pmap_kremove(vm_offset_t va) { + pt1_entry_t *pte1p; pt2_entry_t *pte2p; - pte2p = pt2map_entry(va); - pte2_clear(pte2p); + pte1p = kern_pte1(va); + if (pte1_is_section(pte1_load(pte1p))) { + pte1_clear(pte1p); + } else { + pte2p = pt2map_entry(va); + pte2_clear(pte2p); + } } /* From 68359587f6a241a068bdea9c70157e775f6f1155 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Wed, 20 Dec 2017 22:19:11 +0000 Subject: [PATCH 064/115] If a temporary mapping is made to support EARLY_PRINTF, undo that mapping after cninit() runs, otherwise we leave a bogus device-memory mapping in userspace VA in the kernel pmap forever. Pointed out by: cognet --- sys/arm/arm/machdep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index d1aa26c24b4..e33252c1427 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -1204,6 +1204,14 @@ initarm(struct arm_boot_params *abp) platform_gpio_init(); cninit(); + /* + * If we made a mapping for EARLY_PRINTF after pmap_bootstrap_prepare(), + * undo it now that the normal console printf works. 
+ */ +#if defined(EARLY_PRINTF) && defined(SOCDEV_PA) && defined(SOCDEV_VA) && SOCDEV_VA < KERNBASE + pmap_kremove(SOCDEV_VA); +#endif + debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); From 788a1230541c6ee28d5d6038dd227ab99af8dd38 Mon Sep 17 00:00:00 2001 From: Eric Joyner Date: Thu, 21 Dec 2017 00:35:14 +0000 Subject: [PATCH 065/115] ixgbe(4): Fix build error on i386. Reported by: markj --- sys/dev/ixgbe/if_ixv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/ixgbe/if_ixv.c b/sys/dev/ixgbe/if_ixv.c index 7114b51b3bb..f0361cfb665 100644 --- a/sys/dev/ixgbe/if_ixv.c +++ b/sys/dev/ixgbe/if_ixv.c @@ -1791,7 +1791,7 @@ ixv_add_stats_sysctls(struct adapter *adapter) CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); - SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx", + SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(txr->tso_tx), "TSO Packets"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "TX Packets"); From 25ac1dd5c7fb740cadec19794b9e12814c921cda Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Thu, 21 Dec 2017 01:22:36 +0000 Subject: [PATCH 066/115] Don't call tcp_lro_rx() unless hardware verified TCP/UDP csum It seems that tcp_lro_rx() doesn't verify TCP checksums, so if there are bad checksums in the packets caused by invalid data, the invalid data will pass through without errors. This was noticed with the igb driver and a specific internet host: fetch http://www.mpfr.org/mpfr-current/mpfr-3.1.6.tar.xz -o test.bin && sha256 test.bin Would result in a different value sometimes. This ends up making LRO require RXCSUM to be enabled, and RXCSUM to support TCP and UDP checksums. 
PR: 224346 Reported by: gjb Reviewed by: sbruno Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D13561 --- sys/net/iflib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 370efbb7da7..9e11f56a4da 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -2632,8 +2632,11 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) mt = mf = NULL; } } - if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) + if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == + (CSUM_L4_CALC|CSUM_L4_VALID)) { + if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; + } } #endif if (lro_possible) { From 5eff7c4116172df41f8ca062305562019df7dce6 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Thu, 21 Dec 2017 01:27:32 +0000 Subject: [PATCH 067/115] Remove MD atomic_load_{32,64,int,long,ptr}(9) obsolete since the addition of (conflicting) MI ones in r326971. --- sys/sparc64/include/atomic.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sys/sparc64/include/atomic.h b/sys/sparc64/include/atomic.h index e34853b5399..427faf619c9 100644 --- a/sys/sparc64/include/atomic.h +++ b/sys/sparc64/include/atomic.h @@ -258,11 +258,6 @@ atomic_fcmpset_rel_ ## name(volatile ptype p, vtype *ep, vtype s) \ } \ \ static __inline vtype \ -atomic_load_ ## name(volatile ptype p) \ -{ \ - return ((vtype)atomic_cas((p), 0, 0, sz)); \ -} \ -static __inline vtype \ atomic_load_acq_ ## name(volatile ptype p) \ { \ return ((vtype)atomic_cas_acq((p), 0, 0, sz)); \ From 41ec95c47fd3bac8549d0b5d424eaa2b6420e47d Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Thu, 21 Dec 2017 04:21:59 +0000 Subject: [PATCH 068/115] Bump number that's an insane number of devices from 1,000 to 10,000. I have access to machines that are pushing 400 devices. When 1,000 was selected, it was rare to get even 40 or 50 devices. Bump the limit by 10x to keep up with the times. 
Sponsored by: Netflix --- lib/libdevinfo/devinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libdevinfo/devinfo.c b/lib/libdevinfo/devinfo.c index 98fe6d1f673..bcdb735d154 100644 --- a/lib/libdevinfo/devinfo.c +++ b/lib/libdevinfo/devinfo.c @@ -204,7 +204,7 @@ devinfo_init_devices(int generation) * Stop after a fairly insane number to avoid death in the case * of kernel corruption. */ - for (dev_idx = 0; dev_idx < 1000; dev_idx++) { + for (dev_idx = 0; dev_idx < 10000; dev_idx++) { /* * Get the device information. From 4d21bbd1e7e7d8ac1921d18daf26188332a60980 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Thu, 21 Dec 2017 04:23:00 +0000 Subject: [PATCH 069/115] Provide an nda man page. Add cross references. Bump dates. Also, CAM is a storage subsystem, not a SCSI/ATA one these days. Sponsored by: Netflix --- share/man/man4/Makefile | 2 + share/man/man4/ada.4 | 3 +- share/man/man4/da.4 | 3 +- share/man/man4/nda.4 | 87 +++++++++++++++++++++++++++++++++++++++++ share/man/man4/pci.4 | 2 +- share/man/man4/scsi.4 | 5 ++- 6 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 share/man/man4/nda.4 diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 06561c1b583..9496d0988c2 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -382,6 +382,7 @@ MAN= aac.4 \ ${_ntb_hw_intel.4} \ ${_ntb_hw_plx.4} \ ${_ntb_transport.4} \ + ${_nda.4} \ ${_if_ntb.4} \ null.4 \ numa.4 \ @@ -808,6 +809,7 @@ _if_wpi.4= if_wpi.4 _ipmi.4= ipmi.4 _io.4= io.4 _linux.4= linux.4 +_nda.4= nda.4 _ndis.4= ndis.4 _nfe.4= nfe.4 _nfsmb.4= nfsmb.4 diff --git a/share/man/man4/ada.4 b/share/man/man4/ada.4 index 58219e31c0a..0d1c0506425 100644 --- a/share/man/man4/ada.4 +++ b/share/man/man4/ada.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd June 19, 2015 +.Dd December 20, 2017 .Dt ADA 4 .Os .Sh NAME @@ -157,6 +157,7 @@ ATA device nodes .Xr cam 4 , .Xr da 4 , .Xr mvs 4 , +.Xr nda 4 , .Xr siis 4 .Sh HISTORY The diff --git
a/share/man/man4/da.4 b/share/man/man4/da.4 index eb1c09a8ef4..ed820c63775 100644 --- a/share/man/man4/da.4 +++ b/share/man/man4/da.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 11, 2015 +.Dd December 20, 2017 .Dt DA 4 .Os .Sh NAME @@ -214,6 +214,7 @@ None. .Xr ada 4 , .Xr cam 4 , .Xr geom 4 , +.Xr nda 4 , .Xr gpart 8 .Sh HISTORY The diff --git a/share/man/man4/nda.4 b/share/man/man4/nda.4 new file mode 100644 index 00000000000..bc1240e2c01 --- /dev/null +++ b/share/man/man4/nda.4 @@ -0,0 +1,87 @@ +.\" Copyright (c) 2017 Netflix, Inc +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd December 20, 2017 +.Dt NDA 4 +.Os +.Sh NAME +.Nm nda +.Nd NVMe Direct Access device driver +.Sh SYNOPSIS +.Cd device nvme +.Cd device scbus +.Sh DESCRIPTION +The +.Nm +driver provides support for direct access devices, implementing the +.Tn NVMe +command protocol, that are attached to the system through a host adapter +supported by the CAM subsystem. +.Sh SYSCTL VARIABLES +The following variables are available as both +.Xr sysctl 8 +variables and +.Xr loader 8 +tunables: +.Bl -tag -width 12 +.It Va kern.cam.nda.sort_io_queue +.Pp +This variable determines whether the software queued entries are +sorted in LBA order or not. +Sorting is almost always a waste of time. +The default is to not sort. +.El +.Pp +The following report per-device settings, and are read-only unless +otherwise indicated. Replace +.Va N +with the device unit number. +.Bl -tag -width 12 +.It Va kern.cam.nda.N.rotating +.Pp +This variable reports whether the storage volume is spinning or +flash. +Its value is hard coded to 0 indicating flash. +.It Va kern.cam.nda.N.unmapped_io +This variable reports whether the +.Nm +driver accepts unmapped I/O for this unit. +.Sh FILES +.Bl -tag -width ".Pa /dev/nda*" -compact +.It Pa /dev/nda* +NVMe storage device nodes +.El +.Sh SEE ALSO +.Xr nvme 4 , +.Xr nvd 4 +.Sh HISTORY +The +.Nm +driver first appeared in +.Fx 12.0 . +.Sh AUTHORS +.An Warner Losh Aq Mt imp@FreeBSD.org diff --git a/share/man/man4/pci.4 b/share/man/man4/pci.4 index 2fcf4d846aa..d1ed7714907 100644 --- a/share/man/man4/pci.4 +++ b/share/man/man4/pci.4 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 8, 2016 +.Dd December 20, 2017 .Dt PCI 4 .Os .Sh NAME diff --git a/share/man/man4/scsi.4 b/share/man/man4/scsi.4 index a250fb72a8c..60f66eca418 100644 --- a/share/man/man4/scsi.4 +++ b/share/man/man4/scsi.4 @@ -24,12 +24,12 @@ .\" SUCH DAMAGE.
.\" .\" $FreeBSD$ -.Dd January 15, 2017 +.Dd December 20, 2017 .Dt CAM 4 .Os .Sh NAME .Nm CAM -.Nd Common Access Method SCSI/ATA subsystem +.Nd Common Access Method Storage subsystem .Sh SYNOPSIS .Cd "device scbus" .Cd "device ada" @@ -353,6 +353,7 @@ for details. .Xr cd 4 , .Xr ch 4 , .Xr da 4 , +.Xr nda 4 , .Xr nvme 4 , .Xr pass 4 , .Xr pt 4 , From da9fba5447d254b680a4efbf540919aef3c0455a Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Thu, 21 Dec 2017 09:17:48 +0000 Subject: [PATCH 070/115] Use resume_cpus() instead of restart_cpus() to resume from ACPI suspension. restart_cpus() worked well enough by accident. Before this set of fixes, resume_cpus() used the same cpuset (started_cpus, meaning CPUs directed to restart) as restart_cpus(). resume_cpus() waited for the wrong cpuset (stopped_cpus) to become empty, but since mixtures of stopped and suspended CPUs are not close to working, stopped_cpus must be empty when resuming so the wait is null -- restart_cpus just allows the other CPUs to restart and returns without waiting. Fix resume_cpus() to wait on a non-wrong cpuset for the ACPI case, and add further kludges to try to keep it working for the XEN case. It was only used for XEN. It waited on suspended_cpus. This works for XEN. However, for ACPI, resuming is a 2-step process. ACPI has already woken up the other CPUs and removed them from suspended_cpus. This fix records the move by putting them in a new cpuset resuming_cpus. Waiting on suspended_cpus would give the same null wait as waiting on stopped_cpus. Wait on resuming_cpus instead. Add a cpuset toresume_cpus to map the CPUs being told to resume to keep this separate from the cpuset started_cpus for mapping the CPUs being told to restart. Mixtures of stopped and suspended/resuming CPUs are still far from working. Describe new and some old cpusets in comments. Add further kludges to cpususpend_handler() to try to avoid breaking it for XEN. 
XEN doesn't use resumectx(), so it doesn't use the second return path for savectx(), and it goes from the suspended state directly to the restarted state, while ACPI resume goes through the resuming state. Enter the resuming state early for all cases so that resume_cpus can test for being in this state and not have to worry about the intermediate !suspended state for ACPI only. Reviewed by: kib --- sys/kern/subr_smp.c | 9 +++++++-- sys/sys/smp.h | 11 +++++++---- sys/x86/acpica/acpi_wakeup.c | 2 +- sys/x86/x86/mp_x86.c | 19 +++++++++++++++---- 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 1700cde7a7e..7a3d04caf52 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -351,13 +351,18 @@ generic_restart_cpus(cpuset_t map, u_int type) #if X86 if (type == IPI_SUSPEND) - cpus = &suspended_cpus; + cpus = &resuming_cpus; else #endif cpus = &stopped_cpus; /* signal other cpus to restart */ - CPU_COPY_STORE_REL(&map, &started_cpus); +#if X86 + if (type == IPI_SUSPEND) + CPU_COPY_STORE_REL(&map, &toresume_cpus); + else +#endif + CPU_COPY_STORE_REL(&map, &started_cpus); #if X86 if (!nmi_is_broadcast || nmi_kdb_lock == 0) { diff --git a/sys/sys/smp.h b/sys/sys/smp.h index fe5e4e64ec2..35b068a4e24 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -155,10 +155,13 @@ struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu); extern void (*cpustop_restartfunc)(void); extern int smp_cpus; -extern volatile cpuset_t started_cpus; -extern volatile cpuset_t stopped_cpus; -extern volatile cpuset_t suspended_cpus; -extern cpuset_t hlt_cpus_mask; +/* The suspend/resume cpusets are x86 only, but minimize ifdefs. 
*/ +extern volatile cpuset_t resuming_cpus; /* woken up cpus in suspend pen */ +extern volatile cpuset_t started_cpus; /* cpus to let out of stop pen */ +extern volatile cpuset_t stopped_cpus; /* cpus in stop pen */ +extern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */ +extern volatile cpuset_t toresume_cpus; /* cpus to let out of suspend pen */ +extern cpuset_t hlt_cpus_mask; /* XXX 'mask' is detail in old impl */ extern cpuset_t logical_cpus_mask; #endif /* SMP */ diff --git a/sys/x86/acpica/acpi_wakeup.c b/sys/x86/acpica/acpi_wakeup.c index 853ead4a90a..a1c40fef718 100644 --- a/sys/x86/acpica/acpi_wakeup.c +++ b/sys/x86/acpica/acpi_wakeup.c @@ -310,7 +310,7 @@ acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result, #ifdef SMP if (!CPU_EMPTY(&suspcpus)) - restart_cpus(suspcpus); + resume_cpus(suspcpus); #endif mca_resume(); #ifdef __amd64__ diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c index bc2f3526369..a19609c2887 100644 --- a/sys/x86/x86/mp_x86.c +++ b/sys/x86/x86/mp_x86.c @@ -124,6 +124,9 @@ struct cpu_ops cpu_ops; static volatile cpuset_t ipi_stop_nmi_pending; +volatile cpuset_t resuming_cpus; +volatile cpuset_t toresume_cpus; + /* used to hold the AP's until we are ready to release them */ struct mtx ap_boot_mtx; @@ -1379,6 +1382,13 @@ cpususpend_handler(void) #endif wbinvd(); CPU_SET_ATOMIC(cpu, &suspended_cpus); + /* + * Hack for xen, which does not use resumectx() so never + * uses the next clause: set resuming_cpus early so that + * resume_cpus() can wait on the same bitmap for acpi and + * xen. resuming_cpus now means eventually_resumable_cpus. 
+ */ + CPU_SET_ATOMIC(cpu, &resuming_cpus); } else { #ifdef __amd64__ fpuresume(susppcbs[cpu]->sp_fpususpend); @@ -1390,12 +1400,12 @@ cpususpend_handler(void) PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); - /* Indicate that we are resumed */ + /* Indicate that we are resuming */ CPU_CLR_ATOMIC(cpu, &suspended_cpus); } - /* Wait for resume */ - while (!CPU_ISSET(cpu, &started_cpus)) + /* Wait for resume directive */ + while (!CPU_ISSET(cpu, &toresume_cpus)) ia32_pause(); #ifdef __i386__ @@ -1416,8 +1426,9 @@ cpususpend_handler(void) lapic_setup(0); /* Indicate that we are resumed */ + CPU_CLR_ATOMIC(cpu, &resuming_cpus); CPU_CLR_ATOMIC(cpu, &suspended_cpus); - CPU_CLR_ATOMIC(cpu, &started_cpus); + CPU_CLR_ATOMIC(cpu, &toresume_cpus); } From 4be29fb3c55b8b352e7d02ff93df793b2133dffe Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Thu, 21 Dec 2017 09:21:40 +0000 Subject: [PATCH 071/115] Make truss work for CloudABI executables on i386. The system call convention is different from i386 binaries running on FreeBSD/amd64, but this is not noticeable by executables. On FreeBSD/amd64, the vDSO already does padding of arguments and return values to 64-bit values. On i386, it does not, meaning that system call return values are simply stored in registers. 
--- usr.bin/truss/Makefile | 1 + usr.bin/truss/i386-cloudabi32.c | 98 +++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 usr.bin/truss/i386-cloudabi32.c diff --git a/usr.bin/truss/Makefile b/usr.bin/truss/Makefile index ad4f0f5ac30..7eddb012a64 100644 --- a/usr.bin/truss/Makefile +++ b/usr.bin/truss/Makefile @@ -18,6 +18,7 @@ ABIS+= cloudabi64 .endif .if ${MACHINE_CPUARCH} == "i386" ABIS+= i386-linux +ABIS+= cloudabi32 .endif .if ${MACHINE_CPUARCH} == "amd64" ABIS+= amd64-linux diff --git a/usr.bin/truss/i386-cloudabi32.c b/usr.bin/truss/i386-cloudabi32.c new file mode 100644 index 00000000000..2d8c1128ec2 --- /dev/null +++ b/usr.bin/truss/i386-cloudabi32.c @@ -0,0 +1,98 @@ +/*- + * Copyright (c) 2015-2017 Nuxi, https://nuxi.nl/ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include + +#include +#include +#include + +#include "truss.h" + +static int +i386_cloudabi32_fetch_args(struct trussinfo *trussinfo, unsigned int narg) +{ + struct current_syscall *cs; + struct ptrace_io_desc iorequest; + struct reg regs; + lwpid_t tid; + + if (narg > 0) { + /* Fetch registers, containing the stack pointer. */ + tid = trussinfo->curthread->tid; + if (ptrace(PT_GETREGS, tid, (caddr_t)&regs, 0) == -1) { + fprintf(trussinfo->outfile, + "-- CANNOT READ REGISTERS --\n"); + return (-1); + } + + /* Fetch arguments.
*/ + cs = &trussinfo->curthread->cs; + iorequest.piod_op = PIOD_READ_D; + iorequest.piod_offs = (void **)regs.r_esp + 1; + iorequest.piod_addr = cs->args; + iorequest.piod_len = sizeof(cs->args[0]) * narg; + if (ptrace(PT_IO, tid, (caddr_t)&iorequest, 0) == -1 || + iorequest.piod_len == 0) + return (-1); + } + return (0); +} + +static int +i386_cloudabi32_fetch_retval(struct trussinfo *trussinfo, long *retval, + int *errorp) +{ + struct reg regs; + lwpid_t tid; + + tid = trussinfo->curthread->tid; + if (ptrace(PT_GETREGS, tid, (caddr_t)&regs, 0) == -1) { + fprintf(trussinfo->outfile, "-- CANNOT READ REGISTERS --\n"); + return (-1); + } + + retval[0] = regs.r_eax; + retval[1] = regs.r_edx; + *errorp = (regs.r_eflags & PSL_C) != 0; + return (0); +} + +static struct procabi i386_cloudabi32 = { + "CloudABI ELF32", + SYSDECODE_ABI_CLOUDABI32, + i386_cloudabi32_fetch_args, + i386_cloudabi32_fetch_retval, + STAILQ_HEAD_INITIALIZER(i386_cloudabi32.extra_syscalls), + { NULL } +}; + +PROCABI(i386_cloudabi32); From 5e6e2d38c13308a1848ea1afe40f3b19912139a2 Mon Sep 17 00:00:00 2001 From: Michael Zhilin Date: Thu, 21 Dec 2017 12:21:35 +0000 Subject: [PATCH 072/115] [boot/efi] scan all display modes rather than sequential try-fail way This patch allows to scan all display modes in boot1 as loader does. Before system tried to select optimal display mode by sequential scan of modes and if error then stop scanning. This way is not good, because if mode N is not present, mode N+1 may exist. In loader we use conout->Mode->MaxMode to identify maximum number of modes. This commit is to use same way in boot1 as in loader.
Reported by: Andrey Pustovetov Reviewed by: tsoome Differential Revision: https://reviews.freebsd.org/D13541 --- stand/efi/boot1/boot1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stand/efi/boot1/boot1.c b/stand/efi/boot1/boot1.c index 7b51f03e14a..f00c058e37d 100644 --- a/stand/efi/boot1/boot1.c +++ b/stand/efi/boot1/boot1.c @@ -430,10 +430,10 @@ efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE *Xsystab) conout = ST->ConOut; conout->Reset(conout, TRUE); max_dim = best_mode = 0; - for (i = 0; ; i++) { + for (i = 0; i < conout->Mode->MaxMode; i++) { status = conout->QueryMode(conout, i, &cols, &rows); if (EFI_ERROR(status)) - break; + continue; if (cols * rows > max_dim) { max_dim = cols * rows; best_mode = i; From 426b6bd53c678e5c60ddf1f84a75aa3c07377fa3 Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Thu, 21 Dec 2017 15:19:43 +0000 Subject: [PATCH 073/115] cxgbe(4): Read the MFG diags version from the VPD and make it available in the sysctl MIB. MFC after: 1 week Sponsored by: Chelsio Communications --- sys/dev/cxgbe/common/common.h | 4 ++- sys/dev/cxgbe/common/t4_hw.c | 65 +++++++++++++++++++++++------------ sys/dev/cxgbe/t4_main.c | 5 ++- 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h index 6adca041665..9de636ce171 100644 --- a/sys/dev/cxgbe/common/common.h +++ b/sys/dev/cxgbe/common/common.h @@ -44,6 +44,7 @@ enum { EC_LEN = 16, /* E/C length */ ID_LEN = 16, /* ID length */ PN_LEN = 16, /* Part Number length */ + MD_LEN = 16, /* MFG diags version length */ MACADDR_LEN = 12, /* MAC Address length */ }; @@ -258,6 +259,7 @@ struct vpd_params { u8 id[ID_LEN + 1]; u8 pn[PN_LEN + 1]; u8 na[MACADDR_LEN + 1]; + u8 md[MD_LEN + 1]; }; struct pci_params { @@ -590,7 +592,7 @@ int t4_get_vpd_version(struct adapter *adapter, u32 *vers); int t4_get_version_info(struct adapter *adapter); int t4_init_hw(struct adapter *adapter, u32 fw_params); const struct chip_params 
*t4_get_chip_params(int chipid); -int t4_prep_adapter(struct adapter *adapter, u8 *buf); +int t4_prep_adapter(struct adapter *adapter, u32 *buf); int t4_shutdown_adapter(struct adapter *adapter); int t4_init_devlog_params(struct adapter *adapter, int fw_attach); int t4_init_sge_params(struct adapter *adapter); diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c index 77ceb675e9c..40f1c686ddb 100644 --- a/sys/dev/cxgbe/common/t4_hw.c +++ b/sys/dev/cxgbe/common/t4_hw.c @@ -2664,13 +2664,16 @@ void t4_get_regs(struct adapter *adap, u8 *buf, size_t buf_size) } /* - * Partial EEPROM Vital Product Data structure. Includes only the ID and - * VPD-R sections. + * Partial EEPROM Vital Product Data structure. The VPD starts with one ID + * header followed by one or more VPD-R sections, each with its own header. */ struct t4_vpd_hdr { u8 id_tag; u8 id_len[2]; u8 id_data[ID_LEN]; +}; + +struct t4_vpdr_hdr { u8 vpdr_tag; u8 vpdr_len[2]; }; @@ -2905,32 +2908,43 @@ int t4_seeprom_wp(struct adapter *adapter, int enable) /** * get_vpd_keyword_val - Locates an information field keyword in the VPD - * @v: Pointer to buffered vpd data structure + * @vpd: Pointer to buffered vpd data structure * @kw: The keyword to search for + * @region: VPD region to search (starting from 0) * * Returns the value of the information field keyword or * -ENOENT otherwise. 
*/ -static int get_vpd_keyword_val(const struct t4_vpd_hdr *v, const char *kw) +static int get_vpd_keyword_val(const u8 *vpd, const char *kw, int region) { - int i; - unsigned int offset , len; - const u8 *buf = (const u8 *)v; - const u8 *vpdr_len = &v->vpdr_len[0]; - offset = sizeof(struct t4_vpd_hdr); - len = (u16)vpdr_len[0] + ((u16)vpdr_len[1] << 8); + int i, tag; + unsigned int offset, len; + const struct t4_vpdr_hdr *vpdr; - if (len + sizeof(struct t4_vpd_hdr) > VPD_LEN) { + offset = sizeof(struct t4_vpd_hdr); + vpdr = (const void *)(vpd + offset); + tag = vpdr->vpdr_tag; + len = (u16)vpdr->vpdr_len[0] + ((u16)vpdr->vpdr_len[1] << 8); + while (region--) { + offset += sizeof(struct t4_vpdr_hdr) + len; + vpdr = (const void *)(vpd + offset); + if (++tag != vpdr->vpdr_tag) + return -ENOENT; + len = (u16)vpdr->vpdr_len[0] + ((u16)vpdr->vpdr_len[1] << 8); + } + offset += sizeof(struct t4_vpdr_hdr); + + if (offset + len > VPD_LEN) { return -ENOENT; } for (i = offset; i + VPD_INFO_FLD_HDR_SIZE <= offset + len;) { - if(memcmp(buf + i , kw , 2) == 0){ + if (memcmp(vpd + i , kw , 2) == 0){ i += VPD_INFO_FLD_HDR_SIZE; return i; } - i += VPD_INFO_FLD_HDR_SIZE + buf[i+2]; + i += VPD_INFO_FLD_HDR_SIZE + vpd[i+2]; } return -ENOENT; @@ -2946,18 +2960,18 @@ static int get_vpd_keyword_val(const struct t4_vpd_hdr *v, const char *kw) * Reads card parameters stored in VPD EEPROM. */ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p, - u8 *vpd) + u32 *buf) { int i, ret, addr; - int ec, sn, pn, na; + int ec, sn, pn, na, md; u8 csum; - const struct t4_vpd_hdr *v; + const u8 *vpd = (const u8 *)buf; /* * Card information normally starts at VPD_BASE but early cards had * it at 0. */ - ret = t4_seeprom_read(adapter, VPD_BASE, (u32 *)(vpd)); + ret = t4_seeprom_read(adapter, VPD_BASE, buf); if (ret) return (ret); @@ -2971,14 +2985,13 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p, addr = *vpd == CHELSIO_VPD_UNIQUE_ID ? 
VPD_BASE : VPD_BASE_OLD; for (i = 0; i < VPD_LEN; i += 4) { - ret = t4_seeprom_read(adapter, addr + i, (u32 *)(vpd + i)); + ret = t4_seeprom_read(adapter, addr + i, buf++); if (ret) return ret; } - v = (const struct t4_vpd_hdr *)vpd; #define FIND_VPD_KW(var,name) do { \ - var = get_vpd_keyword_val(v , name); \ + var = get_vpd_keyword_val(vpd, name, 0); \ if (var < 0) { \ CH_ERR(adapter, "missing VPD keyword " name "\n"); \ return -EINVAL; \ @@ -3001,7 +3014,7 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p, FIND_VPD_KW(na, "NA"); #undef FIND_VPD_KW - memcpy(p->id, v->id_data, ID_LEN); + memcpy(p->id, vpd + offsetof(struct t4_vpd_hdr, id_data), ID_LEN); strstrip(p->id); memcpy(p->ec, vpd + ec, EC_LEN); strstrip(p->ec); @@ -3015,6 +3028,14 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p, memcpy(p->na, vpd + na, min(i, MACADDR_LEN)); strstrip((char *)p->na); + md = get_vpd_keyword_val(vpd, "VF", 1); + if (md < 0) { + snprintf(p->md, sizeof(p->md), "unknown"); + } else { + i = vpd[md - VPD_INFO_FLD_HDR_SIZE + 2]; + memcpy(p->md, vpd + md, min(i, MD_LEN)); + } + return 0; } @@ -7997,7 +8018,7 @@ const struct chip_params *t4_get_chip_params(int chipid) * values for some adapter tunables, take PHYs out of reset, and * initialize the MDIO interface. 
*/ -int t4_prep_adapter(struct adapter *adapter, u8 *buf) +int t4_prep_adapter(struct adapter *adapter, u32 *buf) { int ret; uint16_t device_id; diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index e701b74db51..467421a4afa 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -836,7 +836,7 @@ t4_attach(device_t dev) struct make_dev_args mda; struct intrs_and_queues iaq; struct sge *s; - uint8_t *buf; + uint32_t *buf; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif @@ -5127,6 +5127,9 @@ t4_sysctls(struct adapter *sc) SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec", CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change"); + SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version", + CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version"); + SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na", CTLFLAG_RD, sc->params.vpd.na, 0, "network address"); From c844f14e37e57b9f259fb60423456b297b3951e1 Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Thu, 21 Dec 2017 16:19:10 +0000 Subject: [PATCH 074/115] patch: rejname[] is also -r option buffer, and should be PATH_MAX. Obtained from: OpenBSD (CVS 1.64) --- usr.bin/patch/patch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.bin/patch/patch.c b/usr.bin/patch/patch.c index 822610671b4..b52919fe046 100644 --- a/usr.bin/patch/patch.c +++ b/usr.bin/patch/patch.c @@ -112,7 +112,7 @@ static bool reverse_flag_specified = false; static bool Vflag = false; /* buffer holding the name of the rejected patch file. */ -static char rejname[NAME_MAX + 1]; +static char rejname[PATH_MAX]; /* how many input lines have been irretractibly output */ static LINENUM last_frozen_line = 0; From 76df519fe099c90ba2188cd1ca6846181b24b4b2 Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Thu, 21 Dec 2017 16:25:33 +0000 Subject: [PATCH 075/115] patch: further cleanup to git-style diffs. 
Fix adding and removing files with git-style a/ b/ diffs: only skip six letters if they actually match "--- a/" and "+++ b/" instead of laxer checks. Obtained from: OpenBSD (CVS 1.59) --- usr.bin/patch/pch.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/usr.bin/patch/pch.c b/usr.bin/patch/pch.c index d9087bb3239..0b583ea7eb3 100644 --- a/usr.bin/patch/pch.c +++ b/usr.bin/patch/pch.c @@ -311,14 +311,16 @@ intuit_diff_type(void) &names[OLD_FILE].exists, strippath); else if (strnEQ(s, "--- ", 4)) { size_t off = 4; - if (piece_of_git && strippath == 957) + if (piece_of_git && strippath == 957 && + strnEQ(s, "--- a/", 6)) off = 6; names[NEW_FILE].path = fetchname(s + off, &names[NEW_FILE].exists, strippath); } else if (strnEQ(s, "+++ ", 4)) { /* pretend it is the old name */ size_t off = 4; - if (piece_of_git && strippath == 957) + if (piece_of_git && strippath == 957 && + strnEQ(s, "+++ b/", 6)) off = 6; names[OLD_FILE].path = fetchname(s + off, &names[OLD_FILE].exists, strippath); From 9e33a616939fcff87f7539e3c41323deca5c74ce Mon Sep 17 00:00:00 2001 From: Tycho Nightingale Date: Thu, 21 Dec 2017 18:30:11 +0000 Subject: [PATCH 076/115] Recognize a pending virtual interrupt while emulating the halt instruction. 
Reviewed by: grehan, rgrimes Sponsored by: Dell EMC Isilon Differential Revision: https://reviews.freebsd.org/D13573 --- sys/amd64/vmm/intel/vmx.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index e563bed6212..f803862079c 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -3174,8 +3174,28 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr) pir_desc = vlapic_vtx->pir_desc; pending = atomic_load_acq_long(&pir_desc->pending); - if (!pending) - return (0); /* common case */ + if (!pending) { + /* + * While a virtual interrupt may have already been + * processed the actual delivery maybe pending the + * interruptibility of the guest. Recognize a pending + * interrupt by reevaluating virtual interrupts + * following Section 29.2.1 in the Intel SDM Volume 3. + */ + uint64_t val; + uint8_t rvi, ppr; + + vmx_getreg(vlapic_vtx->vmx, vlapic->vcpuid, + VMCS_IDENT(VMCS_GUEST_INTR_STATUS), &val); + rvi = val & APIC_TPR_INT; + lapic = vlapic->apic_page; + ppr = lapic->ppr & APIC_TPR_INT; + if (rvi > ppr) { + return (1); + } + + return (0); + } /* * If there is an interrupt pending then it will be recognized only @@ -3185,7 +3205,7 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr) * interrupt will be recognized. */ lapic = vlapic->apic_page; - ppr = lapic->ppr & 0xf0; + ppr = lapic->ppr & APIC_TPR_INT; if (ppr == 0) return (1); @@ -3195,7 +3215,7 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr) for (i = 3; i >= 0; i--) { pirval = pir_desc->pir[i]; if (pirval != 0) { - vpr = (i * 64 + flsl(pirval) - 1) & 0xf0; + vpr = (i * 64 + flsl(pirval) - 1) & APIC_TPR_INT; return (vpr > ppr); } } From 33860097371c3a35cb1aabf29b1af62c096c4a8f Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Thu, 21 Dec 2017 18:51:47 +0000 Subject: [PATCH 077/115] Implement "-p dev" to print the path to the given device back to the nexus. 
With redirection, could also be used to test if the device exists in the device tree. Sponsored by: Netflix --- usr.sbin/devinfo/devinfo.8 | 4 +++ usr.sbin/devinfo/devinfo.c | 51 ++++++++++++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/usr.sbin/devinfo/devinfo.8 b/usr.sbin/devinfo/devinfo.8 index a1743fff385..876466511cd 100644 --- a/usr.sbin/devinfo/devinfo.8 +++ b/usr.sbin/devinfo/devinfo.8 @@ -38,6 +38,8 @@ .Op Fl rv .Nm .Fl u +.Nm +.Fl p dev .Sh DESCRIPTION The .Nm @@ -62,6 +64,8 @@ the IRQ consumers together. Display all devices in the driver tree, not just those that are attached or busy. Without this flag, only those devices that have attached are reported. +.It Fl p dev +Display the path of dev back to the root of the device tree. .El .Sh SEE ALSO .Xr systat 1 , diff --git a/usr.sbin/devinfo/devinfo.c b/usr.sbin/devinfo/devinfo.c index 68804bc6ca6..5bc97fe8ace 100644 --- a/usr.sbin/devinfo/devinfo.c +++ b/usr.sbin/devinfo/devinfo.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include "devinfo.h" @@ -196,15 +197,47 @@ print_rman(struct devinfo_rman *rman, void *arg __unused) return(0); } +static void __dead2 +usage(void) +{ + fprintf(stderr, "%s\n%s\n%s\n", + "usage: devinfo [-rv]", + " devinfo -u", + " devifno -p dev"); + exit(1); +} + + +static int +print_path(struct devinfo_dev *dev, void *xname) +{ + const char *name = xname; + int rv; + + if (strcmp(dev->dd_name, name) == 0) { + printf("%s", dev->dd_name); + return (1); + } + + rv = devinfo_foreach_device_child(dev, print_path, xname); + if (rv == 1) + printf(" %s", dev->dd_name[0] ? 
dev->dd_name : "unknown"); + return (rv); +} + int main(int argc, char *argv[]) { struct devinfo_dev *root; int c, uflag; + char *path; uflag = 0; - while ((c = getopt(argc, argv, "ruv")) != -1) { + while ((c = getopt(argc, argv, "p:ruv")) != -1) { switch(c) { + case 'p': + path = optarg; + break; case 'r': rflag++; break; @@ -215,21 +248,25 @@ main(int argc, char *argv[]) vflag++; break; default: - fprintf(stderr, "%s\n%s\n", - "usage: devinfo [-rv]", - " devinfo -u"); - exit(1); + usage(); } } + if (path && (rflag || uflag)) + usage(); + if (devinfo_init()) err(1, "devinfo_init"); if ((root = devinfo_handle_to_device(DEVINFO_ROOT_DEVICE)) == NULL) errx(1, "can't find root device"); - /* print resource usage? */ - if (uflag) { + if (path) { + if (devinfo_foreach_device_child(root, print_path, (void *)path) == 0) + errx(1, "%s: Not found", path); + printf("\n"); + } else if (uflag) { + /* print resource usage? */ devinfo_foreach_rman(print_rman, NULL); } else { /* print device hierarchy */ From 3ca6eb9faf8567ffc6aa107d0e752ea1324c28a8 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Thu, 21 Dec 2017 18:58:14 +0000 Subject: [PATCH 078/115] Fix markup and bump .Dd. --- usr.sbin/devinfo/devinfo.8 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/usr.sbin/devinfo/devinfo.8 b/usr.sbin/devinfo/devinfo.8 index 876466511cd..7b264d903d5 100644 --- a/usr.sbin/devinfo/devinfo.8 +++ b/usr.sbin/devinfo/devinfo.8 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 28, 2005 +.Dd December 21, 2017 .Dt DEVINFO 8 .Os .Sh NAME @@ -39,7 +39,7 @@ .Nm .Fl u .Nm -.Fl p dev +.Fl p Ar dev .Sh DESCRIPTION The .Nm @@ -64,8 +64,10 @@ the IRQ consumers together. Display all devices in the driver tree, not just those that are attached or busy. Without this flag, only those devices that have attached are reported. -.It Fl p dev -Display the path of dev back to the root of the device tree. 
+.It Fl p Ar dev +Display the path of +.Ar dev +back to the root of the device tree. .El .Sh SEE ALSO .Xr systat 1 , From b68de8941f7be489a320b9d91ec61852d74f4f9d Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Thu, 21 Dec 2017 19:19:43 +0000 Subject: [PATCH 079/115] When -v is specified with -p dev, print the same verbose output as when listing the whole tree. The list, however, is from the requested device to the root (so it is backwards from the normal tree). Sponsored by: Netflix --- usr.sbin/devinfo/devinfo.8 | 3 +- usr.sbin/devinfo/devinfo.c | 64 +++++++++++++++++++++++--------------- 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/usr.sbin/devinfo/devinfo.8 b/usr.sbin/devinfo/devinfo.8 index 7b264d903d5..72fd73dba82 100644 --- a/usr.sbin/devinfo/devinfo.8 +++ b/usr.sbin/devinfo/devinfo.8 @@ -39,7 +39,7 @@ .Nm .Fl u .Nm -.Fl p Ar dev +.Fl p Ar dev Op Fl v .Sh DESCRIPTION The .Nm @@ -64,6 +64,7 @@ the IRQ consumers together. Display all devices in the driver tree, not just those that are attached or busy. Without this flag, only those devices that have attached are reported. +This flag also displays verbose information about each device. .It Fl p Ar dev Display the path of .Ar dev diff --git a/usr.sbin/devinfo/devinfo.c b/usr.sbin/devinfo/devinfo.c index 5bc97fe8ace..c4ec2eeccfd 100644 --- a/usr.sbin/devinfo/devinfo.c +++ b/usr.sbin/devinfo/devinfo.c @@ -131,6 +131,22 @@ print_device_rman_resources(struct devinfo_rman *rman, void *arg) return(0); } +static void +print_dev(struct devinfo_dev *dev) +{ + + printf("%s", dev->dd_name[0] ? dev->dd_name : "unknown"); + if (vflag && *dev->dd_pnpinfo) + printf(" pnpinfo %s", dev->dd_pnpinfo); + if (vflag && *dev->dd_location) + printf(" at %s", dev->dd_location); + if (!(dev->dd_flags & DF_ENABLED)) + printf(" (disabled)"); + else if (dev->dd_flags & DF_SUSPENDED) + printf(" (suspended)"); +} + + /* * Print information about a device.
*/ @@ -144,15 +160,7 @@ print_device(struct devinfo_dev *dev, void *arg) indent = (int)(intptr_t)arg; for (i = 0; i < indent; i++) printf(" "); - printf("%s", dev->dd_name[0] ? dev->dd_name : "unknown"); - if (vflag && *dev->dd_pnpinfo) - printf(" pnpinfo %s", dev->dd_pnpinfo); - if (vflag && *dev->dd_location) - printf(" at %s", dev->dd_location); - if (!(dev->dd_flags & DF_ENABLED)) - printf(" (disabled)"); - else if (dev->dd_flags & DF_SUSPENDED) - printf(" (suspended)"); + print_dev(dev); printf("\n"); if (rflag) { ia.indent = indent + 4; @@ -197,17 +205,6 @@ print_rman(struct devinfo_rman *rman, void *arg __unused) return(0); } -static void __dead2 -usage(void) -{ - fprintf(stderr, "%s\n%s\n%s\n", - "usage: devinfo [-rv]", - " devinfo -u", - " devifno -p dev"); - exit(1); -} - - static int print_path(struct devinfo_dev *dev, void *xname) { @@ -215,22 +212,38 @@ print_path(struct devinfo_dev *dev, void *xname) int rv; if (strcmp(dev->dd_name, name) == 0) { - printf("%s", dev->dd_name); + print_dev(dev); + if (vflag) + printf("\n"); return (1); } rv = devinfo_foreach_device_child(dev, print_path, xname); - if (rv == 1) - printf(" %s", dev->dd_name[0] ? dev->dd_name : "unknown"); + if (rv == 1) { + printf(" "); + print_dev(dev); + if (vflag) + printf("\n"); + } return (rv); } +static void __dead2 +usage(void) +{ + fprintf(stderr, "%s\n%s\n%s\n", + "usage: devinfo [-rv]", + " devinfo -u", + " devifno -p dev [-v]"); + exit(1); +} + int main(int argc, char *argv[]) { struct devinfo_dev *root; int c, uflag; - char *path; + char *path = NULL; uflag = 0; while ((c = getopt(argc, argv, "p:ruv")) != -1) { @@ -264,7 +277,8 @@ main(int argc, char *argv[]) if (path) { if (devinfo_foreach_device_child(root, print_path, (void *)path) == 0) errx(1, "%s: Not found", path); - printf("\n"); + if (!vflag) + printf("\n"); } else if (uflag) { /* print resource usage? 
*/ devinfo_foreach_rman(print_rman, NULL); From 97755e83f548c62a5cc818e83d5428f45277494c Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 21 Dec 2017 23:05:13 +0000 Subject: [PATCH 080/115] Fix build for kernels with SCHED_4BSD. Sponsored by: The FreeBSD Foundation --- sys/net/iflib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 9e11f56a4da..3decf974020 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -5127,7 +5127,7 @@ find_thread(int cpu, int thread_num) static int find_thread(int cpu, int thread_num __unused) { - return cpu_id; + return cpu; } #endif From 95c8838c2a692b506fecf8bb8d380bf4f47cc9ec Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 21 Dec 2017 23:08:10 +0000 Subject: [PATCH 081/115] Fix build for LP64 arches with gcc. gcc complains that the comparison is always false due to the value range, and the cast does not prevent the analysis. Split the LP64 vs. ILP32 clamping as a workaround. Sponsored by: The FreeBSD Foundation --- sys/fs/nfsclient/nfs_clvnops.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 27ff9e29470..dcee07e6f7a 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -3461,7 +3461,11 @@ nfs_pathconf(struct vop_pathconf_args *ap) } switch (ap->a_name) { case _PC_LINK_MAX: +#ifdef _LP64 + *ap->a_retval = pc.pc_linkmax; +#else *ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax); +#endif break; case _PC_NAME_MAX: *ap->a_retval = pc.pc_namemax; From 37f48d5abae83362d0df5c6ccbd6076e4d470b8d Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 21 Dec 2017 23:39:00 +0000 Subject: [PATCH 082/115] Fix mips build after introduction of MD definitions of atomic_load_64 and atomic_store_64. The MD definitions are provided for LP64 only, while mips also uses them for 32bit and n32.
Only define mips variants for 32bit and n32 and change the syntax to match common definitions. Note that this commit does not fix 32bit asm implementation to follow new KBI, this will be fixed later. The functions are only used for 8 byte ddb accesses so the known bug does not prevent normal kernel operations. Sponsored by: The FreeBSD Foundation --- sys/mips/include/atomic.h | 17 +++++++++-------- sys/mips/mips/db_interface.c | 12 ++++++------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/sys/mips/include/atomic.h b/sys/mips/include/atomic.h index 668d311d24a..9838a6953ac 100644 --- a/sys/mips/include/atomic.h +++ b/sys/mips/include/atomic.h @@ -342,20 +342,21 @@ atomic_store_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ ATOMIC_STORE_LOAD(32) ATOMIC_STORE_LOAD(64) #if !defined(__mips_n64) && !defined(__mips_n32) -void atomic_store_64(__volatile uint64_t *, uint64_t *); -void atomic_load_64(__volatile uint64_t *, uint64_t *); -#else +void atomic_store_64(__volatile uint64_t *, uint64_t); +uint64_t atomic_load_64(__volatile uint64_t *); +#elif defined (__mips_n32) static __inline void -atomic_store_64(__volatile uint64_t *p, uint64_t *v) +atomic_store_64(__volatile uint64_t *p, uint64_t v) { - *p = *v; + *p = v; } -static __inline void -atomic_load_64(__volatile uint64_t *p, uint64_t *v) +static __inline uint64_t +atomic_load_64(__volatile uint64_t *p) { - *v = *p; + return (*p); } +/* #else atomic_common.h definitions of atomic_load/store_64 are used */ #endif #undef ATOMIC_STORE_LOAD diff --git a/sys/mips/mips/db_interface.c b/sys/mips/mips/db_interface.c index 9a7f346d95f..3426bf48a00 100644 --- a/sys/mips/mips/db_interface.c +++ b/sys/mips/mips/db_interface.c @@ -164,9 +164,9 @@ db_read_bytes(vm_offset_t addr, size_t size, char *data) *(uint32_t *)data = *(uint32_t *)addr; break; case 8: - atomic_load_64((volatile u_int64_t *)addr, - (u_int64_t *)data); - break; + *(uint64_t *)data = atomic_load_64( + (void *)addr); + break; } 
} else { char *src; @@ -207,9 +207,9 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data) *(uint32_t *)addr = *(uint32_t *)data; break; case 8: - atomic_store_64((volatile u_int64_t *)addr, - (u_int64_t *)data); - break; + atomic_store_64((uint64_t *)addr, + *(uint64_t *)data); + break; } } else { char *dst; From f1ab57eead4f8db3e01f618b95b7163d375da02b Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Fri, 22 Dec 2017 01:46:25 +0000 Subject: [PATCH 083/115] Add soft float abi caching for armv7, it would allow people with old binaries to run them. Reviewed by: imp --- etc/rc.d/ldconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/rc.d/ldconfig b/etc/rc.d/ldconfig index 02b52287686..3af9562d3bf 100755 --- a/etc/rc.d/ldconfig +++ b/etc/rc.d/ldconfig @@ -63,7 +63,7 @@ ldconfig_start() esac case `sysctl -n hw.machine_arch` in - armv6) + armv[67]) for i in ${ldconfig_localsoft_dirs}; do if [ -d "${i}" ]; then _files=`find ${i} -type f` From 56f3600c8b722ead4e74371bfe8b4afdfc7714e7 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Fri, 22 Dec 2017 16:59:50 +0000 Subject: [PATCH 084/115] PC Card PNP tables are terminated by a NULL sentinel. This shouldn't be recorded in the linker hints, so subtract one to omit it.
--- sys/dev/pccard/pccardvar.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/dev/pccard/pccardvar.h b/sys/dev/pccard/pccardvar.h index c26da6f3bd1..ab2fdd2dccd 100644 --- a/sys/dev/pccard/pccardvar.h +++ b/sys/dev/pccard/pccardvar.h @@ -95,7 +95,7 @@ struct pccard_product { */ #define PCCARD_PNP_DESCR "D:#;V32:manufacturer;V32:product;Z:cisvendor;Z:cisproduct;" #define PCCARD_PNP_INFO(t) \ - MODULE_PNP_INFO(PCCARD_PNP_DESCR, pccard, t, t, sizeof(t[0]), sizeof(t) / sizeof(t[0])); \ + MODULE_PNP_INFO(PCCARD_PNP_DESCR, pccard, t, t, sizeof(t[0]), sizeof(t) / sizeof(t[0]) - 1); \ typedef int (*pccard_product_match_fn) (device_t dev, const struct pccard_product *ent, int vpfmatch); From a5ec991c278ef70379fc904b4213c280adc0e287 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Fri, 22 Dec 2017 17:13:54 +0000 Subject: [PATCH 085/115] Need to NULL terminate this list. It worked before by accidental data in the module following it that terminated the search. --- sys/dev/fdc/fdc_pccard.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/dev/fdc/fdc_pccard.c b/sys/dev/fdc/fdc_pccard.c index c6329505237..2b7a3196f2a 100644 --- a/sys/dev/fdc/fdc_pccard.c +++ b/sys/dev/fdc/fdc_pccard.c @@ -49,6 +49,7 @@ static int fdc_pccard_attach(device_t); static const struct pccard_product fdc_pccard_products[] = { PCMCIA_CARD(YEDATA, EXTERNAL_FDD), + { NULL } }; static int From fdac1d623b70cb0d255de629bb8ec8ebcbe542f1 Mon Sep 17 00:00:00 2001 From: "Pedro F. Giffuni" Date: Fri, 22 Dec 2017 17:15:02 +0000 Subject: [PATCH 086/115] SPDX: Reverse License ID tags from the lmc driver. While the BSD-2-Clause license is there, the GPLv2 is also present. I am unsure of the implications of having both licenses as they are here. I'll just leave it untagged and open for interpretation. 
--- sys/dev/lmc/if_lmc.c | 2 -- sys/dev/lmc/if_lmc.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/sys/dev/lmc/if_lmc.c b/sys/dev/lmc/if_lmc.c index 03b2918606e..e0866e3f145 100644 --- a/sys/dev/lmc/if_lmc.c +++ b/sys/dev/lmc/if_lmc.c @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * * $FreeBSD$ * * Copyright (c) 2002-2004 David Boggs. diff --git a/sys/dev/lmc/if_lmc.h b/sys/dev/lmc/if_lmc.h index 93b40049683..23765008b64 100644 --- a/sys/dev/lmc/if_lmc.h +++ b/sys/dev/lmc/if_lmc.h @@ -1,6 +1,4 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * * $FreeBSD$ * * Copyright (c) 2002-2004 David Boggs. (boggs@boggs.palo-alto.ca.us) From 77863e4b1d7282722313d324a0fc5fcf5fd5899a Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 22 Dec 2017 17:52:38 +0000 Subject: [PATCH 087/115] Update HISTORY section for the atomic(9) page. In collaboration with: alc Sponsored by: The FreeBSD Foundation (kib) MFC after: 1 week --- share/man/man9/atomic.9 | 44 +++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/share/man/man9/atomic.9 b/share/man/man9/atomic.9 index 3eff2e11c6d..8392dcadd38 100644 --- a/share/man/man9/atomic.9 +++ b/share/man/man9/atomic.9 @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 19, 2017 +.Dd December 22, 2017 .Dt ATOMIC 9 .Os .Sh NAME @@ -175,8 +175,11 @@ and semantics. .Pp Atomic operations on memory have up to three variants. -The first variant performs the operation without imposing any ordering -constraints on memory accesses to other locations. +The first, or +.Em relaxed +variant, performs the operation without imposing any ordering constraints on +accesses to other memory locations. +This variant is the default. The second variant has acquire semantics, and the third variant has release semantics. .Pp @@ -546,43 +549,54 @@ The .Fn atomic_set , and .Fn atomic_subtract -operations were first introduced in +operations were introduced in .Fx 3.0 . 
-This first set only supported the types +Initially, these operations were defined on the types .Dq Li char , .Dq Li short , .Dq Li int , and .Dq Li long . +.Pp The .Fn atomic_cmpset , -.Fn atomic_load , +.Fn atomic_load_acq , .Fn atomic_readandclear , and -.Fn atomic_store +.Fn atomic_store_rel operations were added in .Fx 5.0 . -The types +Simultaneously, the acquire and release variants were introduced, and +support was added for operation on the types .Dq Li 8 , .Dq Li 16 , .Dq Li 32 , .Dq Li 64 , and -.Dq Li ptr -and all of the acquire and release variants -were added in -.Fx 5.0 -as well. +.Dq Li ptr . +.Pp The .Fn atomic_fetchadd -operations were added in +operation was added in .Fx 6.0 . +.Pp The .Fn atomic_swap and .Fn atomic_testandset operations were added in .Fx 10.0 . +.Pp +The .Fn atomic_testandclear -operation was added in +and +.Fn atomic_thread_fence +operations were added in .Fx 11.0 . +.Pp +The relaxed variants of +.Fn atomic_load +and +.Fn atomic_store +were added in +.Fx 12.0 . From d2064cf0303dbd5585de1a36ae54504a13349e65 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Fri, 22 Dec 2017 17:53:27 +0000 Subject: [PATCH 088/115] Use '#' rather than some made up name for fields we want to ignore. 
--- sys/dev/drm2/i915/i915_drv.c | 2 +- sys/dev/drm2/radeon/radeon_drv.c | 2 +- sys/dev/ed/if_ed_pci.c | 2 +- sys/dev/intpm/intpm.c | 2 +- sys/dev/ioat/ioat.c | 2 +- sys/dev/ntb/ntb_hw/ntb_hw_intel.c | 2 +- sys/net/iflib.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sys/dev/drm2/i915/i915_drv.c b/sys/dev/drm2/i915/i915_drv.c index 290e4c1595a..c293b1ab5b9 100644 --- a/sys/dev/drm2/i915/i915_drv.c +++ b/sys/dev/drm2/i915/i915_drv.c @@ -1236,7 +1236,7 @@ MODULE_DEPEND(i915kms, agp, 1, 1, 1); MODULE_DEPEND(i915kms, iicbus, 1, 1, 1); MODULE_DEPEND(i915kms, iic, 1, 1, 1); MODULE_DEPEND(i915kms, iicbb, 1, 1, 1); -MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:human", vgapci, i915, pciidlist, +MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:#", vgapci, i915, pciidlist, sizeof(pciidlist[0]), nitems(pciidlist)); /* We give fast paths for the really cool registers */ diff --git a/sys/dev/drm2/radeon/radeon_drv.c b/sys/dev/drm2/radeon/radeon_drv.c index 7fdaf5b993e..6d42e6cbf2a 100644 --- a/sys/dev/drm2/radeon/radeon_drv.c +++ b/sys/dev/drm2/radeon/radeon_drv.c @@ -401,5 +401,5 @@ MODULE_DEPEND(radeonkms, iicbus, 1, 1, 1); MODULE_DEPEND(radeonkms, iic, 1, 1, 1); MODULE_DEPEND(radeonkms, iicbb, 1, 1, 1); MODULE_DEPEND(radeonkms, firmware, 1, 1, 1); -MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:human", vgapci, radeonkms, +MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:#", vgapci, radeonkms, pciidlist, sizeof(pciidlist[0]), nitems(pciidlist)); diff --git a/sys/dev/ed/if_ed_pci.c b/sys/dev/ed/if_ed_pci.c index 78f9aff705a..d3bcfbb8ec6 100644 --- a/sys/dev/ed/if_ed_pci.c +++ b/sys/dev/ed/if_ed_pci.c @@ -145,6 +145,6 @@ static driver_t ed_pci_driver = { DRIVER_MODULE(ed, pci, ed_pci_driver, ed_devclass, 0, 0); MODULE_DEPEND(ed, pci, 1, 1, 1); MODULE_DEPEND(ed, ether, 1, 1, 1); -MODULE_PNP_INFO("W32:vendor/device;D:human", pci, ed, pci_ids, sizeof(pci_ids[0]), +MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ed, pci_ids, sizeof(pci_ids[0]), nitems(pci_ids) - 1); diff
--git a/sys/dev/intpm/intpm.c b/sys/dev/intpm/intpm.c index a822d54e2a6..4b4cf9ab10c 100644 --- a/sys/dev/intpm/intpm.c +++ b/sys/dev/intpm/intpm.c @@ -895,5 +895,5 @@ DRIVER_MODULE_ORDERED(intsmb, pci, intsmb_driver, intsmb_devclass, 0, 0, DRIVER_MODULE(smbus, intsmb, smbus_driver, smbus_devclass, 0, 0); MODULE_DEPEND(intsmb, smbus, SMBUS_MINVER, SMBUS_PREFVER, SMBUS_MAXVER); MODULE_VERSION(intsmb, 1); -MODULE_PNP_INFO("W32:vendor/device;D:human", pci, intpm, intsmb_products, +MODULE_PNP_INFO("W32:vendor/device;D:#", pci, intpm, intsmb_products, sizeof(intsmb_products[0]), nitems(intsmb_products)); diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c index 56681f8a215..744c26dfafd 100644 --- a/sys/dev/ioat/ioat.c +++ b/sys/dev/ioat/ioat.c @@ -240,7 +240,7 @@ static struct _pcsid { 0x20218086, "SKX IOAT" }, }; -MODULE_PNP_INFO("W32:vendor/device;D:human", pci, ioat, pci_ids, +MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ioat, pci_ids, sizeof(pci_ids[0]), nitems(pci_ids)); /* diff --git a/sys/dev/ntb/ntb_hw/ntb_hw_intel.c b/sys/dev/ntb/ntb_hw/ntb_hw_intel.c index cac6f9b60a4..8060cceb22f 100644 --- a/sys/dev/ntb/ntb_hw/ntb_hw_intel.c +++ b/sys/dev/ntb/ntb_hw/ntb_hw_intel.c @@ -3119,5 +3119,5 @@ static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods, DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL); MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1); MODULE_VERSION(ntb_hw_intel, 1); -MODULE_PNP_INFO("W32:vendor/device;D:human", pci, ntb_hw_intel, pci_ids, +MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids, sizeof(pci_ids[0]), nitems(pci_ids)); diff --git a/sys/net/iflib.h b/sys/net/iflib.h index d1d707beb72..367c1d2fe28 100644 --- a/sys/net/iflib.h +++ b/sys/net/iflib.h @@ -174,7 +174,7 @@ typedef struct pci_vendor_info { #define PVID_END {0, 0, 0, 0, 0, 0, NULL} #define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:subvendor;U32:subdevice;" \ - "U32:revision;U32:class;D:human" + "U32:revision;U32:class;D:#" #define
IFLIB_PNP_INFO(b, u, t) \ MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, sizeof(t[0]), nitems(t)) From f549e3521d7824a65d23bf4f34e68c6d3392531f Mon Sep 17 00:00:00 2001 From: Navdeep Parhar Date: Fri, 22 Dec 2017 19:10:19 +0000 Subject: [PATCH 089/115] cxgbe(4): Do not forward interrupts to queues with freelists. This leaves the firmware event queue (fwq) as the only queue that can take interrupts for others. This simplifies cfg_itype_and_nqueues and queue allocation in the driver at the cost of a little (never?) used configuration. It also allows service_iq to be split into two specialized variants in the future. MFC after: 2 months Sponsored by: Chelsio Communications --- sys/dev/cxgbe/adapter.h | 12 +- sys/dev/cxgbe/t4_main.c | 279 ++++++++++++++++++++------------------ sys/dev/cxgbe/t4_netmap.c | 14 +- sys/dev/cxgbe/t4_sge.c | 215 ++++++----------------------- sys/dev/cxgbe/t4_vf.c | 5 - 5 files changed, 204 insertions(+), 321 deletions(-) diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 3b1d14b70a2..05d22253c18 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -169,9 +169,6 @@ enum { DOOMED = (1 << 0), VI_INIT_DONE = (1 << 1), VI_SYSCTL_CTX = (1 << 2), - INTR_RXQ = (1 << 4), /* All NIC rxq's take interrupts */ - INTR_OFLD_RXQ = (1 << 5), /* All TOE rxq's take interrupts */ - INTR_ALL = (INTR_RXQ | INTR_OFLD_RXQ), /* adapter debug_flags */ DF_DUMP_MBOX = (1 << 0), /* Log all mbox cmd/rpl. 
*/ @@ -349,7 +346,7 @@ enum { /* iq flags */ IQ_ALLOCATED = (1 << 0), /* firmware resources allocated */ IQ_HAS_FL = (1 << 1), /* iq associated with a freelist */ - IQ_INTR = (1 << 2), /* iq takes direct interrupt */ + /* 1 << 2 Used to be IQ_INTR */ IQ_LRO_ENABLED = (1 << 3), /* iq is an eth rxq with LRO enabled */ IQ_ADJ_CREDIT = (1 << 4), /* hw is off by 1 credit for this iq */ @@ -956,6 +953,13 @@ struct adapter { /* One for firmware events */ #define T4VF_EXTRA_INTR 1 +static inline int +forwarding_intr_to_fwq(struct adapter *sc) +{ + + return (sc->intr_count == 1); +} + static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) { diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 467421a4afa..cf9531c6d7c 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -470,7 +470,6 @@ struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t num_vis; /* number of VIs for each port */ uint16_t nirq; /* Total # of vectors */ - uint16_t intr_flags; /* Interrupt flags for each port */ uint16_t ntxq; /* # of NIC txq's for each port */ uint16_t nrxq; /* # of NIC rxq's for each port */ uint16_t nofldtxq; /* # of TOE txq's for each port */ @@ -1118,7 +1117,6 @@ t4_attach(device_t dev) vi->first_txq = tqidx; vi->tmr_idx = t4_tmr_idx; vi->pktc_idx = t4_pktc_idx; - vi->flags |= iaq.intr_flags & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi; @@ -1135,7 +1133,6 @@ t4_attach(device_t dev) vi->ofld_pktc_idx = t4_pktc_idx_ofld; vi->first_ofld_rxq = ofld_rqidx; vi->first_ofld_txq = ofld_tqidx; - vi->flags |= iaq.intr_flags & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? 
iaq.nofldtxq : iaq.nofldtxq_vi; @@ -2648,26 +2645,43 @@ fixup_devlog_params(struct adapter *sc) return (rc); } -static int -cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) +static void +update_nirq(struct intrs_and_queues *iaq, int nports) { - int rc, itype, navail, nrxq, nports, n; - int nofldrxq = 0; + int extra = T4_EXTRA_INTR; + + iaq->nirq = extra; + iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq); + iaq->nirq += nports * (iaq->num_vis - 1) * + max(iaq->nrxq_vi, iaq->nnmrxq_vi); + iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi; +} + +/* + * Adjust requirements to fit the number of interrupts available. + */ +static void +calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype, + int navail) +{ + int old_nirq; + const int nports = sc->params.nports; - nports = sc->params.nports; MPASS(nports > 0); + MPASS(navail > 0); bzero(iaq, sizeof(*iaq)); + iaq->intr_type = itype; iaq->num_vis = t4_num_vis; iaq->ntxq = t4_ntxq; iaq->ntxq_vi = t4_ntxq_vi; - iaq->nrxq = nrxq = t4_nrxq; + iaq->nrxq = t4_nrxq; iaq->nrxq_vi = t4_nrxq_vi; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq = t4_nofldtxq; iaq->nofldtxq_vi = t4_nofldtxq_vi; - iaq->nofldrxq = nofldrxq = t4_nofldrxq; + iaq->nofldrxq = t4_nofldrxq; iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif @@ -2676,6 +2690,105 @@ cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif + update_nirq(iaq, nports); + if (iaq->nirq <= navail && + (itype != INTR_MSI || powerof2(iaq->nirq))) { + /* + * This is the normal case -- there are enough interrupts for + * everything. + */ + goto done; + } + + /* + * If extra VIs have been configured try reducing their count and see if + * that works. 
+ */ + while (iaq->num_vis > 1) { + iaq->num_vis--; + update_nirq(iaq, nports); + if (iaq->nirq <= navail && + (itype != INTR_MSI || powerof2(iaq->nirq))) { + device_printf(sc->dev, "virtual interfaces per port " + "reduced to %d from %d. nrxq=%u, nofldrxq=%u, " + "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u. " + "itype %d, navail %u, nirq %d.\n", + iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq, + iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, + itype, navail, iaq->nirq); + goto done; + } + } + + /* + * Extra VIs will not be created. Log a message if they were requested. + */ + MPASS(iaq->num_vis == 1); + iaq->ntxq_vi = iaq->nrxq_vi = 0; + iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; + iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; + if (iaq->num_vis != t4_num_vis) { + device_printf(sc->dev, "extra virtual interfaces disabled. " + "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, " + "nnmrxq_vi=%u. itype %d, navail %u, nirq %d.\n", + iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, + iaq->nnmrxq_vi, itype, navail, iaq->nirq); + } + + /* + * Keep reducing the number of NIC rx queues to the next lower power of + * 2 (for even RSS distribution) and halving the TOE rx queues and see + * if that works. + */ + do { + if (iaq->nrxq > 1) { + do { + iaq->nrxq--; + } while (!powerof2(iaq->nrxq)); + } + if (iaq->nofldrxq > 1) + iaq->nofldrxq >>= 1; + + old_nirq = iaq->nirq; + update_nirq(iaq, nports); + if (iaq->nirq <= navail && + (itype != INTR_MSI || powerof2(iaq->nirq))) { + device_printf(sc->dev, "running with reduced number of " + "rx queues because of shortage of interrupts. " + "nrxq=%u, nofldrxq=%u. " + "itype %d, navail %u, nirq %d.\n", iaq->nrxq, + iaq->nofldrxq, itype, navail, iaq->nirq); + goto done; + } + } while (old_nirq != iaq->nirq); + + /* One interrupt for everything. Ugh. */ + device_printf(sc->dev, "running with minimal number of queues. 
" + "itype %d, navail %u.\n", itype, navail); + iaq->nirq = 1; + MPASS(iaq->nrxq == 1); + iaq->ntxq = 1; + if (iaq->nofldrxq > 1) + iaq->nofldtxq = 1; +done: + MPASS(iaq->num_vis > 0); + if (iaq->num_vis > 1) { + MPASS(iaq->nrxq_vi > 0); + MPASS(iaq->ntxq_vi > 0); + } + MPASS(iaq->nirq > 0); + MPASS(iaq->nrxq > 0); + MPASS(iaq->ntxq > 0); + if (itype == INTR_MSI) { + MPASS(powerof2(iaq->nirq)); + } +} + +static int +cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) +{ + int rc, itype, navail, nalloc; + for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) @@ -2691,126 +2804,33 @@ restart: if (navail == 0) continue; - iaq->intr_type = itype; - iaq->intr_flags = 0; - - /* - * Best option: an interrupt vector for errors, one for the - * firmware event queue, and one for every rxq (NIC and TOE) of - * every VI. The VIs that support netmap use the same - * interrupts for the NIC rx queues and the netmap rx queues - * because only one set of queues is active at a time. - */ - iaq->nirq = T4_EXTRA_INTR; - iaq->nirq += nports * (nrxq + nofldrxq); - iaq->nirq += nports * (iaq->num_vis - 1) * - max(iaq->nrxq_vi, iaq->nnmrxq_vi); /* See comment above. 
*/ - iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi; - if (iaq->nirq <= navail && - (itype != INTR_MSI || powerof2(iaq->nirq))) { - iaq->intr_flags = INTR_ALL; - goto allocate; - } - - /* Disable the VIs (and netmap) if there aren't enough intrs */ - if (iaq->num_vis > 1) { - device_printf(sc->dev, "virtual interfaces disabled " - "because num_vis=%u with current settings " - "(nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, " - "nnmrxq_vi=%u) would need %u interrupts but " - "only %u are available.\n", iaq->num_vis, nrxq, - nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, - iaq->nnmrxq_vi, iaq->nirq, navail); - iaq->num_vis = 1; - iaq->ntxq_vi = iaq->nrxq_vi = 0; - iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; - iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; - goto restart; - } - - /* - * Second best option: a vector for errors, one for the firmware - * event queue, and vectors for either all the NIC rx queues or - * all the TOE rx queues. The queues that don't get vectors - * will forward their interrupts to those that do. - */ - iaq->nirq = T4_EXTRA_INTR; - if (nrxq >= nofldrxq) { - iaq->intr_flags = INTR_RXQ; - iaq->nirq += nports * nrxq; - } else { - iaq->intr_flags = INTR_OFLD_RXQ; - iaq->nirq += nports * nofldrxq; - } - if (iaq->nirq <= navail && - (itype != INTR_MSI || powerof2(iaq->nirq))) - goto allocate; - - /* - * Next best option: an interrupt vector for errors, one for the - * firmware event queue, and at least one per main-VI. At this - * point we know we'll have to downsize nrxq and/or nofldrxq to - * fit what's available to us. - */ - iaq->nirq = T4_EXTRA_INTR; - iaq->nirq += nports; - if (iaq->nirq <= navail) { - int leftover = navail - iaq->nirq; - int target = max(nrxq, nofldrxq); - - iaq->intr_flags = nrxq >= nofldrxq ? 
- INTR_RXQ : INTR_OFLD_RXQ; - - n = 1; - while (n < target && leftover >= nports) { - leftover -= nports; - iaq->nirq += nports; - n++; - } - iaq->nrxq = min(n, nrxq); -#ifdef TCP_OFFLOAD - iaq->nofldrxq = min(n, nofldrxq); -#endif - - if (itype != INTR_MSI || powerof2(iaq->nirq)) - goto allocate; - } - - /* - * Least desirable option: one interrupt vector for everything. - */ - iaq->nirq = iaq->nrxq = 1; - iaq->intr_flags = 0; -#ifdef TCP_OFFLOAD - if (is_offload(sc)) - iaq->nofldrxq = 1; -#endif -allocate: - navail = iaq->nirq; + calculate_iaq(sc, iaq, itype, navail); + nalloc = iaq->nirq; rc = 0; if (itype == INTR_MSIX) - rc = pci_alloc_msix(sc->dev, &navail); + rc = pci_alloc_msix(sc->dev, &nalloc); else if (itype == INTR_MSI) - rc = pci_alloc_msi(sc->dev, &navail); + rc = pci_alloc_msi(sc->dev, &nalloc); - if (rc == 0) { - if (navail == iaq->nirq) + if (rc == 0 && nalloc > 0) { + if (nalloc == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number - * the kernel is willing to allocate (it's in navail). + * the kernel is willing to allocate. */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", - itype, iaq->nirq, navail); + itype, iaq->nirq, nalloc); pci_release_msi(sc->dev); + navail = nalloc; goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", - itype, rc, iaq->nirq, navail); + itype, rc, iaq->nirq, nalloc); } device_printf(sc->dev, @@ -4352,7 +4372,7 @@ t4_setup_intr_handlers(struct adapter *sc) */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; - if (sc->intr_count == 1) + if (forwarding_intr_to_fwq(sc)) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. 
*/ @@ -4387,8 +4407,6 @@ t4_setup_intr_handlers(struct adapter *sc) if (vi->nnmrxq > 0) { int n = max(vi->nrxq, vi->nnmrxq); - MPASS(vi->flags & INTR_RXQ); - rxq = &sge->rxq[vi->first_rxq]; #ifdef DEV_NETMAP nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; @@ -4406,11 +4424,17 @@ t4_setup_intr_handlers(struct adapter *sc) t4_vi_intr, irq, s); if (rc != 0) return (rc); +#ifdef RSS + if (q < vi->nrxq) { + bus_bind_intr(sc->dev, irq->res, + rss_getcpu(q % nbuckets)); + } +#endif irq++; rid++; vi->nintr++; } - } else if (vi->flags & INTR_RXQ) { + } else { for_each_rxq(vi, q, rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); @@ -4428,18 +4452,15 @@ t4_setup_intr_handlers(struct adapter *sc) } } #ifdef TCP_OFFLOAD - if (vi->flags & INTR_OFLD_RXQ) { - for_each_ofld_rxq(vi, q, ofld_rxq) { - snprintf(s, sizeof(s), "%x%c%x", p, - 'A' + v, q); - rc = t4_alloc_irq(sc, irq, rid, - t4_intr, ofld_rxq, s); - if (rc != 0) - return (rc); - irq++; - rid++; - vi->nintr++; - } + for_each_ofld_rxq(vi, q, ofld_rxq) { + snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q); + rc = t4_alloc_irq(sc, irq, rid, t4_intr, + ofld_rxq, s); + if (rc != 0) + return (rc); + irq++; + rid++; + vi->nintr++; } #endif } diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c index 774be6b1fd0..fa3bbb9fc8f 100644 --- a/sys/dev/cxgbe/t4_netmap.c +++ b/sys/dev/cxgbe/t4_netmap.c @@ -108,16 +108,10 @@ alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong) V_FW_IQ_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | FW_LEN16(c)); - if (vi->flags & INTR_RXQ) { - KASSERT(nm_rxq->intr_idx < sc->intr_count, - ("%s: invalid direct intr_idx %d", __func__, - nm_rxq->intr_idx)); - v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx); - } else { - CXGBE_UNIMPLEMENTED(__func__); /* XXXNM: needs review */ - v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx) | - F_FW_IQ_CMD_IQANDST; - } + MPASS(!forwarding_intr_to_fwq(sc)); + KASSERT(nm_rxq->intr_idx < sc->intr_count, + ("%s: 
invalid direct intr_idx %d", __func__, nm_rxq->intr_idx)); + v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx); c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | V_FW_IQ_CMD_VIID(vi->viid) | diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 0abc2ad678f..51a53a85d3f 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -953,70 +953,6 @@ t4_teardown_adapter_queues(struct adapter *sc) return (0); } -static inline int -first_vector(struct vi_info *vi) -{ - struct adapter *sc = vi->pi->adapter; - - if (sc->intr_count == 1) - return (0); - - return (vi->first_intr); -} - -/* - * Given an arbitrary "index," come up with an iq that can be used by other - * queues (of this VI) for interrupt forwarding, SGE egress updates, etc. - * The iq returned is guaranteed to be something that takes direct interrupts. - */ -static struct sge_iq * -vi_intr_iq(struct vi_info *vi, int idx) -{ - struct adapter *sc = vi->pi->adapter; - struct sge *s = &sc->sge; - struct sge_iq *iq = NULL; - int nintr, i; - - if (sc->intr_count == 1) - return (&sc->sge.fwq); - - nintr = vi->nintr; -#ifdef DEV_NETMAP - /* Do not consider any netmap-only interrupts */ - if (vi->flags & INTR_RXQ && vi->nnmrxq > vi->nrxq) - nintr -= vi->nnmrxq - vi->nrxq; -#endif - KASSERT(nintr != 0, - ("%s: vi %p has no exclusive interrupts, total interrupts = %d", - __func__, vi, sc->intr_count)); - i = idx % nintr; - - if (vi->flags & INTR_RXQ) { - if (i < vi->nrxq) { - iq = &s->rxq[vi->first_rxq + i].iq; - goto done; - } - i -= vi->nrxq; - } -#ifdef TCP_OFFLOAD - if (vi->flags & INTR_OFLD_RXQ) { - if (i < vi->nofldrxq) { - iq = &s->ofld_rxq[vi->first_ofld_rxq + i].iq; - goto done; - } - i -= vi->nofldrxq; - } -#endif - panic("%s: vi %p, intr_flags 0x%lx, idx %d, total intr %d\n", __func__, - vi, vi->flags & INTR_ALL, idx, nintr); -done: - MPASS(iq != NULL); - KASSERT(iq->flags & IQ_INTR, - ("%s: iq %p (vi %p, intr_flags 0x%lx, idx %d)", __func__, iq, vi, - vi->flags & 
INTR_ALL, idx)); - return (iq); -} - /* Maximum payload that can be delivered with a single iq descriptor */ static inline int mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) @@ -1042,7 +978,7 @@ mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) int t4_setup_vi_queues(struct vi_info *vi) { - int rc = 0, i, j, intr_idx, iqid; + int rc = 0, i, intr_idx, iqidx; struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *ctrlq; @@ -1064,14 +1000,14 @@ t4_setup_vi_queues(struct vi_info *vi) int maxp, mtu = ifp->if_mtu; /* Interrupt vector to start from (when using multiple vectors) */ - intr_idx = first_vector(vi); + intr_idx = vi->first_intr; #ifdef DEV_NETMAP saved_idx = intr_idx; if (ifp->if_capabilities & IFCAP_NETMAP) { /* netmap is supported with direct interrupts only. */ - MPASS(vi->flags & INTR_RXQ); + MPASS(!forwarding_intr_to_fwq(sc)); /* * We don't have buffers to back the netmap rx queues @@ -1090,8 +1026,8 @@ t4_setup_vi_queues(struct vi_info *vi) oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", CTLFLAG_RD, NULL, "tx queues"); for_each_nm_txq(vi, i, nm_txq) { - iqid = vi->first_nm_rxq + (i % vi->nnmrxq); - rc = alloc_nm_txq(vi, nm_txq, iqid, i, oid); + iqidx = vi->first_nm_rxq + (i % vi->nnmrxq); + rc = alloc_nm_txq(vi, nm_txq, iqidx, i, oid); if (rc != 0) goto done; } @@ -1102,15 +1038,12 @@ t4_setup_vi_queues(struct vi_info *vi) #endif /* - * First pass over all NIC and TOE rx queues: - * a) initialize iq and fl - * b) allocate queue iff it will take direct interrupts. + * Allocate rx queues first because a default iqid is required when + * creating a tx queue. 
*/ maxp = mtu_to_max_payload(sc, mtu, 0); - if (vi->flags & INTR_RXQ) { - oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", - CTLFLAG_RD, NULL, "rx queues"); - } + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", + CTLFLAG_RD, NULL, "rx queues"); for_each_rxq(vi, i, rxq) { init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); @@ -1119,13 +1052,11 @@ t4_setup_vi_queues(struct vi_info *vi) device_get_nameunit(vi->dev), i); init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); - if (vi->flags & INTR_RXQ) { - rxq->iq.flags |= IQ_INTR; - rc = alloc_rxq(vi, rxq, intr_idx, i, oid); - if (rc != 0) - goto done; - intr_idx++; - } + rc = alloc_rxq(vi, rxq, + forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); + if (rc != 0) + goto done; + intr_idx++; } #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) @@ -1133,11 +1064,8 @@ t4_setup_vi_queues(struct vi_info *vi) #endif #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); - if (vi->flags & INTR_OFLD_RXQ) { - oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", - CTLFLAG_RD, NULL, - "rx queues for offloaded TCP connections"); - } + oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", + CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); for_each_ofld_rxq(vi, i, ofld_rxq) { init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, @@ -1147,70 +1075,29 @@ t4_setup_vi_queues(struct vi_info *vi) device_get_nameunit(vi->dev), i); init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); - if (vi->flags & INTR_OFLD_RXQ) { - ofld_rxq->iq.flags |= IQ_INTR; - rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid); - if (rc != 0) - goto done; - intr_idx++; - } + rc = alloc_ofld_rxq(vi, ofld_rxq, + forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); + if (rc != 0) + goto done; + intr_idx++; } #endif /* - * Second pass over all NIC and TOE rx queues. The queues forwarding - * their interrupts are allocated now. 
- */ - j = 0; - if (!(vi->flags & INTR_RXQ)) { - oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", - CTLFLAG_RD, NULL, "rx queues"); - for_each_rxq(vi, i, rxq) { - MPASS(!(rxq->iq.flags & IQ_INTR)); - - intr_idx = vi_intr_iq(vi, j)->abs_id; - - rc = alloc_rxq(vi, rxq, intr_idx, i, oid); - if (rc != 0) - goto done; - j++; - } - } -#ifdef TCP_OFFLOAD - if (vi->nofldrxq != 0 && !(vi->flags & INTR_OFLD_RXQ)) { - oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", - CTLFLAG_RD, NULL, - "rx queues for offloaded TCP connections"); - for_each_ofld_rxq(vi, i, ofld_rxq) { - MPASS(!(ofld_rxq->iq.flags & IQ_INTR)); - - intr_idx = vi_intr_iq(vi, j)->abs_id; - - rc = alloc_ofld_rxq(vi, ofld_rxq, intr_idx, i, oid); - if (rc != 0) - goto done; - j++; - } - } -#endif - - /* - * Now the tx queues. Only one pass needed. + * Now the tx queues. */ oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, NULL, "tx queues"); - j = 0; for_each_txq(vi, i, txq) { - iqid = vi_intr_iq(vi, j)->cntxt_id; + iqidx = vi->first_rxq + (i % vi->nrxq); snprintf(name, sizeof(name), "%s txq%d", device_get_nameunit(vi->dev), i); - init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, iqid, - name); + init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, + sc->sge.rxq[iqidx].iq.cntxt_id, name); rc = alloc_txq(vi, txq, i, oid); if (rc != 0) goto done; - j++; } #ifdef TCP_OFFLOAD oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", @@ -1218,11 +1105,11 @@ t4_setup_vi_queues(struct vi_info *vi) for_each_ofld_txq(vi, i, ofld_txq) { struct sysctl_oid *oid2; - iqid = vi_intr_iq(vi, j)->cntxt_id; + iqidx = vi->first_ofld_rxq + (i % vi->nofldrxq); snprintf(name, sizeof(name), "%s ofld_txq%d", device_get_nameunit(vi->dev), i); init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, - iqid, name); + sc->sge.ofld_rxq[iqidx].iq.cntxt_id, name); snprintf(name, sizeof(name), "%d", i); oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, @@ 
-1231,7 +1118,6 @@ t4_setup_vi_queues(struct vi_info *vi) rc = alloc_wrq(sc, vi, ofld_txq, oid2); if (rc != 0) goto done; - j++; } #endif @@ -1243,10 +1129,9 @@ t4_setup_vi_queues(struct vi_info *vi) oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, NULL, "ctrl queue"); ctrlq = &sc->sge.ctrlq[pi->port_id]; - iqid = vi_intr_iq(vi, 0)->cntxt_id; snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(vi->dev)); - init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, - name); + init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, + sc->sge.rxq[vi->first_rxq].iq.cntxt_id, name); rc = alloc_wrq(sc, vi, ctrlq, oid); done: @@ -1312,33 +1197,15 @@ t4_teardown_vi_queues(struct vi_info *vi) #endif /* - * Then take down the rx queues that forward their interrupts, as they - * reference other rx queues. + * Then take down the rx queues. */ for_each_rxq(vi, i, rxq) { - if ((rxq->iq.flags & IQ_INTR) == 0) - free_rxq(vi, rxq); + free_rxq(vi, rxq); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { - if ((ofld_rxq->iq.flags & IQ_INTR) == 0) - free_ofld_rxq(vi, ofld_rxq); - } -#endif - - /* - * Then take down the rx queues that take direct interrupts. - */ - - for_each_rxq(vi, i, rxq) { - if (rxq->iq.flags & IQ_INTR) - free_rxq(vi, rxq); - } -#ifdef TCP_OFFLOAD - for_each_ofld_rxq(vi, i, ofld_rxq) { - if (ofld_rxq->iq.flags & IQ_INTR) - free_ofld_rxq(vi, ofld_rxq); + free_ofld_rxq(vi, ofld_rxq); } #endif @@ -2715,9 +2582,9 @@ free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. * - * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then - * the intr_idx specifies the vector, starting from 0. Otherwise it specifies - * the abs_id of the ingress queue to which its interrupts should be forwarded. 
+ * If the ingress queue will take interrupts directly then the intr_idx + * specifies the vector, starting from 0. -1 means the interrupts for this + * queue should be forwarded to the fwq. */ static int alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, @@ -2749,12 +2616,15 @@ alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, if (iq == &sc->sge.fwq) v |= F_FW_IQ_CMD_IQASYNCH; - if (iq->flags & IQ_INTR) { + if (intr_idx < 0) { + /* Forwarded interrupts, all headed to fwq */ + v |= F_FW_IQ_CMD_IQANDST; + v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); + } else { KASSERT(intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, intr_idx)); - } else - v |= F_FW_IQ_CMD_IQANDST; - v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); + v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); + } c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | @@ -3004,7 +2874,6 @@ alloc_fwq(struct adapter *sc) struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); - fwq->flags |= IQ_INTR; /* always */ if (sc->flags & IS_VF) intr_idx = 0; else { diff --git a/sys/dev/cxgbe/t4_vf.c b/sys/dev/cxgbe/t4_vf.c index e18bcf04d09..8eb664dcae0 100644 --- a/sys/dev/cxgbe/t4_vf.c +++ b/sys/dev/cxgbe/t4_vf.c @@ -62,7 +62,6 @@ __FBSDID("$FreeBSD$"); struct intrs_and_queues { uint16_t intr_type; /* MSI, or MSI-X */ uint16_t nirq; /* Total # of vectors */ - uint16_t intr_flags; /* Interrupt flags for each port */ uint16_t ntxq; /* # of NIC txq's for each port */ uint16_t nrxq; /* # of NIC rxq's for each port */ }; @@ -330,7 +329,6 @@ cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) continue; iaq->intr_type = itype; - iaq->intr_flags = 0; /* * XXX: The Linux driver reserves an Ingress Queue for @@ -438,7 +436,6 @@ cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) return (rc); } if (navail == iaq->nirq) { - iaq->intr_flags = INTR_RXQ; return (0); } 
pci_release_msi(sc->dev); @@ -455,7 +452,6 @@ cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, navail); - iaq->intr_flags = 0; return (rc); } @@ -702,7 +698,6 @@ t4vf_attach(device_t dev) vi->first_txq = tqidx; vi->tmr_idx = t4_tmr_idx; vi->pktc_idx = t4_pktc_idx; - vi->flags |= iaq.intr_flags & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq: 1; vi->ntxq = j == 0 ? iaq.ntxq: 1; From d171d2f2817ece7e89cdf9c6067f5f731ee3c000 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 22 Dec 2017 20:44:21 +0000 Subject: [PATCH 090/115] Add AHCI/XHCI device IDs found on AMD Ryzen+B350 system. MFC after: 2 weeks --- sys/dev/ahci/ahci_pci.c | 1 + sys/dev/usb/controller/xhci_pci.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/sys/dev/ahci/ahci_pci.c b/sys/dev/ahci/ahci_pci.c index 80e546c6c78..00c3740bc5b 100644 --- a/sys/dev/ahci/ahci_pci.c +++ b/sys/dev/ahci/ahci_pci.c @@ -68,6 +68,7 @@ static const struct { AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, /* Not sure SB8x0/SB9x0 needs this quirk. 
Be conservative though */ {0x43951002, 0x00, "AMD SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG}, + {0x43b71022, 0x00, "AMD 300 Series", 0}, {0x78001022, 0x00, "AMD Hudson-2", 0}, {0x78011022, 0x00, "AMD Hudson-2", 0}, {0x78021022, 0x00, "AMD Hudson-2", 0}, diff --git a/sys/dev/usb/controller/xhci_pci.c b/sys/dev/usb/controller/xhci_pci.c index 16f9a494ad7..6c93c12c426 100644 --- a/sys/dev/usb/controller/xhci_pci.c +++ b/sys/dev/usb/controller/xhci_pci.c @@ -97,6 +97,10 @@ xhci_pci_match(device_t self) uint32_t device_id = pci_get_devid(self); switch (device_id) { + case 0x145c1022: + return ("AMD KERNCZ USB 3.0 controller"); + case 0x43bb1022: + return ("AMD 300 Series USB 3.0 controller"); case 0x78141022: return ("AMD FCH USB 3.0 controller"); From 6efa5583c7ecece2f55003a217dbedcb5fb61b97 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Fri, 22 Dec 2017 20:48:49 +0000 Subject: [PATCH 091/115] Fix typos from last commit, these should have been #. --- sys/dev/drm2/radeon/radeon_drv.c | 2 +- sys/dev/ed/if_ed_pci.c | 2 +- sys/dev/ntb/ntb_hw/ntb_hw_intel.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/dev/drm2/radeon/radeon_drv.c b/sys/dev/drm2/radeon/radeon_drv.c index 6d42e6cbf2a..9574bec4c72 100644 --- a/sys/dev/drm2/radeon/radeon_drv.c +++ b/sys/dev/drm2/radeon/radeon_drv.c @@ -401,5 +401,5 @@ MODULE_DEPEND(radeonkms, iicbus, 1, 1, 1); MODULE_DEPEND(radeonkms, iic, 1, 1, 1); MODULE_DEPEND(radeonkms, iicbb, 1, 1, 1); MODULE_DEPEND(radeonkms, firmware, 1, 1, 1); -MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:@", vgapci, radeonkms, +MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:#", vgapci, radeonkms, pciidlist, sizeof(pciidlist[0]), nitems(pciidlist)); diff --git a/sys/dev/ed/if_ed_pci.c b/sys/dev/ed/if_ed_pci.c index d3bcfbb8ec6..d023ab42e66 100644 --- a/sys/dev/ed/if_ed_pci.c +++ b/sys/dev/ed/if_ed_pci.c @@ -145,6 +145,6 @@ static driver_t ed_pci_driver = { DRIVER_MODULE(ed, pci, ed_pci_driver, ed_devclass, 0, 0); MODULE_DEPEND(ed, pci, 1, 1, 1); 
MODULE_DEPEND(ed, ether, 1, 1, 1); -MODULE_PNP_INFO("W32:vendor/device;D:@", pci, ed, pci_ids, sizeof(pci_ids[0]), +MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ed, pci_ids, sizeof(pci_ids[0]), nitems(pci_ids) - 1); diff --git a/sys/dev/ntb/ntb_hw/ntb_hw_intel.c b/sys/dev/ntb/ntb_hw/ntb_hw_intel.c index 8060cceb22f..3bb57fcb71d 100644 --- a/sys/dev/ntb/ntb_hw/ntb_hw_intel.c +++ b/sys/dev/ntb/ntb_hw/ntb_hw_intel.c @@ -3119,5 +3119,5 @@ static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods, DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL); MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1); MODULE_VERSION(ntb_hw_intel, 1); -MODULE_PNP_INFO("W32:vendor/device;D:@", pci, ntb_hw_intel, pci_ids, +MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ntb_hw_intel, pci_ids, sizeof(pci_ids[0]), nitems(pci_ids)); From 0f33cc3472ea98ab873f2b1250212449864a5085 Mon Sep 17 00:00:00 2001 From: Sevan Janiyan Date: Fri, 22 Dec 2017 21:54:39 +0000 Subject: [PATCH 092/115] Drop the NetBSD rcs tag introduced in r326868. Approved by: bcr (mentor) Differential Revision: https://reviews.freebsd.org/D13511 --- share/misc/bsd-family-tree | 1 - 1 file changed, 1 deletion(-) diff --git a/share/misc/bsd-family-tree b/share/misc/bsd-family-tree index a067e393c9c..b7e7becbafd 100644 --- a/share/misc/bsd-family-tree +++ b/share/misc/bsd-family-tree @@ -794,4 +794,3 @@ Copyright (c) 1997-2012 Wolfram Schneider URL: http://svnweb.freebsd.org/base/head/share/misc/bsd-family-tree $FreeBSD$ -$NetBSD: bsd-family-tree,v 1.62 2017/12/14 10:34:06 maya Exp $ From 7aea69e54a9df49786f5cf56fcd86dfb4c513b6f Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 22 Dec 2017 23:27:03 +0000 Subject: [PATCH 093/115] Remove mips MD atomic_load_64 and atomic_store_64. The only users of the functions were db_read_bytes() and db_write_bytes() ddb(4) interfaces. Replace the calls with direct reads and writes, which are automatically atomic on 64bits and n32. 
Note that removed assembler implementation for mips32 is not atomic anyway. Reviewed by: jhb Discussed with: imp Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D13586 --- sys/mips/include/atomic.h | 18 ---------- sys/mips/mips/db_interface.c | 8 ++--- sys/mips/mips/support.S | 67 ------------------------------------ 3 files changed, 4 insertions(+), 89 deletions(-) diff --git a/sys/mips/include/atomic.h b/sys/mips/include/atomic.h index 9838a6953ac..d299fb4a1bb 100644 --- a/sys/mips/include/atomic.h +++ b/sys/mips/include/atomic.h @@ -341,24 +341,6 @@ atomic_store_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ ATOMIC_STORE_LOAD(32) ATOMIC_STORE_LOAD(64) -#if !defined(__mips_n64) && !defined(__mips_n32) -void atomic_store_64(__volatile uint64_t *, uint64_t); -uint64_t atomic_load_64(__volatile uint64_t *); -#elif defined (__mips_n32) -static __inline void -atomic_store_64(__volatile uint64_t *p, uint64_t v) -{ - *p = v; -} - -static __inline uint64_t -atomic_load_64(__volatile uint64_t *p) -{ - return (*p); -} -/* #else atomic_common.h definitions of atomic_load/store_64 are used */ -#endif - #undef ATOMIC_STORE_LOAD /* diff --git a/sys/mips/mips/db_interface.c b/sys/mips/mips/db_interface.c index 3426bf48a00..a9c84e30467 100644 --- a/sys/mips/mips/db_interface.c +++ b/sys/mips/mips/db_interface.c @@ -152,6 +152,7 @@ db_read_bytes(vm_offset_t addr, size_t size, char *data) /* * 'addr' could be a memory-mapped I/O address. Try to * do atomic load/store in unit of size requested. + * size == 8 is only atomic on 64bit or n32 kernel. 
*/ if ((size == 2 || size == 4 || size == 8) && ((addr & (size -1)) == 0) && @@ -164,8 +165,7 @@ db_read_bytes(vm_offset_t addr, size_t size, char *data) *(uint32_t *)data = *(uint32_t *)addr; break; case 8: - *(uint64_t *)data = atomic_load_64( - (void *)addr); + *(uint64_t *)data = *(uint64_t *)addr; break; } } else { @@ -195,6 +195,7 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data) /* * 'addr' could be a memory-mapped I/O address. Try to * do atomic load/store in unit of size requested. + * size == 8 is only atomic on 64bit or n32 kernel. */ if ((size == 2 || size == 4 || size == 8) && ((addr & (size -1)) == 0) && @@ -207,8 +208,7 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data) *(uint32_t *)addr = *(uint32_t *)data; break; case 8: - atomic_store_64((uint64_t *)addr, - *(uint64_t *)data); + *(uint64_t *)addr = *(uint64_t *)data; break; } } else { diff --git a/sys/mips/mips/support.S b/sys/mips/mips/support.S index 44a5ccb3b92..4df367d9e7f 100644 --- a/sys/mips/mips/support.S +++ b/sys/mips/mips/support.S @@ -839,75 +839,8 @@ LEAF(atomic_subtract_8) nop END(atomic_subtract_8) -/* - * atomic 64-bit register read/write assembly language support routines. - */ - .set noreorder # Noreorder is default style! -#if !defined(__mips_n64) && !defined(__mips_n32) - /* - * I don't know if these routines have the right number of - * NOPs in it for all processors. XXX - * - * Maybe it would be better to just leave this undefined in that case. - * - * XXX These routines are not safe in the case of a TLB miss on a1 or - * a0 unless the trapframe is 64-bit, which it just isn't with O32. - * If we take any exception, not just an interrupt, the upper - * 32-bits will be clobbered. Use only N32 and N64 kernels if you - * want to use 64-bit registers while interrupts are enabled or - * with memory operations. 
Since this isn't even using load-linked - * and store-conditional, perhaps it should just use two registers - * instead, as is right and good with the O32 ABI. - */ -LEAF(atomic_store_64) - mfc0 t1, MIPS_COP_0_STATUS - and t2, t1, ~MIPS_SR_INT_IE - mtc0 t2, MIPS_COP_0_STATUS - nop - nop - nop - nop - ld t0, (a1) - nop - nop - sd t0, (a0) - nop - nop - mtc0 t1,MIPS_COP_0_STATUS - nop - nop - nop - nop - j ra - nop -END(atomic_store_64) - -LEAF(atomic_load_64) - mfc0 t1, MIPS_COP_0_STATUS - and t2, t1, ~MIPS_SR_INT_IE - mtc0 t2, MIPS_COP_0_STATUS - nop - nop - nop - nop - ld t0, (a0) - nop - nop - sd t0, (a1) - nop - nop - mtc0 t1,MIPS_COP_0_STATUS - nop - nop - nop - nop - j ra - nop -END(atomic_load_64) -#endif - #if defined(DDB) || defined(DEBUG) LEAF(kdbpeek) From 79554b40494d38cf003376f22d5f80550c5f2bfe Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 23 Dec 2017 04:50:52 +0000 Subject: [PATCH 094/115] The device tables end with a sentinel in iflib. Don't include the sentinel in the output. --- sys/net/iflib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/net/iflib.h b/sys/net/iflib.h index 367c1d2fe28..3730f0eadc8 100644 --- a/sys/net/iflib.h +++ b/sys/net/iflib.h @@ -176,7 +176,7 @@ typedef struct pci_vendor_info { #define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:subvendor;U32:subdevice;" \ "U32:revision;U32:class;D:#" #define IFLIB_PNP_INFO(b, u, t) \ - MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, sizeof(t[0]), nitems(t)) + MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, sizeof(t[0]), nitems(t) - 1) typedef struct if_txrx { int (*ift_txd_encap) (void *, if_pkt_info_t); From 60419a9c89ecb8ec9668db3aba3adbd4f4ea08b3 Mon Sep 17 00:00:00 2001 From: Eitan Adler Date: Sat, 23 Dec 2017 05:13:39 +0000 Subject: [PATCH 095/115] fopen.1: document truncation This documents truncation, similar to POSIX and glibc.
PR: 202545 Reported by: intron@intron.ac --- lib/libc/stdio/fopen.3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libc/stdio/fopen.3 b/lib/libc/stdio/fopen.3 index f639f858d5b..c170f189dca 100644 --- a/lib/libc/stdio/fopen.3 +++ b/lib/libc/stdio/fopen.3 @@ -72,7 +72,7 @@ Fail if the file does not exist. .It Dq Li w Open for writing. The stream is positioned at the beginning of the file. -Create the file if it does not exist. +Truncate the file to zero length if it exists or create the file if it does not exist. .It Dq Li a Open for writing. The stream is positioned at the end of the file. From a914e889e3644d5b8a05410ab25ff398801c58dd Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 23 Dec 2017 05:32:20 +0000 Subject: [PATCH 096/115] These drivers have a sentinel at the end of the device list. Exclude it. --- sys/dev/drm2/i915/i915_drv.c | 2 +- sys/dev/drm2/radeon/radeon_drv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/dev/drm2/i915/i915_drv.c b/sys/dev/drm2/i915/i915_drv.c index c293b1ab5b9..f0c8867501b 100644 --- a/sys/dev/drm2/i915/i915_drv.c +++ b/sys/dev/drm2/i915/i915_drv.c @@ -1237,7 +1237,7 @@ MODULE_DEPEND(i915kms, iicbus, 1, 1, 1); MODULE_DEPEND(i915kms, iic, 1, 1, 1); MODULE_DEPEND(i915kms, iicbb, 1, 1, 1); MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:#", vgapci, i915, pciidlist, - sizeof(pciidlist[0]), nitems(pciidlist)); + sizeof(pciidlist[0]), nitems(pciidlist) - 1); /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(dev_priv, reg) \ diff --git a/sys/dev/drm2/radeon/radeon_drv.c b/sys/dev/drm2/radeon/radeon_drv.c index 9574bec4c72..bf3dd063c17 100644 --- a/sys/dev/drm2/radeon/radeon_drv.c +++ b/sys/dev/drm2/radeon/radeon_drv.c @@ -402,4 +402,4 @@ MODULE_DEPEND(radeonkms, iic, 1, 1, 1); MODULE_DEPEND(radeonkms, iicbb, 1, 1, 1); MODULE_DEPEND(radeonkms, firmware, 1, 1, 1); MODULE_PNP_INFO("U32:vendor;U32:device;P:#;D:#", vgapci, radeonkms, - pciidlist, 
sizeof(pciidlist[0]), nitems(pciidlist)); + pciidlist, sizeof(pciidlist[0]), nitems(pciidlist) - 1); From 75d037476586d8d42eeea701166b20d847c48455 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 23 Dec 2017 06:11:19 +0000 Subject: [PATCH 097/115] Expand cryptic comment with information I've learned in the meantime about CIS3/CIS4, including studies I've done on my large collection of PC Cards bought off e-bay over the years since the original entry as well as conversations I've had at conferences. --- sys/dev/pccard/pccardvar.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sys/dev/pccard/pccardvar.h b/sys/dev/pccard/pccardvar.h index ab2fdd2dccd..c6fc94245a0 100644 --- a/sys/dev/pccard/pccardvar.h +++ b/sys/dev/pccard/pccardvar.h @@ -88,10 +88,18 @@ struct pccard_product { }; /** - * Note: There's no cis3 or cis4 reported for NOMATCH / pnpinfo events for pccard - * It's unclear if we actually need that for automatic loading or not. These stirngs - * are informative, according to the standard, but I have a dim memory of using these - * strings to match things, though I can't find the example right now. + * Note: There's no cis3 or cis4 reported for NOMATCH / pnpinfo events for + * pccard It's unclear if we actually need that for automatic loading or + * not. These stirngs are informative, according to the standard. Some Linux + * drivers match on them, for example. However, FreeBSD's hardware probing is a + * little different than Linux so it turns out we don't need them. Some cards + * use CIS3 or CIS4 for a textual representation of the MAC address. In short, + * they aren't needed even though our friends in Linux have them. It is my + * belief that all the entries in Linux don't actually need to be separate there + * either, but it's hard to eliminate them and retest on old, possibly rare, + * hardware so they persist.
Despite years of collecting ~300 different PC Cards + * off E-Bay, I've not been able to find any that need CIS3/CIS4 to select which + * device attaches. */ #define PCCARD_PNP_DESCR "D:#;V32:manufacturer;V32:product;Z:cisvendor;Z:cisproduct;" #define PCCARD_PNP_INFO(t) \ From 5fe4723c701cae7118f3293d9d1f173dcbb062a2 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 23 Dec 2017 06:49:27 +0000 Subject: [PATCH 098/115] Create a new ISA_PNP_INFO macro. Use this macro every where we have ISA PNP card support (replace by hand version in if_ed). Move module declarations to the end of some files. Fix PCCARD_PNP_INFO to use nitems(). Remove some stale comments about pc98, turns out the comment was simply wrong. --- sys/dev/aha/aha_isa.c | 1 + sys/dev/aic/aic_isa.c | 1 + sys/dev/an/if_an_isa.c | 1 + sys/dev/atkbdc/atkbdc_isa.c | 1 + sys/dev/cs/if_cs_isa.c | 1 + sys/dev/ed/if_ed_isa.c | 4 +--- sys/dev/ep/if_ep_isa.c | 1 + sys/dev/ex/if_ex_isa.c | 5 +++-- sys/dev/fdc/fdc_isa.c | 1 + sys/dev/fe/if_fe_isa.c | 5 ++--- sys/dev/joy/joy_isa.c | 1 + sys/dev/le/if_le_isa.c | 9 +++++---- sys/dev/mse/mse_isa.c | 5 +++-- sys/dev/pccard/pccardvar.h | 2 +- sys/dev/pccbb/pccbb_isa.c | 2 +- sys/dev/ppc/ppc_isa.c | 1 + sys/dev/sbni/if_sbni_isa.c | 6 +++--- sys/dev/sio/sio_isa.c | 1 + sys/dev/uart/uart_bus_isa.c | 1 + sys/isa/isavar.h | 4 ++++ 20 files changed, 34 insertions(+), 19 deletions(-) diff --git a/sys/dev/aha/aha_isa.c b/sys/dev/aha/aha_isa.c index 0ada1c7a5a5..4fd4ee601f7 100644 --- a/sys/dev/aha/aha_isa.c +++ b/sys/dev/aha/aha_isa.c @@ -362,3 +362,4 @@ static devclass_t aha_devclass; DRIVER_MODULE(aha, isa, aha_isa_driver, aha_devclass, 0, 0); MODULE_DEPEND(aha, isa, 1, 1, 1); +ISA_PNP_INFO(aha_ids); diff --git a/sys/dev/aic/aic_isa.c b/sys/dev/aic/aic_isa.c index 3258269acde..b4161591e19 100644 --- a/sys/dev/aic/aic_isa.c +++ b/sys/dev/aic/aic_isa.c @@ -242,3 +242,4 @@ extern devclass_t aic_devclass; MODULE_DEPEND(aic, cam, 1,1,1); DRIVER_MODULE(aic, isa, aic_isa_driver, 
aic_devclass, 0, 0); +ISA_PNP_INFO(aic_ids); diff --git a/sys/dev/an/if_an_isa.c b/sys/dev/an/if_an_isa.c index a6d3c81d52d..627bfaa04e6 100644 --- a/sys/dev/an/if_an_isa.c +++ b/sys/dev/an/if_an_isa.c @@ -150,3 +150,4 @@ static devclass_t an_isa_devclass; DRIVER_MODULE(an, isa, an_isa_driver, an_isa_devclass, 0, 0); MODULE_DEPEND(an, isa, 1, 1, 1); MODULE_DEPEND(an, wlan, 1, 1, 1); +ISA_PNP_INFO(an_ids); diff --git a/sys/dev/atkbdc/atkbdc_isa.c b/sys/dev/atkbdc/atkbdc_isa.c index d83d7ac8bb9..d06d449eabe 100644 --- a/sys/dev/atkbdc/atkbdc_isa.c +++ b/sys/dev/atkbdc/atkbdc_isa.c @@ -322,3 +322,4 @@ atkbdc_isa_release_resource(device_t dev, device_t child, int type, int rid, DRIVER_MODULE(atkbdc, isa, atkbdc_isa_driver, atkbdc_devclass, 0, 0); DRIVER_MODULE(atkbdc, acpi, atkbdc_isa_driver, atkbdc_devclass, 0, 0); +ISA_PNP_INFO(atkbdc_ids); diff --git a/sys/dev/cs/if_cs_isa.c b/sys/dev/cs/if_cs_isa.c index 55446238875..4b1e5d7ba0f 100644 --- a/sys/dev/cs/if_cs_isa.c +++ b/sys/dev/cs/if_cs_isa.c @@ -120,3 +120,4 @@ extern devclass_t cs_devclass; DRIVER_MODULE(cs, isa, cs_isa_driver, cs_devclass, 0, 0); MODULE_DEPEND(cs, isa, 1, 1, 1); MODULE_DEPEND(cs, ether, 1, 1, 1); +ISA_PNP_INFO(cs_ids); diff --git a/sys/dev/ed/if_ed_isa.c b/sys/dev/ed/if_ed_isa.c index ac36f4060b8..7cf15817a0b 100644 --- a/sys/dev/ed/if_ed_isa.c +++ b/sys/dev/ed/if_ed_isa.c @@ -203,6 +203,4 @@ static driver_t ed_isa_driver = { DRIVER_MODULE(ed, isa, ed_isa_driver, ed_devclass, 0, 0); MODULE_DEPEND(ed, isa, 1, 1, 1); MODULE_DEPEND(ed, ether, 1, 1, 1); -MODULE_PNP_INFO("E:pnpid;", isa, ed, ed_ids, sizeof(ed_ids[0]), - nitems(ed_ids) - 1); - +ISA_PNP_INFO(ed_ids); diff --git a/sys/dev/ep/if_ep_isa.c b/sys/dev/ep/if_ep_isa.c index c904e609e6f..6e858edf85f 100644 --- a/sys/dev/ep/if_ep_isa.c +++ b/sys/dev/ep/if_ep_isa.c @@ -395,3 +395,4 @@ DRIVER_MODULE(ep, isa, ep_isa_driver, ep_devclass, 0, 0); #ifdef __i386__ MODULE_DEPEND(ep, elink, 1, 1, 1); #endif +ISA_PNP_INFO(ep_ids); diff --git 
a/sys/dev/ex/if_ex_isa.c b/sys/dev/ex/if_ex_isa.c index ab572fff1ef..2b0cd0a9f9d 100644 --- a/sys/dev/ex/if_ex_isa.c +++ b/sys/dev/ex/if_ex_isa.c @@ -82,8 +82,6 @@ static driver_t ex_isa_driver = { sizeof(struct ex_softc), }; -DRIVER_MODULE(ex, isa, ex_isa_driver, ex_devclass, 0, 0); - static struct isa_pnp_id ex_ids[] = { { 0x3110d425, NULL }, /* INT1031 */ { 0x3010d425, NULL }, /* INT1030 */ @@ -337,3 +335,6 @@ ex_look_for_card(struct ex_softc *sc) return((count2 & Counter_bits) == ((count1 + 0xc0) & Counter_bits)); } + +DRIVER_MODULE(ex, isa, ex_isa_driver, ex_devclass, 0, 0); +ISA_PNP_INFO(ex_ids); diff --git a/sys/dev/fdc/fdc_isa.c b/sys/dev/fdc/fdc_isa.c index 55d9c85ca9c..ab03c78a691 100644 --- a/sys/dev/fdc/fdc_isa.c +++ b/sys/dev/fdc/fdc_isa.c @@ -224,3 +224,4 @@ static driver_t fdc_driver = { }; DRIVER_MODULE(fdc, isa, fdc_driver, fdc_devclass, 0, 0); +ISA_PNP_INFO(fdc_ids); diff --git a/sys/dev/fe/if_fe_isa.c b/sys/dev/fe/if_fe_isa.c index 092f30eef6d..c30c9340fc1 100644 --- a/sys/dev/fe/if_fe_isa.c +++ b/sys/dev/fe/if_fe_isa.c @@ -68,9 +68,6 @@ static driver_t fe_isa_driver = { sizeof (struct fe_softc) }; -DRIVER_MODULE(fe, isa, fe_isa_driver, fe_devclass, 0, 0); - - static int fe_probe_ssi(device_t); static int fe_probe_jli(device_t); static int fe_probe_fmv(device_t); @@ -1062,3 +1059,5 @@ fe_probe_ubn(device_t dev) return 0; } + +DRIVER_MODULE(fe, isa, fe_isa_driver, fe_devclass, 0, 0); diff --git a/sys/dev/joy/joy_isa.c b/sys/dev/joy/joy_isa.c index 483279e6914..c09c8444c9c 100644 --- a/sys/dev/joy/joy_isa.c +++ b/sys/dev/joy/joy_isa.c @@ -85,3 +85,4 @@ static driver_t joy_isa_driver = { DRIVER_MODULE(joy, isa, joy_isa_driver, joy_devclass, 0, 0); DRIVER_MODULE(joy, acpi, joy_isa_driver, joy_devclass, 0, 0); +ISA_PNP_INFO(joy_ids); diff --git a/sys/dev/le/if_le_isa.c b/sys/dev/le/if_le_isa.c index ba0fc098d1f..39d5e111bdc 100644 --- a/sys/dev/le/if_le_isa.c +++ b/sys/dev/le/if_le_isa.c @@ -135,10 +135,6 @@ static device_method_t le_isa_methods[] = { 
{ 0, 0 } }; -DEFINE_CLASS_0(le, le_isa_driver, le_isa_methods, sizeof(struct le_isa_softc)); -DRIVER_MODULE(le, isa, le_isa_driver, le_devclass, 0, 0); -MODULE_DEPEND(le, ether, 1, 1, 1); - struct le_isa_param { const char *name; u_long iosize; @@ -496,3 +492,8 @@ le_isa_resume(device_t dev) return (0); } + +DEFINE_CLASS_0(le, le_isa_driver, le_isa_methods, sizeof(struct le_isa_softc)); +DRIVER_MODULE(le, isa, le_isa_driver, le_devclass, 0, 0); +MODULE_DEPEND(le, ether, 1, 1, 1); +ISA_PNP_INFO(le_isa_ids); diff --git a/sys/dev/mse/mse_isa.c b/sys/dev/mse/mse_isa.c index ca4c3a8fe51..5013926c3eb 100644 --- a/sys/dev/mse/mse_isa.c +++ b/sys/dev/mse/mse_isa.c @@ -107,8 +107,6 @@ static driver_t mse_driver = { sizeof(mse_softc_t), }; -DRIVER_MODULE(mse, isa, mse_driver, mse_devclass, 0, 0); - static struct isa_pnp_id mse_ids[] = { { 0x000fd041, "Bus mouse" }, /* PNP0F00 */ { 0x020fd041, "InPort mouse" }, /* PNP0F02 */ @@ -390,3 +388,6 @@ mse_getati(struct resource *port, int *dx, int *dy, int *but) bus_write_1(port, MSE_PORTA, MSE_INPORT_MODE); bus_write_1(port, MSE_PORTB, MSE_INPORT_INTREN); } + +DRIVER_MODULE(mse, isa, mse_driver, mse_devclass, 0, 0); +ISA_PNP_INFO(mse_ids); diff --git a/sys/dev/pccard/pccardvar.h b/sys/dev/pccard/pccardvar.h index c6fc94245a0..573c5ec3f4a 100644 --- a/sys/dev/pccard/pccardvar.h +++ b/sys/dev/pccard/pccardvar.h @@ -103,7 +103,7 @@ struct pccard_product { */ #define PCCARD_PNP_DESCR "D:#;V32:manufacturer;V32:product;Z:cisvendor;Z:cisproduct;" #define PCCARD_PNP_INFO(t) \ - MODULE_PNP_INFO(PCCARD_PNP_DESCR, pccard, t, t, sizeof(t[0]), sizeof(t) / sizeof(t[0]) - 1); \ + MODULE_PNP_INFO(PCCARD_PNP_DESCR, pccard, t, t, sizeof(t[0]), nitems(t) - 1); \ typedef int (*pccard_product_match_fn) (device_t dev, const struct pccard_product *ent, int vpfmatch); diff --git a/sys/dev/pccbb/pccbb_isa.c b/sys/dev/pccbb/pccbb_isa.c index 0b139bf75f2..cb50adf9a23 100644 --- a/sys/dev/pccbb/pccbb_isa.c +++ b/sys/dev/pccbb/pccbb_isa.c @@ -106,7 +106,6 @@ 
SYSCTL_INT(_hw_pcic, OID_AUTO, pd6722_vsense, CTLFLAG_RDTUN, #define DPRINTF(x) do { if (cbb_debug) printf x; } while (0) #define DEVPRINTF(x) do { if (cbb_debug) device_printf x; } while (0) -/* XXX Not sure that PNP0E03 should be claimed, except maybe on pc98 */ static struct isa_pnp_id pcic_ids[] = { {EXCA_PNP_ACTIONTEC, NULL}, /* AEI0218 */ {EXCA_PNP_IBM3765, NULL}, /* IBM3765 */ @@ -255,3 +254,4 @@ static driver_t cbb_isa_driver = { DRIVER_MODULE(cbb, isa, cbb_isa_driver, cbb_devclass, 0, 0); MODULE_DEPEND(cbb, exca, 1, 1, 1); +ISA_PNP_INFO(pcic_ids); diff --git a/sys/dev/ppc/ppc_isa.c b/sys/dev/ppc/ppc_isa.c index 96bfc9bd18a..fb23a8104ee 100644 --- a/sys/dev/ppc/ppc_isa.c +++ b/sys/dev/ppc/ppc_isa.c @@ -274,3 +274,4 @@ error: } DRIVER_MODULE(ppc, isa, ppc_isa_driver, ppc_devclass, 0, 0); +ISA_PNP_INFO(lpc_ids); diff --git a/sys/dev/sbni/if_sbni_isa.c b/sys/dev/sbni/if_sbni_isa.c index f1db8b1a984..9a922b8ec24 100644 --- a/sys/dev/sbni/if_sbni_isa.c +++ b/sys/dev/sbni/if_sbni_isa.c @@ -71,9 +71,6 @@ static struct isa_pnp_id sbni_ids[] = { { 0, NULL } /* we have no pnp sbni cards atm. 
*/ }; -DRIVER_MODULE(sbni, isa, sbni_isa_driver, sbni_isa_devclass, 0, 0); -MODULE_DEPEND(sbni, isa, 1, 1, 1); - static int sbni_probe_isa(device_t dev) { @@ -166,3 +163,6 @@ sbni_attach_isa(device_t dev) return (0); } + +DRIVER_MODULE(sbni, isa, sbni_isa_driver, sbni_isa_devclass, 0, 0); +MODULE_DEPEND(sbni, isa, 1, 1, 1); diff --git a/sys/dev/sio/sio_isa.c b/sys/dev/sio/sio_isa.c index 8c9fae89182..bef2a9ab9fa 100644 --- a/sys/dev/sio/sio_isa.c +++ b/sys/dev/sio/sio_isa.c @@ -175,3 +175,4 @@ DRIVER_MODULE(sio, isa, sio_isa_driver, sio_devclass, 0, 0); #ifndef COM_NO_ACPI DRIVER_MODULE(sio, acpi, sio_isa_driver, sio_devclass, 0, 0); #endif +ISA_PNP_INFO(sio_ids); diff --git a/sys/dev/uart/uart_bus_isa.c b/sys/dev/uart/uart_bus_isa.c index 8d80f88a910..759d4f92a9a 100644 --- a/sys/dev/uart/uart_bus_isa.c +++ b/sys/dev/uart/uart_bus_isa.c @@ -172,3 +172,4 @@ uart_isa_probe(device_t dev) } DRIVER_MODULE(uart, isa, uart_isa_driver, uart_devclass, 0, 0); +ISA_PNP_INFO(isa_ns8250_ids); diff --git a/sys/isa/isavar.h b/sys/isa/isavar.h index cc3ce95b41d..329db19879a 100644 --- a/sys/isa/isavar.h +++ b/sys/isa/isavar.h @@ -140,6 +140,10 @@ enum isa_device_ivars { #define ISACFGATTR_DYNAMIC (1 << 1) /* dynamic configuration */ #define ISACFGATTR_HINTS (1 << 3) /* source of config is hints */ +#define ISA_PNP_DESCR "E:pnpid;D:#" +#define ISA_PNP_INFO(t) \ + MODULE_PNP_INFO(ISA_PNP_DESCR, pccard, t, t, sizeof(t[0]), nitems(t) - 1); \ + /* * Simplified accessors for isa devices */ From 04291531bc73ec999ab40791cad8b518c3e0973a Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 23 Dec 2017 07:02:45 +0000 Subject: [PATCH 099/115] Fix cut-and-paste error s/pccard/isa/ --- sys/isa/isavar.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/isa/isavar.h b/sys/isa/isavar.h index 329db19879a..1a3e661b67a 100644 --- a/sys/isa/isavar.h +++ b/sys/isa/isavar.h @@ -142,7 +142,7 @@ enum isa_device_ivars { #define ISA_PNP_DESCR "E:pnpid;D:#" #define ISA_PNP_INFO(t) \ - 
MODULE_PNP_INFO(ISA_PNP_DESCR, pccard, t, t, sizeof(t[0]), nitems(t) - 1); \ + MODULE_PNP_INFO(ISA_PNP_DESCR, isa, t, t, sizeof(t[0]), nitems(t) - 1); \ /* * Simplified accessors for isa devices From 4d68f3daa0e9153dc4e1ccaa74104e9d52b85aa7 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Sat, 23 Dec 2017 14:27:42 +0000 Subject: [PATCH 100/115] syscon: Introduce kobj and split out fdt bits Allow more flexibility by kobj'ifying syscon and splitting out fdt specific bits in preparation of a move to the extres framework. The generic fdt driver has been moved to syscon_generic.c and the fdt requirement has been removed from the syscon interface, as is common to the extres framework. Reviewed by: strejda Differential Revision: https://reviews.freebsd.org/D13521 --- sys/conf/files | 5 +- sys/dev/syscon/syscon.c | 2 +- sys/dev/syscon/syscon.h | 77 ++++++++++++ sys/dev/syscon/syscon_generic.c | 211 ++++++++++++++++++++++++++++++++ sys/dev/syscon/syscon_if.m | 25 ++-- 5 files changed, 309 insertions(+), 11 deletions(-) create mode 100644 sys/dev/syscon/syscon.h create mode 100644 sys/dev/syscon/syscon_generic.c diff --git a/sys/conf/files b/sys/conf/files index 1f62c7a210e..968957d20da 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3107,8 +3107,9 @@ dev/stg/tmc18c30_subr.c optional stg dev/stge/if_stge.c optional stge dev/sym/sym_hipd.c optional sym \ dependency "$S/dev/sym/sym_{conf,defs}.h" -dev/syscon/syscon.c optional fdt syscon -dev/syscon/syscon_if.m optional fdt syscon +dev/syscon/syscon.c optional syscon +dev/syscon/syscon_generic.c optional fdt syscon +dev/syscon/syscon_if.m optional syscon dev/syscons/blank/blank_saver.c optional blank_saver dev/syscons/daemon/daemon_saver.c optional daemon_saver dev/syscons/dragon/dragon_saver.c optional dragon_saver diff --git a/sys/dev/syscon/syscon.c b/sys/dev/syscon/syscon.c index dbe3e38482f..8bb3e14b700 100644 --- a/sys/dev/syscon/syscon.c +++ b/sys/dev/syscon/syscon.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 Michal 
Meloun + * Copyright (c) 2017 Kyle Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/syscon/syscon.h b/sys/dev/syscon/syscon.h new file mode 100644 index 00000000000..7825dfd844a --- /dev/null +++ b/sys/dev/syscon/syscon.h @@ -0,0 +1,77 @@ +/*- + * Copyright 2017 Kyle Evans + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef DEV_SYSCON_H +#define DEV_SYSCON_H + +#include "opt_platform.h" + +#include +#include +#ifdef FDT +#include +#endif + +struct syscon { + KOBJ_FIELDS; + + TAILQ_ENTRY(syscon) syscon_link; /* Global list entry */ + + device_t pdev; /* provider device */ +#ifdef FDT + phandle_t ofw_node; /* OFW node for syscon */ +#endif + void *softc; /* provider softc */ +}; + +/* + * Shorthands for constructing method tables. + */ +#define SYSCONMETHOD KOBJMETHOD +#define SYSCONMETHOD_END KOBJMETHOD_END +#define syscon_method_t kobj_method_t +#define syscon_class_t kobj_class_t +DECLARE_CLASS(syscon_class); + +void *syscon_get_softc(struct syscon *syscon); + +/* + * Provider interface + */ +struct syscon *syscon_create(device_t pdev, syscon_class_t syscon_class); +struct syscon *syscon_register(struct syscon *syscon); +int syscon_unregister(struct syscon *syscon); + +#ifdef FDT +struct syscon *syscon_create_ofw_node(device_t pdev, + syscon_class_t syscon_class, phandle_t node); +phandle_t syscon_get_ofw_node(struct syscon *syscon); +int syscon_get_by_ofw_property(device_t consumer, phandle_t node, char *name, + struct syscon **syscon); +#endif + +#endif /* DEV_SYSCON_H */ diff --git a/sys/dev/syscon/syscon_generic.c b/sys/dev/syscon/syscon_generic.c new file mode 100644 index 00000000000..04b0343f181 --- /dev/null +++ b/sys/dev/syscon/syscon_generic.c @@ -0,0 +1,211 @@ +/*- + * Copyright (c) 2015 Michal Meloun + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This is a generic syscon driver, whose purpose is to provide access to + * various unrelated bits packed in a single register space. It is usually used + * as a fallback to more specific driver, but works well enough for simple + * access. 
+ */ + +#include +__FBSDID("$FreeBSD$"); +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "syscon_if.h" +#include "syscon.h" + +MALLOC_DECLARE(M_SYSCON); + +static uint32_t syscon_generic_read_4(struct syscon *syscon, bus_size_t offset); +static int syscon_generic_write_4(struct syscon *syscon, bus_size_t offset, + uint32_t val); +static int syscon_generic_modify_4(struct syscon *syscon, bus_size_t offset, + uint32_t clear_bits, uint32_t set_bits); + +/* + * Generic syscon driver (FDT) + */ +struct syscon_generic_softc { + device_t dev; + struct syscon *syscon; + struct resource *mem_res; + struct mtx mtx; +}; + +static struct ofw_compat_data compat_data[] = { + {"syscon", 1}, + {NULL, 0} +}; + +#define SYSCON_LOCK(_sc) mtx_lock(&(_sc)->mtx) +#define SYSCON_UNLOCK(_sc) mtx_unlock(&(_sc)->mtx) +#define SYSCON_LOCK_INIT(_sc) mtx_init(&(_sc)->mtx, \ + device_get_nameunit((_sc)->dev), "syscon", MTX_DEF) +#define SYSCON_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->mtx); +#define SYSCON_ASSERT_LOCKED(_sc) mtx_assert(&(_sc)->mtx, MA_OWNED); +#define SYSCON_ASSERT_UNLOCKED(_sc) mtx_assert(&(_sc)->mtx, MA_NOTOWNED); + +static syscon_method_t syscon_generic_methods[] = { + SYSCONMETHOD(syscon_read_4, syscon_generic_read_4), + SYSCONMETHOD(syscon_write_4, syscon_generic_write_4), + SYSCONMETHOD(syscon_modify_4, syscon_generic_modify_4), + + SYSCONMETHOD_END +}; +DEFINE_CLASS_1(syscon_generic, syscon_generic_class, syscon_generic_methods, + 0, syscon_class); + +static uint32_t +syscon_generic_read_4(struct syscon *syscon, bus_size_t offset) +{ + struct syscon_generic_softc *sc; + uint32_t val; + + sc = device_get_softc(syscon->pdev); + + SYSCON_LOCK(sc); + val = bus_read_4(sc->mem_res, offset); + SYSCON_UNLOCK(sc); + return (val); +} + +static int +syscon_generic_write_4(struct syscon *syscon, bus_size_t offset, uint32_t val) +{ + struct syscon_generic_softc *sc; + + sc = device_get_softc(syscon->pdev); + + 
SYSCON_LOCK(sc); + bus_write_4(sc->mem_res, offset, val); + SYSCON_UNLOCK(sc); + return (0); +} + +static int +syscon_generic_modify_4(struct syscon *syscon, bus_size_t offset, + uint32_t clear_bits, uint32_t set_bits) +{ + struct syscon_generic_softc *sc; + uint32_t val; + + sc = device_get_softc(syscon->pdev); + + SYSCON_LOCK(sc); + val = bus_read_4(sc->mem_res, offset); + val &= ~clear_bits; + val |= set_bits; + bus_write_4(sc->mem_res, offset, val); + SYSCON_UNLOCK(sc); + return (0); +} + +static int +syscon_generic_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) + return (ENXIO); + + device_set_desc(dev, "syscon"); + return (BUS_PROBE_GENERIC); +} + +static int +syscon_generic_attach(device_t dev) +{ + struct syscon_generic_softc *sc; + int rid; + + sc = device_get_softc(dev); + sc->dev = dev; + + rid = 0; + sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, + RF_ACTIVE); + if (sc->mem_res == NULL) { + device_printf(dev, "Cannot allocate memory resource\n"); + return (ENXIO); + } + + SYSCON_LOCK_INIT(sc); + sc->syscon = syscon_create_ofw_node(dev, &syscon_generic_class, + ofw_bus_get_node(dev)); + if (sc->syscon == NULL) { + device_printf(dev, "Failed to create/register syscon\n"); + return (ENXIO); + } + return (0); +} + +static int +syscon_generic_detach(device_t dev) +{ + struct syscon_generic_softc *sc; + + sc = device_get_softc(dev); + + if (sc->syscon != NULL) { + syscon_unregister(sc->syscon); + free(sc->syscon, M_SYSCON); + } + + SYSCON_LOCK_DESTROY(sc); + + if (sc->mem_res != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->mem_res); + return (0); +} + +static device_method_t syscon_generic_dmethods[] = { + /* Device interface */ + DEVMETHOD(device_probe, syscon_generic_probe), + DEVMETHOD(device_attach, syscon_generic_attach), + DEVMETHOD(device_detach, syscon_generic_detach), + + DEVMETHOD_END +}; + +DEFINE_CLASS_0(syscon_generic, 
syscon_generic_driver, syscon_generic_dmethods, + sizeof(struct syscon_generic_softc)); +static devclass_t syscon_generic_devclass; +EARLY_DRIVER_MODULE(syscon_generic, simplebus, syscon_generic_driver, + syscon_generic_devclass, 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_LATE); +MODULE_VERSION(syscon_generic, 1); diff --git a/sys/dev/syscon/syscon_if.m b/sys/dev/syscon/syscon_if.m index 04cf1707765..b394927842d 100644 --- a/sys/dev/syscon/syscon_if.m +++ b/sys/dev/syscon/syscon_if.m @@ -30,25 +30,34 @@ INTERFACE syscon; +HEADER { + struct syscon; +} + +METHOD int init { + struct syscon *syscon; +}; + +METHOD int uninit { + struct syscon *syscon; +}; + /** * Accessor functions for syscon register space */ METHOD uint32_t read_4 { - device_t dev; - device_t consumer; + struct syscon *syscon; bus_size_t offset; }; -METHOD void write_4 { - device_t dev; - device_t consumer; +METHOD int write_4 { + struct syscon *syscon; bus_size_t offset; uint32_t val; }; -METHOD void modify_4 { - device_t dev; - device_t consumer; +METHOD int modify_4 { + struct syscon *syscon; bus_size_t offset; uint32_t clear_bits; uint32_t set_bits; From cd04523f0edf38e57f54ded357ffa5138db00ed7 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Sat, 23 Dec 2017 14:30:44 +0000 Subject: [PATCH 101/115] Move syscon into extres framework This should help reduce confusion between syscon/syscons a little bit. syscon is a resource generally modeled by FDT platforms, and not to be confused with syscons. 
--- sys/conf/files | 6 +++--- sys/dev/{ => extres}/syscon/syscon.c | 0 sys/dev/{ => extres}/syscon/syscon.h | 0 sys/dev/{ => extres}/syscon/syscon_generic.c | 0 sys/dev/{ => extres}/syscon/syscon_if.m | 0 5 files changed, 3 insertions(+), 3 deletions(-) rename sys/dev/{ => extres}/syscon/syscon.c (100%) rename sys/dev/{ => extres}/syscon/syscon.h (100%) rename sys/dev/{ => extres}/syscon/syscon_generic.c (100%) rename sys/dev/{ => extres}/syscon/syscon_if.m (100%) diff --git a/sys/conf/files b/sys/conf/files index 968957d20da..c6f57082743 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1723,6 +1723,9 @@ dev/extres/regulator/regnode_if.m optional ext_resources regulator fdt dev/extres/regulator/regulator.c optional ext_resources regulator fdt dev/extres/regulator/regulator_bus.c optional ext_resources regulator fdt dev/extres/regulator/regulator_fixed.c optional ext_resources regulator fdt +dev/extres/syscon/syscon.c optional ext_resources syscon +dev/extres/syscon/syscon_generic.c optional ext_resources syscon fdt +dev/extres/syscon/syscon_if.m optional ext_resources syscon dev/fb/fbd.c optional fbd | vt dev/fb/fb_if.m standard dev/fb/splash.c optional sc splash @@ -3107,9 +3110,6 @@ dev/stg/tmc18c30_subr.c optional stg dev/stge/if_stge.c optional stge dev/sym/sym_hipd.c optional sym \ dependency "$S/dev/sym/sym_{conf,defs}.h" -dev/syscon/syscon.c optional syscon -dev/syscon/syscon_generic.c optional fdt syscon -dev/syscon/syscon_if.m optional syscon dev/syscons/blank/blank_saver.c optional blank_saver dev/syscons/daemon/daemon_saver.c optional daemon_saver dev/syscons/dragon/dragon_saver.c optional dragon_saver diff --git a/sys/dev/syscon/syscon.c b/sys/dev/extres/syscon/syscon.c similarity index 100% rename from sys/dev/syscon/syscon.c rename to sys/dev/extres/syscon/syscon.c diff --git a/sys/dev/syscon/syscon.h b/sys/dev/extres/syscon/syscon.h similarity index 100% rename from sys/dev/syscon/syscon.h rename to sys/dev/extres/syscon/syscon.h diff --git 
a/sys/dev/syscon/syscon_generic.c b/sys/dev/extres/syscon/syscon_generic.c similarity index 100% rename from sys/dev/syscon/syscon_generic.c rename to sys/dev/extres/syscon/syscon_generic.c diff --git a/sys/dev/syscon/syscon_if.m b/sys/dev/extres/syscon/syscon_if.m similarity index 100% rename from sys/dev/syscon/syscon_if.m rename to sys/dev/extres/syscon/syscon_if.m From 3395dd6eb8f69fd45aec7238eba75476f2eb050c Mon Sep 17 00:00:00 2001 From: Alexander Kabaev Date: Sat, 23 Dec 2017 16:23:58 +0000 Subject: [PATCH 102/115] Do not double free the memory in if_clone. if_clone_attach function will drop the reference on failure which will free the if_clone structure. No need to do it second time. Reviewed by: glebius, ae Differential Revision: https://reviews.freebsd.org/D10386 --- sys/net/if_clone.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sys/net/if_clone.c b/sys/net/if_clone.c index dbb7bd6d28a..c455357cae8 100644 --- a/sys/net/if_clone.c +++ b/sys/net/if_clone.c @@ -387,10 +387,8 @@ if_clone_advanced(const char *name, u_int maxunit, ifc_match_t match, ifc->ifc_create = create; ifc->ifc_destroy = destroy; - if (if_clone_attach(ifc) != 0) { - if_clone_free(ifc); + if (if_clone_attach(ifc) != 0) return (NULL); - } EVENTHANDLER_INVOKE(if_clone_event, ifc); @@ -410,10 +408,8 @@ if_clone_simple(const char *name, ifcs_create_t create, ifcs_destroy_t destroy, ifc->ifcs_destroy = destroy; ifc->ifcs_minifs = minifs; - if (if_clone_attach(ifc) != 0) { - if_clone_free(ifc); + if (if_clone_attach(ifc) != 0) return (NULL); - } for (unit = 0; unit < minifs; unit++) { char name[IFNAMSIZ]; From ce4ab99d82111e683beca99631ae5a90fda26f3c Mon Sep 17 00:00:00 2001 From: Alexander Kabaev Date: Sat, 23 Dec 2017 16:24:00 +0000 Subject: [PATCH 103/115] Remove some trailing whitespace. 
Reviewed by: glebius, ae Differential Revision: https://reviews.freebsd.org/D10386 --- sys/net/if_clone.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/net/if_clone.c b/sys/net/if_clone.c index c455357cae8..23381267650 100644 --- a/sys/net/if_clone.c +++ b/sys/net/if_clone.c @@ -355,7 +355,7 @@ if_clone_alloc(const char *name, int maxunit) return (ifc); } - + static int if_clone_attach(struct if_clone *ifc) { @@ -446,7 +446,7 @@ if_clone_detach(struct if_clone *ifc) /* destroy all interfaces for this cloner */ while (!LIST_EMPTY(&ifc->ifc_iflist)) if_clone_destroyif(ifc, LIST_FIRST(&ifc->ifc_iflist)); - + IF_CLONE_REMREF(ifc); } From 5f943cca65ce394da89b66ab503accc3bad98c1e Mon Sep 17 00:00:00 2001 From: Alexander Kabaev Date: Sat, 23 Dec 2017 16:24:02 +0000 Subject: [PATCH 104/115] Remove dead initialization of the inode pointer. The pointer gets initialized again later in the code. This also improves code style(9). --- sys/ufs/ffs/ffs_snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 157f67861f5..43829e869d4 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -2502,7 +2502,7 @@ readblock(vp, bp, lbn) struct buf *bp; ufs2_daddr_t lbn; { - struct inode *ip = VTOI(vp); + struct inode *ip; struct bio *bip; struct fs *fs; From 92f19df431e2135b6750f26aa7a4523119e3cf89 Mon Sep 17 00:00:00 2001 From: Alexander Kabaev Date: Sat, 23 Dec 2017 16:45:24 +0000 Subject: [PATCH 105/115] Do not pass NULL pointer to copyout in if_clone_list. Sometimes caller is only interested in how many clones are there and NULL pointer is passed for the destination buffer. Do not pass it to copyout then. 
--- sys/net/if_clone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/net/if_clone.c b/sys/net/if_clone.c index 23381267650..8a227a527d8 100644 --- a/sys/net/if_clone.c +++ b/sys/net/if_clone.c @@ -508,7 +508,7 @@ if_clone_list(struct if_clonereq *ifcr) done: IF_CLONERS_UNLOCK(); - if (err == 0) + if (err == 0 && dst != NULL) err = copyout(outbuf, dst, buf_count*IFNAMSIZ); if (outbuf != NULL) free(outbuf, M_CLONE); From bf51c9665d3e8890166e77b29acd06841cc2baae Mon Sep 17 00:00:00 2001 From: Alexander Kabaev Date: Sat, 23 Dec 2017 16:45:26 +0000 Subject: [PATCH 106/115] Silence clang analyzer false positive. clang does not know that two lookup calls will return the same pointer, so it assumes correctly that using the old pointer after dropping the reference to it is a bit risky. --- sys/netinet6/nd6_nbr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 0ac90638aa3..6766e1b8000 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -1307,7 +1307,8 @@ nd6_dad_stop(struct ifaddr *ifa) * we were waiting for it to stop, so re-do the lookup. */ nd6_dad_rele(dp); - if (nd6_dad_find(ifa, NULL) == NULL) + dp = nd6_dad_find(ifa, NULL); + if (dp == NULL) return; nd6_dad_del(dp); From 4daa09f3430cae306ced07bc337098cbd96b2ef1 Mon Sep 17 00:00:00 2001 From: Alexander Kabaev Date: Sat, 23 Dec 2017 16:49:57 +0000 Subject: [PATCH 107/115] Remove dead store to local variable. 
--- sys/kern/kern_shutdown.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index ad080ad07f7..d51a4c5aa76 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -536,7 +536,6 @@ shutdown_halt(void *junk, int howto) cpu_halt(); /* NOTREACHED */ default: - howto &= ~RB_HALT; break; } } From 6d41588b6b96171640f07e9805193623636ac11b Mon Sep 17 00:00:00 2001 From: Alexander Kabaev Date: Sat, 23 Dec 2017 17:55:19 +0000 Subject: [PATCH 108/115] Reverse the check to allocate the buffer if cached pointer is NULL. Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D13596 --- sys/kern/vfs_export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 2f67c054f5f..3ce2ea4ea0e 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -412,7 +412,7 @@ vfs_setpublicfs(struct mount *mp, struct netexport *nep, * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { - if (nfs_pub.np_index != NULL) + if (nfs_pub.np_index == NULL) nfs_pub.np_index = malloc(MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, From 16545cf5d5f51cb67c03923cbb9c75a024bdaca8 Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Sat, 23 Dec 2017 18:07:43 +0000 Subject: [PATCH 109/115] Introduce the daemonfd function. The daemonfd function is equivalent to the daemon(3) function except that arguments are descriptors. For example dhclient(8) which is sandboxed is unable to open /dev/null to close stdio; instead it allows the daemon(3) function to fail to close the descriptors and then does it explicitly in code. Instead of such hacks we can now use daemonfd. This API can also be helpful to migrate system to platforms like CheriBSD.
Reviewed by: brooks@, bcr@, jilles@ (earlier version) Differential Revision: https://reviews.freebsd.org/D13433 --- include/stdlib.h | 1 + lib/libc/gen/Symbol.map | 1 + lib/libc/gen/daemon.3 | 34 +++++++++++++++++++++++++++-- lib/libc/gen/daemon.c | 48 ++++++++++++++++++++++++++++++----------- 4 files changed, 70 insertions(+), 14 deletions(-) diff --git a/include/stdlib.h b/include/stdlib.h index 93c4bb50cd7..56bb1566d58 100644 --- a/include/stdlib.h +++ b/include/stdlib.h @@ -274,6 +274,7 @@ int cgetstr(char *, const char *, char **); int cgetustr(char *, const char *, char **); int daemon(int, int); +int daemonfd(int, int); char *devname(__dev_t, __mode_t); char *devname_r(__dev_t, __mode_t, char *, int); char *fdevname(int); diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index d11815c05fa..44fb85a4fab 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -394,6 +394,7 @@ FBSD_1.4 { FBSD_1.5 { alphasort; basename; + daemonfd; devname; devname_r; dirname; diff --git a/lib/libc/gen/daemon.3 b/lib/libc/gen/daemon.3 index cced3ceabb2..7d95dd8c2a2 100644 --- a/lib/libc/gen/daemon.3 +++ b/lib/libc/gen/daemon.3 @@ -28,7 +28,7 @@ .\" @(#)daemon.3 8.1 (Berkeley) 6/9/93 .\" $FreeBSD$ .\" -.Dd June 9, 1993 +.Dd December 23, 2017 .Dt DAEMON 3 .Os .Sh NAME @@ -40,6 +40,8 @@ .In stdlib.h .Ft int .Fn daemon "int nochdir" "int noclose" +.Ft int +.Fn daemonfd "int chdirfd" "int nullfd" .Sh DESCRIPTION The .Fn daemon @@ -59,15 +61,39 @@ is non-zero, .Fn daemon will redirect standard input, standard output, and standard error to .Pa /dev/null . +.Pp +The +.Fn daemonfd +function is equivalent to the +.Fn daemon +function except that arguments are the descriptors for the current working +directory and to the descriptor to +.Pa /dev/null . +.Pp +If +.Fa chdirfd +is equal to +.Pq -1 +the current working directory is not changed. 
+.Pp +If +.Fa nullfd +is equal to +.Pq -1 +the redirection of standard input, standard output, and standard error is not +closed. .Sh RETURN VALUES -.Rv -std daemon +.Rv -std daemon daemonfd .Sh ERRORS The .Fn daemon +and +.Fn daemonfd function may fail and set .Va errno for any of the errors specified for the library functions .Xr fork 2 +.Xr open 2 , and .Xr setsid 2 . .Sh SEE ALSO @@ -79,6 +105,10 @@ The .Fn daemon function first appeared in .Bx 4.4 . +The +.Fn daemonfd +function first appeared in +.Fx 12.0 . .Sh CAVEATS Unless the .Fa noclose diff --git a/lib/libc/gen/daemon.c b/lib/libc/gen/daemon.c index b105e7519b5..467f0027700 100644 --- a/lib/libc/gen/daemon.c +++ b/lib/libc/gen/daemon.c @@ -1,8 +1,9 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. + * Copyright (c) 1990, 1993 The Regents of the University of California. + * Copyright (c) 2017 Mariusz Zaborski + * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -46,10 +47,9 @@ __FBSDID("$FreeBSD$"); #include "libc_private.h" int -daemon(int nochdir, int noclose) +daemonfd(int chdirfd, int nullfd) { struct sigaction osa, sa; - int fd; pid_t newgrp; int oerrno; int osa_ok; @@ -83,15 +83,39 @@ daemon(int nochdir, int noclose) return (-1); } - if (!nochdir) - (void)chdir("/"); + if (chdirfd != -1) + (void)fchdir(chdirfd); - if (!noclose && (fd = _open(_PATH_DEVNULL, O_RDWR, 0)) != -1) { - (void)_dup2(fd, STDIN_FILENO); - (void)_dup2(fd, STDOUT_FILENO); - (void)_dup2(fd, STDERR_FILENO); - if (fd > 2) - (void)_close(fd); + if (nullfd != -1) { + (void)_dup2(nullfd, STDIN_FILENO); + (void)_dup2(nullfd, STDOUT_FILENO); + (void)_dup2(nullfd, STDERR_FILENO); } return (0); } + +int +daemon(int nochdir, int noclose) +{ + int chdirfd, nullfd, ret; + + if (!noclose) + nullfd = _open(_PATH_DEVNULL, O_RDWR, 0); + else + nullfd = -1; + + if (!nochdir) + chdirfd = _open("/", O_EXEC); + else + chdirfd = -1; + + ret = daemonfd(chdirfd, nullfd); + + if (chdirfd != -1) + _close(chdirfd); + + if (nullfd > 2) + _close(nullfd); + + return (ret); +} From 4f8efc2274abf89a2103658c24d601f7deed477c Mon Sep 17 00:00:00 2001 From: Eitan Adler Date: Sat, 23 Dec 2017 19:48:57 +0000 Subject: [PATCH 110/115] ldd: avoid statically linked executables in example The example works but spews warnings if run over a directory with statically linked binaries. PR: 211024 Submitted by: mike@skew.org --- usr.bin/ldd/ldd.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usr.bin/ldd/ldd.1 b/usr.bin/ldd/ldd.1 index 483825bfdd5..5a06515ebd8 100644 --- a/usr.bin/ldd/ldd.1 +++ b/usr.bin/ldd/ldd.1 @@ -63,7 +63,7 @@ The following is an example of a shell pipeline which uses the option. It will print a report of all ELF binaries in the current directory, which link against libc.so.6: -.Dl "find . 
-type f | xargs -n1 file -F ' ' | grep ELF | cut -f1 -d' ' | xargs ldd -f '%A %o\en' | grep libc.so.6" +.Dl "find . -type f | xargs -n1 file -F ' ' | grep 'ELF.*dynamically' | cut -f1 -d' ' | xargs ldd -f '%A %o\en' | grep libc.so.6" .Sh SEE ALSO .Xr ld 1 , .Xr nm 1 , From a92970d8aead74fb219bbf8387c373e89b2bbd9b Mon Sep 17 00:00:00 2001 From: Eitan Adler Date: Sat, 23 Dec 2017 21:04:32 +0000 Subject: [PATCH 111/115] calendar: add missing header file time.h is required for strftime and struct tm Reviewed by: edje --- usr.bin/calendar/io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/usr.bin/calendar/io.c b/usr.bin/calendar/io.c index 19b7c7dd334..0b894f0b894 100644 --- a/usr.bin/calendar/io.c +++ b/usr.bin/calendar/io.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include "pathnames.h" From 6332b148872675ae3907d51c0e3015067624bb37 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 23 Dec 2017 21:32:50 +0000 Subject: [PATCH 112/115] Add missed AVX512VL (128 and 256 bit vector length) extension identification bit. 
Sponsored by: The FreeBSD Foundation MFC after: 3 days --- sys/x86/include/specialreg.h | 1 + sys/x86/x86/identcpu.c | 1 + 2 files changed, 2 insertions(+) diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h index 511056a362b..0fa472f948a 100644 --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -409,6 +409,7 @@ #define CPUID_STDEXT_AVX512CD 0x10000000 #define CPUID_STDEXT_SHA 0x20000000 #define CPUID_STDEXT_AVX512BW 0x40000000 +#define CPUID_STDEXT_AVX512VL 0x80000000 /* * CPUID instruction 7 Structured Extended Features, leaf 0 ecx info diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index 6a29ce26528..8d4da42c83a 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -963,6 +963,7 @@ printcpuinfo(void) "\035AVX512CD" "\036SHA" "\037AVX512BW" + "\040AVX512VL" ); } From 7dcb3b12959a17fbb62e5fa18fb3cf2be4b403b3 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 23 Dec 2017 22:57:14 +0000 Subject: [PATCH 113/115] Warn when nonPNP ISA devices are attached in GENERIC that they are being removed from GENERIC in 12. Always print PNP info for ISA when it exists: it doesn't depend on ISAPNP. Add PNP ID to orm and vga to prevent us from warning about them since those devices aren't being removed from GENERIC. PNP devices will be removed from GENERIC too, but they will be automatically loaded, so need no warning. We don't warn for non-GENERIC kernels because people running them are presumed to know what they are doing. 
MFC After: 2 weeks --- sys/isa/isa_common.c | 34 ++++++++++++++++++++++++++++------ sys/isa/pnp.c | 20 -------------------- sys/isa/vga_isa.c | 1 + sys/x86/isa/orm.c | 1 + 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/sys/isa/isa_common.c b/sys/isa/isa_common.c index f2bd9c36f4b..c95fe46655c 100644 --- a/sys/isa/isa_common.c +++ b/sys/isa/isa_common.c @@ -68,10 +68,12 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include +#include #include @@ -499,7 +501,7 @@ isa_probe_children(device_t dev) struct isa_device *idev; device_t *children, child; struct isa_config *cfg; - int nchildren, i; + int nchildren, i, err; /* * Create all the non-hinted children by calling drivers' @@ -569,7 +571,11 @@ isa_probe_children(device_t dev) !TAILQ_EMPTY(&idev->id_configs)) continue; - device_probe_and_attach(child); + err = device_probe_and_attach(child); + if (err == 0 && idev->id_vendorid == 0 && + strcmp(kern_ident, "GENERIC") == 0) + device_printf(child, + "non-PNP ISA device will be removed from GENERIC in FreeBSD 12."); } /* @@ -637,10 +643,8 @@ isa_print_all_resources(device_t dev) retval += resource_list_print_type(rl, "drq", SYS_RES_DRQ, "%jd"); if (device_get_flags(dev)) retval += printf(" flags %#x", device_get_flags(dev)); -#ifdef ISAPNP if (idev->id_vendorid) retval += printf(" pnpid %s", pnp_eisaformat(idev->id_vendorid)); -#endif return (retval); } @@ -1030,13 +1034,11 @@ static int isa_child_pnpinfo_str(device_t bus, device_t child, char *buf, size_t buflen) { -#ifdef ISAPNP struct isa_device *idev = DEVTOISA(child); if (idev->id_vendorid) snprintf(buf, buflen, "pnpid=%s", pnp_eisaformat(idev->id_vendorid)); -#endif return (0); } @@ -1125,3 +1127,23 @@ isab_attach(device_t dev) return (bus_generic_attach(dev)); return (ENXIO); } + +char * +pnp_eisaformat(uint32_t id) +{ + uint8_t *data; + static char idbuf[8]; + const char hextoascii[] = "0123456789abcdef"; + + id = htole32(id); + data = (uint8_t *)&id; + 
idbuf[0] = '@' + ((data[0] & 0x7c) >> 2); + idbuf[1] = '@' + (((data[0] & 0x3) << 3) + ((data[1] & 0xe0) >> 5)); + idbuf[2] = '@' + (data[1] & 0x1f); + idbuf[3] = hextoascii[(data[2] >> 4)]; + idbuf[4] = hextoascii[(data[2] & 0xf)]; + idbuf[5] = hextoascii[(data[3] >> 4)]; + idbuf[6] = hextoascii[(data[3] & 0xf)]; + idbuf[7] = 0; + return(idbuf); +} diff --git a/sys/isa/pnp.c b/sys/isa/pnp.c index 99b2edc0612..c2c66e08c20 100644 --- a/sys/isa/pnp.c +++ b/sys/isa/pnp.c @@ -103,26 +103,6 @@ static void pnp_send_initiation_key(void); static int pnp_get_serial(pnp_id *p); static int pnp_isolation_protocol(device_t parent); -char * -pnp_eisaformat(uint32_t id) -{ - uint8_t *data; - static char idbuf[8]; - const char hextoascii[] = "0123456789abcdef"; - - id = htole32(id); - data = (uint8_t *)&id; - idbuf[0] = '@' + ((data[0] & 0x7c) >> 2); - idbuf[1] = '@' + (((data[0] & 0x3) << 3) + ((data[1] & 0xe0) >> 5)); - idbuf[2] = '@' + (data[1] & 0x1f); - idbuf[3] = hextoascii[(data[2] >> 4)]; - idbuf[4] = hextoascii[(data[2] & 0xf)]; - idbuf[5] = hextoascii[(data[3] >> 4)]; - idbuf[6] = hextoascii[(data[3] & 0xf)]; - idbuf[7] = 0; - return(idbuf); -} - static void pnp_write(int d, u_char r) { diff --git a/sys/isa/vga_isa.c b/sys/isa/vga_isa.c index 524f927ba76..b2e32a6c6c9 100644 --- a/sys/isa/vga_isa.c +++ b/sys/isa/vga_isa.c @@ -175,6 +175,7 @@ isavga_probe(device_t dev) adp.va_io_base, adp.va_io_size); bus_set_resource(dev, SYS_RES_MEMORY, 0, adp.va_mem_base, adp.va_mem_size); + isa_set_vendorid(dev, PNP_EISAID("PNP0900")); #if 0 isa_set_port(dev, adp.va_io_base); isa_set_portsize(dev, adp.va_io_size); diff --git a/sys/x86/isa/orm.c b/sys/x86/isa/orm.c index 6caee883885..14bfdc42f26 100644 --- a/sys/x86/isa/orm.c +++ b/sys/x86/isa/orm.c @@ -156,6 +156,7 @@ orm_identify(driver_t* driver, device_t parent) device_set_desc(child, "ISA Option ROM"); else device_set_desc(child, "ISA Option ROMs"); + isa_set_vendorid(child, PNP_EISAID("PNP0C80")); } static int From 
6986f58f53b6b5149a0df2dc5faea0becbb1ab40 Mon Sep 17 00:00:00 2001 From: Jilles Tjoelker Date: Sat, 23 Dec 2017 22:58:19 +0000 Subject: [PATCH 114/115] sh(1): Markup and spelling fixes --- bin/sh/sh.1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/sh/sh.1 b/bin/sh/sh.1 index 98ec2f2c219..d2ec8c41536 100644 --- a/bin/sh/sh.1 +++ b/bin/sh/sh.1 @@ -343,7 +343,7 @@ Write each command variable subjected to parameter expansion and arithmetic expansion) to standard error before it is executed. Useful for debugging. -.It nolog +.It Li nolog Another do-nothing option for .Tn POSIX compliance. @@ -2739,7 +2739,7 @@ were a known job that exited with exit status 127. If no operands are given, wait for all jobs to complete and return an exit status of zero. .El -.Ss Commandline Editing +.Ss Command Line Editing When .Nm is being used interactively from a terminal, the current command From c690824a91685ce4ccdced8df80349b6a9994f81 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sun, 24 Dec 2017 01:55:12 +0000 Subject: [PATCH 115/115] Fix rule number truncation, use uint16_t type to specify rulenum. PR: 224555 MFC after: 1 week --- sbin/ipfw/ipfw2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index 82cb23a9a5c..57b0562978f 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -2256,12 +2256,13 @@ do_range_cmd(int cmd, ipfw_range_tlv *rt) void ipfw_sets_handler(char *av[]) { - uint32_t masks[2]; - int i; - uint8_t cmd, rulenum; ipfw_range_tlv rt; char *msg; size_t size; + uint32_t masks[2]; + int i; + uint16_t rulenum; + uint8_t cmd; av++; memset(&rt, 0, sizeof(rt));