diff --git a/sys/conf/files b/sys/conf/files index a6769759616..af6b14587fc 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -912,6 +912,8 @@ dev/eisa/eisa_if.m standard dev/eisa/eisaconf.c optional eisa dev/e1000/if_em.c optional em inet \ compile-with "${NORMAL_C} -I$S/dev/e1000" +dev/e1000/if_lem.c optional em inet \ + compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/if_igb.c optional igb inet \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_80003es2lan.c optional em | igb \ diff --git a/sys/dev/e1000/e1000_80003es2lan.c b/sys/dev/e1000/e1000_80003es2lan.c index db32f8cbe36..af32ee0f1fc 100644 --- a/sys/dev/e1000/e1000_80003es2lan.c +++ b/sys/dev/e1000/e1000_80003es2lan.c @@ -231,7 +231,9 @@ static s32 e1000_init_mac_params_80003es2lan(struct e1000_hw *hw) mac->rar_entry_count = E1000_RAR_ENTRIES; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; - /* Set if manageability features are enabled. */ + /* FWSM register */ + mac->has_fwsm = TRUE; + /* ARC supported; valid only if manageability features are enabled. */ mac->arc_subsystem_valid = (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK) ? TRUE : FALSE; diff --git a/sys/dev/e1000/e1000_82571.c b/sys/dev/e1000/e1000_82571.c index 96a3b2f5397..afeb1a05e5c 100644 --- a/sys/dev/e1000/e1000_82571.c +++ b/sys/dev/e1000/e1000_82571.c @@ -313,10 +313,6 @@ static s32 e1000_init_mac_params_82571(struct e1000_hw *hw) mac->rar_entry_count = E1000_RAR_ENTRIES; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; - /* Set if manageability features are enabled. */ - mac->arc_subsystem_valid = - (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK) - ? TRUE : FALSE; /* Adaptive IFS supported */ mac->adaptive_ifs = TRUE; @@ -357,6 +353,16 @@ static s32 e1000_init_mac_params_82571(struct e1000_hw *hw) mac->ops.set_lan_id = e1000_set_lan_id_single_port; mac->ops.check_mng_mode = e1000_check_mng_mode_generic; mac->ops.led_on = e1000_led_on_generic; + + /* FWSM register */ + mac->has_fwsm = TRUE; + /* + * ARC supported; valid only if manageability features are + * enabled. + */ + mac->arc_subsystem_valid = + (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK) + ? TRUE : FALSE; break; case e1000_82574: case e1000_82583: @@ -367,6 +373,9 @@ static s32 e1000_init_mac_params_82571(struct e1000_hw *hw) default: mac->ops.check_mng_mode = e1000_check_mng_mode_generic; mac->ops.led_on = e1000_led_on_generic; + + /* FWSM register */ + mac->has_fwsm = TRUE; break; } @@ -1076,9 +1085,10 @@ static s32 e1000_init_hw_82571(struct e1000_hw *hw) /* ...for both queues. */ switch (mac->type) { case e1000_82573: + e1000_enable_tx_pkt_filtering_generic(hw); + /* fall through */ case e1000_82574: case e1000_82583: - e1000_enable_tx_pkt_filtering_generic(hw); reg_data = E1000_READ_REG(hw, E1000_GCR); reg_data |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX; E1000_WRITE_REG(hw, E1000_GCR, reg_data); @@ -1364,7 +1374,7 @@ static s32 e1000_setup_link_82571(struct e1000_hw *hw) static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw) { u32 ctrl; - s32 ret_val; + s32 ret_val; DEBUGFUNC("e1000_setup_copper_link_82571"); diff --git a/sys/dev/e1000/e1000_82575.c b/sys/dev/e1000/e1000_82575.c index 4227556d503..65b3ce47a41 100644 --- a/sys/dev/e1000/e1000_82575.c +++ b/sys/dev/e1000/e1000_82575.c @@ -289,7 +289,9 @@ static s32 e1000_init_mac_params_82575(struct e1000_hw *hw) mac->rar_entry_count = E1000_RAR_ENTRIES_82580; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; - /* Set if manageability features are enabled. */ + /* FWSM register */ + mac->has_fwsm = TRUE; + /* ARC supported; valid only if manageability features are enabled. */ mac->arc_subsystem_valid = (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK) ? TRUE : FALSE; @@ -1435,13 +1437,12 @@ static void e1000_config_collision_dist_82575(struct e1000_hw *hw) static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; - struct e1000_mac_info *mac = &hw->mac; if (!(phy->ops.check_reset_block)) return; /* If the management interface is not enabled, then power down */ - if (!(mac->ops.check_mng_mode(hw) || phy->ops.check_reset_block(hw))) + if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw))) e1000_power_down_phy_copper(hw); return; @@ -1646,14 +1647,23 @@ out: **/ void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) { - u32 dtxswc = E1000_READ_REG(hw, E1000_DTXSWC); + u32 dtxswc; + + switch (hw->mac.type) { + case e1000_82576: + dtxswc = E1000_READ_REG(hw, E1000_DTXSWC); + if (enable) + dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; + else + dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; + E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc); + break; + default: + /* Currently no other hardware supports loopback */ + break; + } - if (enable) - dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; - else - dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; - E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc); } /** diff --git a/sys/dev/e1000/e1000_82575.h b/sys/dev/e1000/e1000_82575.h index 582f4a4058d..1fc7e26dd59 100644 --- a/sys/dev/e1000/e1000_82575.h +++ b/sys/dev/e1000/e1000_82575.h @@ -49,8 +49,8 @@ * For 82576, there are an additional set of RARs that begin at an offset * separate from the first set of RARs. */ -#define E1000_RAR_ENTRIES_82575 16 -#define E1000_RAR_ENTRIES_82576 24 +#define E1000_RAR_ENTRIES_82575 16 +#define E1000_RAR_ENTRIES_82576 24 #define E1000_RAR_ENTRIES_82580 24 #define E1000_SW_SYNCH_MB 0x00000100 #define E1000_STAT_DEV_RST_SET 0x00100000 @@ -425,6 +425,7 @@ struct e1000_adv_tx_context_desc { #define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ #define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */ + #define E1000_VLVF_ARRAY_SIZE 32 #define E1000_VLVF_VLANID_MASK 0x00000FFF #define E1000_VLVF_POOLSEL_SHIFT 12 diff --git a/sys/dev/e1000/e1000_defines.h b/sys/dev/e1000/e1000_defines.h index 4c80ca03702..7ac5d8b8b1d 100644 --- a/sys/dev/e1000/e1000_defines.h +++ b/sys/dev/e1000/e1000_defines.h @@ -314,6 +314,11 @@ #define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ #define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ +#define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */ +#define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */ +#define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */ +#define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */ + /* Receive Control */ #define E1000_RCTL_RST 0x00000001 /* Software reset */ #define E1000_RCTL_EN 0x00000002 /* enable */ @@ -418,6 +423,8 @@ * PHYRST_N pin */ #define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external * LINK_0 and LINK_1 pins */ +#define E1000_CTRL_LANPHYPC_OVERRIDE 0x00010000 /* SW control of LANPHYPC */ +#define E1000_CTRL_LANPHYPC_VALUE 0x00020000 /* SW value of LANPHYPC */ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ #define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ @@ -953,6 +960,8 @@ #define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ #define E1000_EITR_ITR_INT_MASK 0x0000FFFF +/* E1000_EITR_CNT_IGNR is only for 82576 and newer */ +#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ /* Transmit Descriptor Control */ #define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ @@ -1380,6 +1389,9 @@ #define PCI_HEADER_TYPE_MULTIFUNC 0x80 #define PCIE_LINK_WIDTH_MASK 0x3F0 #define PCIE_LINK_WIDTH_SHIFT 4 +#define PCIE_LINK_SPEED_MASK 0x0F +#define PCIE_LINK_SPEED_2500 0x01 +#define PCIE_LINK_SPEED_5000 0x02 #define PCIE_DEVICE_CONTROL2_16ms 0x0005 #ifndef ETH_ADDR_LEN @@ -1410,7 +1422,7 @@ #define BME1000_E_PHY_ID_R2 0x01410CB1 #define I82577_E_PHY_ID 0x01540050 #define I82578_E_PHY_ID 0x004DD040 -#define I82580_I_PHY_ID 0x015403A0 +#define I82580_I_PHY_ID 0x015403A0 #define IGP04E1000_E_PHY_ID 0x02A80391 #define M88_VENDOR 0x0141 @@ -1507,6 +1519,7 @@ #define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ #define M88E1000_EPSCR_TX_CLK_0 0x0000 /* NO TX_CLK */ + /* M88EC018 Rev 2 specific DownShift settings */ #define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 #define M88EC018_EPSCR_DOWNSHIFT_COUNTER_1X 0x0000 diff --git a/sys/dev/e1000/e1000_hw.h b/sys/dev/e1000/e1000_hw.h index fd30173c646..ce5dffdd67d 100644 --- a/sys/dev/e1000/e1000_hw.h +++ b/sys/dev/e1000/e1000_hw.h @@ -122,6 +122,7 @@ struct e1000_hw; #define E1000_DEV_ID_ICH10_R_BM_V 0x10CE #define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE #define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF + #define E1000_DEV_ID_PCH_M_HV_LM 0x10EA #define E1000_DEV_ID_PCH_M_HV_LC 0x10EB #define E1000_DEV_ID_PCH_D_HV_DM 0x10EF @@ -682,6 +683,7 @@ struct e1000_mac_info { u8 forced_speed_duplex; bool adaptive_ifs; + bool has_fwsm; bool arc_subsystem_valid; bool asf_firmware_present; bool autoneg; diff --git a/sys/dev/e1000/e1000_ich8lan.c b/sys/dev/e1000/e1000_ich8lan.c index c40085fcfd2..6b3c25d8218 100644 --- a/sys/dev/e1000/e1000_ich8lan.c +++ b/sys/dev/e1000/e1000_ich8lan.c @@ -177,6 +177,7 @@ union ich8_hws_flash_regacc { static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; + u32 ctrl; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_init_phy_params_pchlan"); @@ -199,6 +200,35 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) phy->ops.power_down = e1000_power_down_phy_copper_ich8lan; phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; + if ((hw->mac.type == e1000_pchlan) && + (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID))) { + + /* + * The MAC-PHY interconnect may still be in SMBus mode + * after Sx->S0. Toggle the LANPHYPC Value bit to force + * the interconnect to PCIe mode, but only if there is no + * firmware present otherwise firmware will have done it. + */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_LANPHYPC_OVERRIDE; + ctrl &= ~E1000_CTRL_LANPHYPC_VALUE; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + usec_delay(10); + ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + msec_delay(50); + } + + /* + * Reset the PHY before any acccess to it. Doing so, ensures that + * the PHY is in a known good state before we read/write PHY registers. + * The generic reset is sufficient here, because we haven't determined + * the PHY type yet. + */ + ret_val = e1000_phy_hw_reset_generic(hw); + if (ret_val) + goto out; + phy->id = e1000_phy_unknown; ret_val = e1000_get_phy_id(hw); if (ret_val) @@ -225,6 +255,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) phy->ops.get_cable_length = e1000_get_cable_length_82577; phy->ops.get_info = e1000_get_phy_info_82577; phy->ops.commit = e1000_phy_sw_reset_generic; + break; case e1000_phy_82578: phy->ops.check_polarity = e1000_check_polarity_m88; phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; @@ -431,8 +462,10 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) mac->rar_entry_count--; /* Set if part includes ASF firmware */ mac->asf_firmware_present = TRUE; - /* Set if manageability features are enabled. */ - mac->arc_subsystem_valid = TRUE; + /* FWSM register */ + mac->has_fwsm = TRUE; + /* ARC subsystem not supported */ + mac->arc_subsystem_valid = FALSE; /* Adaptive IFS supported */ mac->adaptive_ifs = TRUE; @@ -764,6 +797,9 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) DEBUGFUNC("e1000_check_reset_block_ich8lan"); + if (hw->phy.reset_disable) + return E1000_BLK_PHY_RESET; + fwsm = E1000_READ_REG(hw, E1000_FWSM); return (fwsm & E1000_ICH_FWSM_RSPCIPHY) ? E1000_SUCCESS @@ -2684,6 +2720,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) DEBUGOUT("Auto Read Done did not complete\n"); } } + /* Dummy read to clear the phy wakeup bit after lcd reset */ if (hw->mac.type == e1000_pchlan) hw->phy.ops.read_reg(hw, BM_WUC, ®); @@ -2857,6 +2894,14 @@ static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) E1000_WRITE_REG(hw, E1000_STATUS, reg); } + /* + * work-around descriptor data corruption issue during nfs v2 udp + * traffic, just disable the nfs filtering capability + */ + reg = E1000_READ_REG(hw, E1000_RFCTL); + reg |= (E1000_RFCTL_NFSW_DIS | E1000_RFCTL_NFSR_DIS); + E1000_WRITE_REG(hw, E1000_RFCTL, reg); + return; } diff --git a/sys/dev/e1000/e1000_ich8lan.h b/sys/dev/e1000/e1000_ich8lan.h index cc8ba16cb2a..f26b7b9f70a 100644 --- a/sys/dev/e1000/e1000_ich8lan.h +++ b/sys/dev/e1000/e1000_ich8lan.h @@ -167,6 +167,9 @@ #define HV_KMRN_MODE_CTRL PHY_REG(769, 16) #define HV_KMRN_MDIO_SLOW 0x0400 +/* PHY Power Management Control */ +#define HV_PM_CTRL PHY_REG(770, 17) + #define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in milliseconds */ /* @@ -192,7 +195,6 @@ #define E1000_RXDEXT_LINKSEC_ERROR_REPLAY_ERROR 0x40000000 #define E1000_RXDEXT_LINKSEC_ERROR_BAD_SIG 0x60000000 - void e1000_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, bool state); void e1000_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw); diff --git a/sys/dev/e1000/e1000_mac.c b/sys/dev/e1000/e1000_mac.c index 3c525c1f1f1..d4d2bec27bd 100644 --- a/sys/dev/e1000/e1000_mac.c +++ b/sys/dev/e1000/e1000_mac.c @@ -225,17 +225,30 @@ s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) DEBUGFUNC("e1000_get_bus_info_pcie_generic"); bus->type = e1000_bus_type_pci_express; - bus->speed = e1000_bus_speed_2500; ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS, &pcie_link_status); - if (ret_val) + if (ret_val) { bus->width = e1000_bus_width_unknown; - else + bus->speed = e1000_bus_speed_unknown; + } else { + switch (pcie_link_status & PCIE_LINK_SPEED_MASK) { + case PCIE_LINK_SPEED_2500: + bus->speed = e1000_bus_speed_2500; + break; + case PCIE_LINK_SPEED_5000: + bus->speed = e1000_bus_speed_5000; + break; + default: + bus->speed = e1000_bus_speed_unknown; + break; + } + bus->width = (enum e1000_bus_width)((pcie_link_status & PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT); + } mac->ops.set_lan_id(hw); diff --git a/sys/dev/e1000/e1000_manage.c b/sys/dev/e1000/e1000_manage.c index 2cd85b3a9e8..0b295f8d384 100644 --- a/sys/dev/e1000/e1000_manage.c +++ b/sys/dev/e1000/e1000_manage.c @@ -78,6 +78,12 @@ s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw) DEBUGFUNC("e1000_mng_enable_host_if_generic"); + if (!(hw->mac.arc_subsystem_valid)) { + DEBUGOUT("ARC subsystem not valid.\n"); + ret_val = -E1000_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + /* Check that the host interface is enabled. */ hicr = E1000_READ_REG(hw, E1000_HICR); if ((hicr & E1000_HICR_EN) == 0) { @@ -365,7 +371,7 @@ bool e1000_enable_mng_pass_thru(struct e1000_hw *hw) if (!(manc & E1000_MANC_RCV_TCO_EN)) goto out; - if (hw->mac.arc_subsystem_valid) { + if (hw->mac.has_fwsm) { fwsm = E1000_READ_REG(hw, E1000_FWSM); factps = E1000_READ_REG(hw, E1000_FACTPS); @@ -375,12 +381,23 @@ bool e1000_enable_mng_pass_thru(struct e1000_hw *hw) ret_val = TRUE; goto out; } - } else { - if ((manc & E1000_MANC_SMBUS_EN) && - !(manc & E1000_MANC_ASF_EN)) { + } else if ((hw->mac.type == e1000_82574) || + (hw->mac.type == e1000_82583)) { + u16 data; + + factps = E1000_READ_REG(hw, E1000_FACTPS); + e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data); + + if (!(factps & E1000_FACTPS_MNGCG) && + ((data & E1000_NVM_INIT_CTRL2_MNGM) == + (e1000_mng_mode_pt << 13))) { ret_val = TRUE; goto out; } + } else if ((manc & E1000_MANC_SMBUS_EN) && + !(manc & E1000_MANC_ASF_EN)) { + ret_val = TRUE; + goto out; } out: diff --git a/sys/dev/e1000/e1000_phy.c b/sys/dev/e1000/e1000_phy.c index dbc422a53f4..24ca36f6ebb 100644 --- a/sys/dev/e1000/e1000_phy.c +++ b/sys/dev/e1000/e1000_phy.c @@ -3402,9 +3402,7 @@ s32 e1000_check_polarity_82577(struct e1000_hw *hw) * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY * @hw: pointer to the HW structure * - * Calls the PHY setup function to force speed and duplex. Clears the - * auto-crossover to force MDI manually. Waits for link and returns - * successful if link up is successful, else -E1000_ERR_PHY (-2). + * Calls the PHY setup function to force speed and duplex. **/ s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) { @@ -3425,23 +3423,6 @@ s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) if (ret_val) goto out; - /* - * Clear Auto-Crossover to force MDI manually. 82577 requires MDI - * forced whenever speed and duplex are forced. - */ - ret_val = phy->ops.read_reg(hw, I82577_PHY_CTRL_2, &phy_data); - if (ret_val) - goto out; - - phy_data &= ~I82577_PHY_CTRL2_AUTO_MDIX; - phy_data &= ~I82577_PHY_CTRL2_FORCE_MDI_MDIX; - - ret_val = phy->ops.write_reg(hw, I82577_PHY_CTRL_2, phy_data); - if (ret_val) - goto out; - - DEBUGOUT1("I82577_PHY_CTRL_2: %X\n", phy_data); - usec_delay(1); if (phy->autoneg_wait_to_complete) { diff --git a/sys/dev/e1000/e1000_regs.h b/sys/dev/e1000/e1000_regs.h index 56418a6b493..b2a477e8234 100644 --- a/sys/dev/e1000/e1000_regs.h +++ b/sys/dev/e1000/e1000_regs.h @@ -65,7 +65,7 @@ #define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ #define E1000_IVAR 0x000E4 /* Interrupt Vector Allocation Register - RW */ #define E1000_SVCR 0x000F0 -#define E1000_SVT 0x000F4 +#define E1000_SVT 0x000F4 #define E1000_RCTL 0x00100 /* Rx Control - RW */ #define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ #define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */ @@ -282,6 +282,17 @@ #define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ #define E1000_CRC_OFFSET 0x05F50 /* CRC Offset register */ +/* Virtualization statistical counters */ +#define E1000_PFVFGPRC(_n) (0x010010 + (0x100 * (_n))) +#define E1000_PFVFGPTC(_n) (0x010014 + (0x100 * (_n))) +#define E1000_PFVFGORC(_n) (0x010018 + (0x100 * (_n))) +#define E1000_PFVFGOTC(_n) (0x010034 + (0x100 * (_n))) +#define E1000_PFVFMPRC(_n) (0x010038 + (0x100 * (_n))) +#define E1000_PFVFGPRLBC(_n) (0x010040 + (0x100 * (_n))) +#define E1000_PFVFGPTLBC(_n) (0x010044 + (0x100 * (_n))) +#define E1000_PFVFGORLBC(_n) (0x010048 + (0x100 * (_n))) +#define E1000_PFVFGOTLBC(_n) (0x010050 + (0x100 * (_n))) + #define E1000_LSECTXUT 0x04300 /* LinkSec Tx Untagged Packet Count - OutPktsUntagged */ #define E1000_LSECTXPKTE 0x04304 /* LinkSec Encrypted Tx Packets Count - OutPktsEncrypted */ #define E1000_LSECTXPKTP 0x04308 /* LinkSec Protected Tx Packet Count - OutPktsProtected */ @@ -386,6 +397,7 @@ #define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */ #define E1000_MDPHYA 0x0003C /* PHY address - RW */ #define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ +#define E1000_MDEF(_n) (0x05890 + (4 * (_n))) /* Mngmt Decision Filters */ #define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ #define E1000_CCMCTL 0x05B48 /* CCM Control Register */ #define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index b45e4784014..7e47adc8ed9 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -35,7 +35,6 @@ #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #include "opt_inet.h" -#include "opt_altq.h" #endif #include @@ -55,9 +54,7 @@ #include #include #include -#if __FreeBSD_version >= 700029 #include -#endif #include #include @@ -95,7 +92,7 @@ int em_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ -char em_driver_version[] = "6.9.25"; +char em_driver_version[] = "7.0.0"; /********************************************************************* @@ -111,51 +108,6 @@ char em_driver_version[] = "6.9.25"; static em_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection */ - { 0x8086, E1000_DEV_ID_82540EM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82540EM_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82540EP, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82540EP_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82540EP_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82541EI, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541ER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541ER_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541GI, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541GI_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82541GI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82542, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82543GC_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82543GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82544EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82544EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82544GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82544GC_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82545EM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82545EM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82545GM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82545GM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82545GM_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82546EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_PCIE, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, - PCI_ANY_ID, PCI_ANY_ID, 0}, - - { 0x8086, E1000_DEV_ID_82547EI, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82547EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82547GI, PCI_ANY_ID, PCI_ANY_ID, 0}, - { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, @@ -238,10 +190,11 @@ static int em_shutdown(device_t); static int em_suspend(device_t); static int em_resume(device_t); static void em_start(struct ifnet *); -static void em_start_locked(struct ifnet *ifp); +static void em_start_locked(struct ifnet *, struct tx_ring *); #if __FreeBSD_version >= 800000 static int em_mq_start(struct ifnet *, struct mbuf *); -static int em_mq_start_locked(struct ifnet *, struct mbuf *); +static int em_mq_start_locked(struct ifnet *, + struct tx_ring *, struct mbuf *); static void em_qflush(struct ifnet *); #endif static int em_ioctl(struct ifnet *, u_long, caddr_t); @@ -252,55 +205,49 @@ static void em_media_status(struct ifnet *, struct ifmediareq *); static int em_media_change(struct ifnet *); static void em_identify_hardware(struct adapter *); static int em_allocate_pci_resources(struct adapter *); -static int em_allocate_legacy(struct adapter *adapter); -static int em_allocate_msix(struct adapter *adapter); +static int em_allocate_legacy(struct adapter *); +static int em_allocate_msix(struct adapter *); +static int em_allocate_queues(struct adapter *); static int em_setup_msix(struct adapter *); static void em_free_pci_resources(struct adapter *); static void em_local_timer(void *); -static int em_hardware_init(struct adapter *); +static void em_reset(struct adapter *); static void em_setup_interface(device_t, struct adapter *); + static void em_setup_transmit_structures(struct adapter *); static void em_initialize_transmit_unit(struct adapter *); +static int em_allocate_transmit_buffers(struct tx_ring *); +static void em_free_transmit_structures(struct adapter *); +static void em_free_transmit_buffers(struct tx_ring *); + static int em_setup_receive_structures(struct adapter *); +static int em_allocate_receive_buffers(struct rx_ring *); static void em_initialize_receive_unit(struct adapter *); +static void em_free_receive_structures(struct adapter *); +static void em_free_receive_buffers(struct rx_ring *); + static void em_enable_intr(struct adapter *); static void em_disable_intr(struct adapter *); -static void em_free_transmit_structures(struct adapter *); -static void em_free_receive_structures(struct adapter *); static void em_update_stats_counters(struct adapter *); -static void em_txeof(struct adapter *); -static void em_tx_purge(struct adapter *); -static int em_allocate_receive_structures(struct adapter *); -static int em_allocate_transmit_structures(struct adapter *); -static int em_rxeof(struct adapter *, int); +static bool em_txeof(struct tx_ring *); +static int em_rxeof(struct rx_ring *, int); #ifndef __NO_STRICT_ALIGNMENT -static int em_fixup_rx(struct adapter *); +static int em_fixup_rx(struct rx_ring *); #endif -static void em_receive_checksum(struct adapter *, struct e1000_rx_desc *, - struct mbuf *); -static void em_transmit_checksum_setup(struct adapter *, struct mbuf *, +static void em_receive_checksum(struct e1000_rx_desc *, struct mbuf *); +static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); -#if __FreeBSD_version >= 700000 -static bool em_tso_setup(struct adapter *, struct mbuf *, - u32 *, u32 *); -#endif /* FreeBSD_version >= 700000 */ +static bool em_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *); static void em_set_promisc(struct adapter *); static void em_disable_promisc(struct adapter *); static void em_set_multi(struct adapter *); static void em_print_hw_stats(struct adapter *); static void em_update_link_status(struct adapter *); -static int em_get_buf(struct adapter *, int); -#if __FreeBSD_version >= 700029 +static void em_refresh_mbufs(struct rx_ring *, int); static void em_register_vlan(void *, struct ifnet *, u16); static void em_unregister_vlan(void *, struct ifnet *, u16); static void em_setup_vlan_hw_support(struct adapter *); -#endif -static int em_xmit(struct adapter *, struct mbuf **); -static void em_smartspeed(struct adapter *); -static int em_82547_fifo_workaround(struct adapter *, int); -static void em_82547_update_fifo_head(struct adapter *, int); -static int em_82547_tx_fifo_reset(struct adapter *); -static void em_82547_move_tail(void *); +static int em_xmit(struct tx_ring *, struct mbuf **); static int em_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void em_dma_free(struct adapter *, struct em_dma_alloc *); @@ -309,8 +256,6 @@ static void em_print_nvm_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_stats(SYSCTL_HANDLER_ARGS); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); -static u32 em_fill_descriptors (bus_addr_t address, u32 length, - PDESC_ARRAY desc_array); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); @@ -323,27 +268,18 @@ static void em_get_wakeup(device_t); static void em_enable_wakeup(device_t); static int em_enable_phy_wakeup(struct adapter *); -#ifdef EM_LEGACY_IRQ -static void em_intr(void *); -#else /* FAST IRQ */ -#if __FreeBSD_version < 700000 -static void em_irq_fast(void *); -#else static int em_irq_fast(void *); -#endif /* MSIX handlers */ static void em_msix_tx(void *); static void em_msix_rx(void *); static void em_msix_link(void *); -static void em_handle_rx(void *context, int pending); static void em_handle_tx(void *context, int pending); - -static void em_handle_rxtx(void *context, int pending); +static void em_handle_rx(void *context, int pending); static void em_handle_link(void *context, int pending); + static void em_add_rx_process_limit(struct adapter *, const char *, const char *, int *, int); -#endif /* ~EM_LEGACY_IRQ */ #ifdef DEVICE_POLLING static poll_handler_t em_poll; @@ -368,7 +304,7 @@ static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; -static devclass_t em_devclass; +devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); @@ -388,31 +324,35 @@ MODULE_DEPEND(em, ether, 1, 1, 1); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); -static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); -static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); -static int em_rxd = EM_DEFAULT_RXD; -static int em_txd = EM_DEFAULT_TXD; -static int em_smart_pwr_down = FALSE; -/* Controls whether promiscuous also shows bad packets */ -static int em_debug_sbp = FALSE; -/* Local switch for MSI/MSIX */ -static int em_enable_msi = TRUE; - TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt); TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt); + +static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); +static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt); TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt); + +static int em_rxd = EM_DEFAULT_RXD; +static int em_txd = EM_DEFAULT_TXD; TUNABLE_INT("hw.em.rxd", &em_rxd); TUNABLE_INT("hw.em.txd", &em_txd); -TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down); -TUNABLE_INT("hw.em.sbp", &em_debug_sbp); -TUNABLE_INT("hw.em.enable_msi", &em_enable_msi); -#ifndef EM_LEGACY_IRQ +static int em_smart_pwr_down = FALSE; +TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down); + +/* Controls whether promiscuous also shows bad packets */ +static int em_debug_sbp = FALSE; +TUNABLE_INT("hw.em.sbp", &em_debug_sbp); + +/* Local controls for MSI/MSIX */ +static int em_enable_msix = TRUE; +static int em_msix_queues = 2; /* for 82574, can be 1 or 2 */ +TUNABLE_INT("hw.em.enable_msix", &em_enable_msix); +TUNABLE_INT("hw.em.msix_queues", &em_msix_queues); + /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit); -#endif /* Flow control setting - default to FULL */ static int em_fc_setting = e1000_fc_full; @@ -494,7 +434,6 @@ static int em_attach(device_t dev) { struct adapter *adapter; - int tsize, rsize; int error = 0; INIT_DEBUGOUT("em_attach: begin"); @@ -502,8 +441,6 @@ em_attach(device_t dev) adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); - EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev)); - EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), @@ -517,7 +454,6 @@ em_attach(device_t dev) em_sysctl_stats, "I", "Statistics"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); - callout_init_mtx(&adapter->tx_fifo_timer, &adapter->tx_mtx, 0); /* Determine hardware and mac info */ em_identify_hardware(adapter); @@ -571,25 +507,21 @@ em_attach(device_t dev) em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(&adapter->hw, E1000_TIDV), em_tx_int_delay_dflt); - if (adapter->hw.mac.type >= e1000_82540) { - em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", - "receive interrupt delay limit in usecs", - &adapter->rx_abs_int_delay, - E1000_REGISTER(&adapter->hw, E1000_RADV), - em_rx_abs_int_delay_dflt); - em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", - "transmit interrupt delay limit in usecs", - &adapter->tx_abs_int_delay, - E1000_REGISTER(&adapter->hw, E1000_TADV), - em_tx_abs_int_delay_dflt); - } + em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", + "receive interrupt delay limit in usecs", + &adapter->rx_abs_int_delay, + E1000_REGISTER(&adapter->hw, E1000_RADV), + em_rx_abs_int_delay_dflt); + em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", + "transmit interrupt delay limit in usecs", + &adapter->tx_abs_int_delay, + E1000_REGISTER(&adapter->hw, E1000_TADV), + em_tx_abs_int_delay_dflt); -#ifndef EM_LEGACY_IRQ /* Sysctls for limiting the amount of work done in the taskqueue */ em_add_rx_process_limit(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, em_rx_process_limit); -#endif /* * Validate number of transmit and receive descriptors. It @@ -597,18 +529,15 @@ em_attach(device_t dev) * of E1000_DBA_ALIGN. */ if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || - (adapter->hw.mac.type >= e1000_82544 && em_txd > EM_MAX_TXD) || - (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) || - (em_txd < EM_MIN_TXD)) { + (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", EM_DEFAULT_TXD, em_txd); adapter->num_tx_desc = EM_DEFAULT_TXD; } else adapter->num_tx_desc = em_txd; + if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 || - (adapter->hw.mac.type >= e1000_82544 && em_rxd > EM_MAX_RXD) || - (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) || - (em_rxd < EM_MIN_RXD)) { + (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, em_rxd); adapter->num_rx_desc = EM_DEFAULT_RXD; @@ -618,10 +547,6 @@ em_attach(device_t dev) adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_wait_to_complete = FALSE; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; - adapter->rx_buffer_len = 2048; - - e1000_init_script_state_82541(&adapter->hw, TRUE); - e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); /* Copper options */ if (adapter->hw.phy.media_type == e1000_media_type_copper) { @@ -643,29 +568,13 @@ em_attach(device_t dev) */ adapter->hw.mac.report_tx_early = 1; - tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), - EM_DBA_ALIGN); - - /* Allocate Transmit Descriptor ring */ - if (em_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) { - device_printf(dev, "Unable to allocate tx_desc memory\n"); + /* + ** Get queue/ring memory + */ + if (em_allocate_queues(adapter)) { error = ENOMEM; - goto err_tx_desc; + goto err_pci; } - adapter->tx_desc_base = - (struct e1000_tx_desc *)adapter->txdma.dma_vaddr; - - rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), - EM_DBA_ALIGN); - - /* Allocate Receive Descriptor ring */ - if (em_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) { - device_printf(dev, "Unable to allocate rx_desc memory\n"); - error = ENOMEM; - goto err_rx_desc; - } - adapter->rx_desc_base = - (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr; /* ** Start from a known state, this is @@ -685,7 +594,7 @@ em_attach(device_t dev) device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; - goto err_hw_init; + goto err_late; } } @@ -694,45 +603,24 @@ em_attach(device_t dev) device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; - goto err_hw_init; + goto err_late; } if (!em_is_valid_ether_addr(adapter->hw.mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; - goto err_hw_init; - } - - /* Initialize the hardware */ - if (em_hardware_init(adapter)) { - device_printf(dev, "Unable to initialize the hardware\n"); - error = EIO; - goto err_hw_init; - } - - /* Allocate transmit descriptors and buffers */ - if (em_allocate_transmit_structures(adapter)) { - device_printf(dev, "Could not setup transmit structures\n"); - error = ENOMEM; - goto err_tx_struct; - } - - /* Allocate receive descriptors and buffers */ - if (em_allocate_receive_structures(adapter)) { - device_printf(dev, "Could not setup receive structures\n"); - error = ENOMEM; - goto err_rx_struct; + goto err_late; } /* ** Do interrupt configuration */ - if (adapter->msi > 1) /* Do MSI/X */ + if (adapter->msix > 1) /* Do MSIX */ error = em_allocate_msix(adapter); else /* MSI or Legacy */ error = em_allocate_legacy(adapter); if (error) - goto err_rx_struct; + goto err_late; /* * Get Wake-on-Lan and Management info for later use @@ -742,6 +630,8 @@ em_attach(device_t dev) /* Setup OS specific network interface */ em_setup_interface(dev, adapter); + em_reset(adapter); + /* Initialize statistics */ em_update_stats_counters(adapter); @@ -753,20 +643,11 @@ em_attach(device_t dev) device_printf(dev, "PHY reset is blocked due to SOL/IDER session.\n"); - /* Do we need workaround for 82544 PCI-X adapter? */ - if (adapter->hw.bus.type == e1000_bus_type_pcix && - adapter->hw.mac.type == e1000_82544) - adapter->pcix_82544 = TRUE; - else - adapter->pcix_82544 = FALSE; - -#if __FreeBSD_version >= 700029 /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); -#endif /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) @@ -779,19 +660,12 @@ em_attach(device_t dev) return (0); -err_rx_struct: +err_late: em_free_transmit_structures(adapter); -err_tx_struct: -err_hw_init: + em_free_receive_structures(adapter); em_release_hw_control(adapter); - em_dma_free(adapter, &adapter->rxdma); -err_rx_desc: - em_dma_free(adapter, &adapter->txdma); -err_tx_desc: err_pci: em_free_pci_resources(adapter); - EM_TX_LOCK_DESTROY(adapter); - EM_RX_LOCK_DESTROY(adapter); EM_CORE_LOCK_DESTROY(adapter); return (error); @@ -816,11 +690,7 @@ em_detach(device_t dev) INIT_DEBUGOUT("em_detach: begin"); /* Make sure VLANS are not using driver */ -#if __FreeBSD_version >= 700000 if (adapter->ifp->if_vlantrunk != NULL) { -#else - if (adapter->ifp->if_nvlans != 0) { -#endif device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } @@ -831,27 +701,24 @@ em_detach(device_t dev) #endif EM_CORE_LOCK(adapter); - EM_TX_LOCK(adapter); adapter->in_detach = 1; em_stop(adapter); + EM_CORE_UNLOCK(adapter); + EM_CORE_LOCK_DESTROY(adapter); + e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); + em_release_hw_control(adapter); - EM_TX_UNLOCK(adapter); - EM_CORE_UNLOCK(adapter); - -#if __FreeBSD_version >= 700029 /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); -#endif ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); - callout_drain(&adapter->tx_fifo_timer); em_free_pci_resources(adapter); bus_generic_detach(dev); @@ -860,22 +727,7 @@ em_detach(device_t dev) em_free_transmit_structures(adapter); em_free_receive_structures(adapter); - /* Free Transmit Descriptor ring */ - if (adapter->tx_desc_base) { - em_dma_free(adapter, &adapter->txdma); - adapter->tx_desc_base = NULL; - } - - /* Free Receive Descriptor ring */ - if (adapter->rx_desc_base) { - em_dma_free(adapter, &adapter->rxdma); - adapter->rx_desc_base = NULL; - } - em_release_hw_control(adapter); - EM_TX_LOCK_DESTROY(adapter); - EM_RX_LOCK_DESTROY(adapter); - EM_CORE_LOCK_DESTROY(adapter); return (0); } @@ -939,69 +791,45 @@ em_resume(device_t dev) #if __FreeBSD_version >= 800000 static int -em_mq_start_locked(struct ifnet *ifp, struct mbuf *m) +em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) { - struct adapter *adapter = ifp->if_softc; - struct mbuf *next; - int error = E1000_SUCCESS; + struct adapter *adapter = txr->adapter; + struct mbuf *next; + int err = 0, enq = 0; - EM_TX_LOCK_ASSERT(adapter); - /* To allow being called from a tasklet */ + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING || adapter->link_active == 0) { + if (m != NULL) + err = drbr_enqueue(ifp, txr->br, m); + return (err); + } + + enq = 0; if (m == NULL) - goto process; + next = drbr_dequeue(ifp, txr->br); + else + next = m; - if (((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - || (!adapter->link_active)) { - error = drbr_enqueue(ifp, adapter->br, m); - return (error); - } else if (!drbr_needs_enqueue(ifp, adapter->br) && - (adapter->num_tx_desc_avail > EM_TX_OP_THRESHOLD)) { - if ((error = em_xmit(adapter, &m)) != 0) { - if (m) - error = drbr_enqueue(ifp, adapter->br, m); - return (error); - } else { - /* - * We've bypassed the buf ring so we need to update - * ifp directly - */ - drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags); - /* - ** Send a copy of the frame to the BPF - ** listener and set the watchdog on. - */ - ETHER_BPF_MTAP(ifp, m); - adapter->watchdog_check = TRUE; - } - } else if ((error = drbr_enqueue(ifp, adapter->br, m)) != 0) - return (error); - -process: - if (drbr_empty(ifp, adapter->br)) - return(error); - /* Process the queue */ - while (TRUE) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; - next = drbr_dequeue(ifp, adapter->br); - if (next == NULL) - break; - if ((error = em_xmit(adapter, &next)) != 0) { - if (next != NULL) - error = drbr_enqueue(ifp, adapter->br, next); + /* Process the queue */ + while (next != NULL) { + if ((err = em_xmit(txr, &next)) != 0) { + if (next != NULL) + err = drbr_enqueue(ifp, txr->br, next); break; } + enq++; drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); - ETHER_BPF_MTAP(ifp, next); + ETHER_BPF_MTAP(ifp, next); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; + next = drbr_dequeue(ifp, txr->br); + } + + if (enq > 0) { /* Set the watchdog */ - adapter->watchdog_check = TRUE; - } - - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - - return (error); + txr->watchdog_check = TRUE; + } + return (err); } /* @@ -1011,45 +839,61 @@ process: static int em_mq_start(struct ifnet *ifp, struct mbuf *m) { - - struct adapter *adapter = ifp->if_softc; - int error = 0; + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr; + int i, error = 0; - if (EM_TX_TRYLOCK(adapter)) { + /* Which queue to use */ + if ((m->m_flags & M_FLOWID) != 0) + i = m->m_pkthdr.flowid % adapter->num_queues; + else + i = curcpu % adapter->num_queues; + + txr = &adapter->tx_rings[i]; + + if (EM_TX_TRYLOCK(txr)) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) - error = em_mq_start_locked(ifp, m); - EM_TX_UNLOCK(adapter); + error = em_mq_start_locked(ifp, txr, m); + EM_TX_UNLOCK(txr); } else - error = drbr_enqueue(ifp, adapter->br, m); + error = drbr_enqueue(ifp, txr->br, m); return (error); } +/* +** Flush all ring buffers +*/ static void em_qflush(struct ifnet *ifp) { - struct mbuf *m; - struct adapter *adapter = (struct adapter *)ifp->if_softc; + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; + struct mbuf *m; - EM_TX_LOCK(adapter); - while ((m = buf_ring_dequeue_sc(adapter->br)) != NULL) - m_freem(m); + for (int i = 0; i < adapter->num_queues; i++, txr++) { + EM_TX_LOCK(txr); + while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) + m_freem(m); + EM_TX_UNLOCK(txr); + } if_qflush(ifp); - EM_TX_UNLOCK(adapter); } + #endif /* FreeBSD_version */ static void -em_start_locked(struct ifnet *ifp) +em_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = ifp->if_softc; struct mbuf *m_head; - EM_TX_LOCK_ASSERT(adapter); + EM_TX_LOCK_ASSERT(txr); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; + if (!adapter->link_active) return; @@ -1062,7 +906,7 @@ em_start_locked(struct ifnet *ifp) * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ - if (em_xmit(adapter, &m_head)) { + if (em_xmit(txr, &m_head)) { if (m_head == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; @@ -1074,10 +918,8 @@ em_start_locked(struct ifnet *ifp) ETHER_BPF_MTAP(ifp, m_head); /* Set timeout in case hardware has problems transmitting. */ - adapter->watchdog_check = TRUE; + txr->watchdog_check = TRUE; } - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; } @@ -1085,12 +927,15 @@ em_start_locked(struct ifnet *ifp) static void em_start(struct ifnet *ifp) { - struct adapter *adapter = ifp->if_softc; + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; - EM_TX_LOCK(adapter); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - em_start_locked(ifp); - EM_TX_UNLOCK(adapter); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + EM_TX_LOCK(txr); + em_start_locked(ifp, txr); + EM_TX_UNLOCK(txr); + } + return; } /********************************************************************* @@ -1140,37 +985,23 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data) case SIOCSIFMTU: { int max_frame_size; - u16 eeprom_data = 0; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); EM_CORE_LOCK(adapter); switch (adapter->hw.mac.type) { - case e1000_82573: - /* - * 82573 only supports jumbo frames - * if ASPM is disabled. - */ - e1000_read_nvm(&adapter->hw, - NVM_INIT_3GIO_3, 1, &eeprom_data); - if (eeprom_data & NVM_WORD1A_ASPM_MASK) { - max_frame_size = ETHER_MAX_LEN; - break; - } - /* Allow Jumbo frames - fall thru */ case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_82574: - case e1000_80003es2lan: /* Limit Jumbo Frame size */ + case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; /* Adapters that do not support jumbo frames */ - case e1000_82542: case e1000_82583: case e1000_ich8lan: max_frame_size = ETHER_MAX_LEN; @@ -1206,11 +1037,8 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data) } else em_init_locked(adapter); } else - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - EM_TX_LOCK(adapter); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) em_stop(adapter); - EM_TX_UNLOCK(adapter); - } adapter->if_flags = ifp->if_flags; EM_CORE_UNLOCK(adapter); break; @@ -1221,10 +1049,6 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data) EM_CORE_LOCK(adapter); em_disable_intr(adapter); em_set_multi(adapter); - if (adapter->hw.mac.type == e1000_82542 && - adapter->hw.revision_id == E1000_REVISION_2) { - em_initialize_receive_unit(adapter); - } #ifdef DEVICE_POLLING if (!(ifp->if_capenable & IFCAP_POLLING)) #endif @@ -1278,22 +1102,14 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data) ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } -#if __FreeBSD_version >= 700000 if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; reinit = 1; } -#endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; reinit = 1; } - - if (mask & IFCAP_VLAN_HWFILTER) { - ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; - reinit = 1; - } - if ((mask & IFCAP_WOL) && (ifp->if_capabilities & IFCAP_WOL) != 0) { if (mask & IFCAP_WOL_MCAST) @@ -1301,12 +1117,9 @@ em_ioctl(struct ifnet *ifp, u_long command, caddr_t data) if (mask & IFCAP_WOL_MAGIC) ifp->if_capenable ^= IFCAP_WOL_MAGIC; } - if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) em_init(adapter); -#if __FreeBSD_version >= 700000 VLAN_CAPABILITIES(ifp); -#endif break; } @@ -1341,33 +1154,15 @@ em_init_locked(struct adapter *adapter) EM_CORE_LOCK_ASSERT(adapter); - EM_TX_LOCK(adapter); - em_stop(adapter); - EM_TX_UNLOCK(adapter); + em_disable_intr(adapter); + callout_stop(&adapter->timer); /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. - * - * Devices before the 82547 had a Packet Buffer of 64K. - * Default allocation: PBA=48K for Rx, leaving 16K for Tx. - * After the 82547 the buffer was reduced to 40K. - * Default allocation: PBA=30K for Rx, leaving 10K for Tx. - * Note: default does not leave enough room for Jumbo Frame >10k. */ switch (adapter->hw.mac.type) { - case e1000_82547: - case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */ - if (adapter->max_frame_size > 8192) - pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */ - else - pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */ - adapter->tx_fifo_head = 0; - adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT; - adapter->tx_fifo_size = - (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT; - break; /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: @@ -1390,7 +1185,6 @@ em_init_locked(struct adapter *adapter) pba = E1000_PBA_8K; break; default: - /* Devices before 82547 had a Packet Buffer of 64K. */ if (adapter->max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else @@ -1420,36 +1214,21 @@ em_init_locked(struct adapter *adapter) } /* Initialize the hardware */ - if (em_hardware_init(adapter)) { - device_printf(dev, "Unable to initialize the hardware\n"); - return; - } + em_reset(adapter); em_update_link_status(adapter); /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); - if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) - /* Use real VLAN Filter support */ - em_setup_vlan_hw_support(adapter); - else { - u32 ctrl; - ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); - ctrl |= E1000_CTRL_VME; - E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); - } - } + + /* Use real VLAN Filter support */ + em_setup_vlan_hw_support(adapter); /* Set hardware offload abilities */ ifp->if_hwassist = 0; - if (adapter->hw.mac.type >= e1000_82543) { - if (ifp->if_capenable & IFCAP_TXCSUM) - ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); -#if __FreeBSD_version >= 700000 - if (ifp->if_capenable & IFCAP_TSO4) - ifp->if_hwassist |= CSUM_TSO; -#endif - } + if (ifp->if_capenable & IFCAP_TXCSUM) + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); + if (ifp->if_capenable & IFCAP_TSO4) + ifp->if_hwassist |= CSUM_TSO; /* Configure for OS presence */ em_init_manageability(adapter); @@ -1464,9 +1243,7 @@ em_init_locked(struct adapter *adapter) /* Prepare receive descriptors and buffers */ if (em_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); - EM_TX_LOCK(adapter); em_stop(adapter); - EM_TX_UNLOCK(adapter); return; } em_initialize_receive_unit(adapter); @@ -1486,14 +1263,8 @@ em_init_locked(struct adapter *adapter) tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); - /* - ** Set the IVAR - interrupt vector routing. - ** Each nibble represents a vector, high bit - ** is enable, other 3 bits are the MSIX table - ** entry, we map RXQ0 to 0, TXQ0 to 1, and - ** Link (other) to 2, hence the magic number. - */ - E1000_WRITE_REG(&adapter->hw, E1000_IVAR, 0x800A0908); + /* Set the IVAR - interrupt vector routing. */ + E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } #ifdef DEVICE_POLLING @@ -1529,13 +1300,15 @@ em_init(void *arg) #ifdef DEVICE_POLLING /********************************************************************* * - * Legacy polling routine + * Legacy polling routine: note this only works with single queue * *********************************************************************/ static int em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; + struct rx_ring *rxr = adapter->rx_rings; u32 reg_icr, rx_done = 0; EM_CORE_LOCK(adapter); @@ -1546,147 +1319,40 @@ em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; em_update_link_status(adapter); + callout_reset(&adapter->timer, hz, + em_local_timer, adapter); } - if (reg_icr & E1000_ICR_RXO) - adapter->rx_overruns++; } EM_CORE_UNLOCK(adapter); - rx_done = em_rxeof(adapter, count); + rx_done = em_rxeof(rxr, count); - EM_TX_LOCK(adapter); - em_txeof(adapter); + EM_TX_LOCK(txr); + em_txeof(txr); #if __FreeBSD_version >= 800000 - if (!drbr_empty(ifp, adapter->br)) - em_mq_start_locked(ifp, NULL); + if (!drbr_empty(ifp, txr->br)) + em_mq_start_locked(ifp, txr, NULL); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - em_start_locked(ifp); + em_start_locked(ifp, txr); #endif - EM_TX_UNLOCK(adapter); + EM_TX_UNLOCK(txr); + return (rx_done); } #endif /* DEVICE_POLLING */ -#ifdef EM_LEGACY_IRQ -/********************************************************************* - * - * Legacy Interrupt Service routine - * - *********************************************************************/ - -static void -em_intr(void *arg) -{ - struct adapter *adapter = arg; - struct ifnet *ifp = adapter->ifp; - u32 reg_icr; - - - if (ifp->if_capenable & IFCAP_POLLING) - return; - - EM_CORE_LOCK(adapter); - reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - if (reg_icr & E1000_ICR_RXO) - adapter->rx_overruns++; - if ((reg_icr == 0xffffffff) || (reg_icr == 0)|| - (adapter->hw.mac.type >= e1000_82571 && - (reg_icr & E1000_ICR_INT_ASSERTED) == 0)) - goto out; - - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - goto out; - - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - callout_stop(&adapter->timer); - adapter->hw.mac.get_link_status = 1; - em_update_link_status(adapter); - /* Deal with TX cruft when link lost */ - em_tx_purge(adapter); - callout_reset(&adapter->timer, hz, - em_local_timer, adapter); - goto out; - } - - EM_TX_LOCK(adapter); - em_txeof(adapter); - em_rxeof(adapter, -1); - em_txeof(adapter); - if (ifp->if_drv_flags & IFF_DRV_RUNNING && - !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - em_start_locked(ifp); - EM_TX_UNLOCK(adapter); - -out: - EM_CORE_UNLOCK(adapter); - return; -} - -#else /* EM_FAST_IRQ, then fast interrupt routines only */ - -static void -em_handle_link(void *context, int pending) -{ - struct adapter *adapter = context; - struct ifnet *ifp = adapter->ifp; - - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) - return; - - EM_CORE_LOCK(adapter); - callout_stop(&adapter->timer); - em_update_link_status(adapter); - /* Deal with TX cruft when link lost */ - em_tx_purge(adapter); - callout_reset(&adapter->timer, hz, em_local_timer, adapter); - EM_CORE_UNLOCK(adapter); -} - - -/* Combined RX/TX handler, used by Legacy and MSI */ -static void -em_handle_rxtx(void *context, int pending) -{ - struct adapter *adapter = context; - struct ifnet *ifp = adapter->ifp; - - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - if (em_rxeof(adapter, adapter->rx_process_limit) != 0) - taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); - EM_TX_LOCK(adapter); - em_txeof(adapter); - -#if __FreeBSD_version >= 800000 - if (!drbr_empty(ifp, adapter->br)) - em_mq_start_locked(ifp, NULL); -#else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - em_start_locked(ifp); -#endif - EM_TX_UNLOCK(adapter); - } - - em_enable_intr(adapter); -} /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ -#if __FreeBSD_version < 700000 -#define FILTER_STRAY -#define FILTER_HANDLED -static void -#else static int -#endif em_irq_fast(void *arg) { struct adapter *adapter = arg; @@ -1713,13 +1379,8 @@ em_irq_fast(void *arg) (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; - /* - * Mask interrupts until the taskqueue is finished running. This is - * cheap, just assume that it is needed. This also works around the - * MSI message reordering errata on certain systems. - */ em_disable_intr(adapter); - taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); + taskqueue_enqueue(adapter->tq, &adapter->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { @@ -1732,30 +1393,64 @@ em_irq_fast(void *arg) return FILTER_HANDLED; } +/* Combined RX/TX handler, used by Legacy and MSI */ +static void +em_handle_que(void *context, int pending) +{ + struct adapter *adapter = context; + struct ifnet *ifp = adapter->ifp; + struct tx_ring *txr = adapter->tx_rings; + struct rx_ring *rxr = adapter->rx_rings; + u32 loop = EM_MAX_LOOP; + bool more_rx, more_tx; + + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + EM_TX_LOCK(txr); + do { + more_rx = em_rxeof(rxr, adapter->rx_process_limit); + more_tx = em_txeof(txr); + } while (loop-- && (more_rx || more_tx)); + +#if __FreeBSD_version >= 800000 + if (!drbr_empty(ifp, txr->br)) + em_mq_start_locked(ifp, txr, NULL); +#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + em_start_locked(ifp, txr); +#endif + if (more_rx || more_tx) + taskqueue_enqueue(adapter->tq, &adapter->que_task); + + EM_TX_UNLOCK(txr); + } + + em_enable_intr(adapter); + return; +} + + /********************************************************************* * * MSIX Interrupt Service Routines * **********************************************************************/ -#define EM_MSIX_TX 0x00040000 -#define EM_MSIX_RX 0x00010000 -#define EM_MSIX_LINK 0x00100000 - static void em_msix_tx(void *arg) { - struct adapter *adapter = arg; - struct ifnet *ifp = adapter->ifp; + struct tx_ring *txr = arg; + struct adapter *adapter = txr->adapter; + bool more; - ++adapter->tx_irq; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - EM_TX_LOCK(adapter); - em_txeof(adapter); - EM_TX_UNLOCK(adapter); - taskqueue_enqueue(adapter->tq, &adapter->tx_task); - } - /* Reenable this interrupt */ - E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_TX); + ++txr->tx_irq; + EM_TX_LOCK(txr); + more = em_txeof(txr); + EM_TX_UNLOCK(txr); + if (more) + taskqueue_enqueue(txr->tq, &txr->tx_task); + else + /* Reenable this interrupt */ + E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); return; } @@ -1768,15 +1463,17 @@ em_msix_tx(void *arg) static void em_msix_rx(void *arg) { - struct adapter *adapter = arg; - struct ifnet *ifp = adapter->ifp; + struct rx_ring *rxr = arg; + struct adapter *adapter = rxr->adapter; + bool more; - ++adapter->rx_irq; - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && - (em_rxeof(adapter, adapter->rx_process_limit) != 0)) - taskqueue_enqueue(adapter->tq, &adapter->rx_task); - /* Reenable this interrupt */ - E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_RX); + ++rxr->rx_irq; + more = em_rxeof(rxr, adapter->rx_process_limit); + if (more) + taskqueue_enqueue(rxr->tq, &rxr->rx_task); + else + /* Reenable this interrupt */ + E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); return; } @@ -1785,7 +1482,6 @@ em_msix_rx(void *arg) * MSIX Link Fast Interrupt Service routine * **********************************************************************/ - static void em_msix_link(void *arg) { @@ -1798,45 +1494,71 @@ em_msix_link(void *arg) if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; taskqueue_enqueue(taskqueue_fast, &adapter->link_task); - } - E1000_WRITE_REG(&adapter->hw, E1000_IMS, - EM_MSIX_LINK | E1000_IMS_LSC); + } else + E1000_WRITE_REG(&adapter->hw, E1000_IMS, + EM_MSIX_LINK | E1000_IMS_LSC); return; } static void em_handle_rx(void *context, int pending) { - struct adapter *adapter = context; - struct ifnet *ifp = adapter->ifp; - - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && - (em_rxeof(adapter, adapter->rx_process_limit) != 0)) - taskqueue_enqueue(adapter->tq, &adapter->rx_task); + struct rx_ring *rxr = context; + struct adapter *adapter = rxr->adapter; + u32 loop = EM_MAX_LOOP; + bool more; + do { + more = em_rxeof(rxr, adapter->rx_process_limit); + } while (loop-- && more); + /* Reenable this interrupt */ + E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } static void em_handle_tx(void *context, int pending) { - struct adapter *adapter = context; + struct tx_ring *txr = context; + struct adapter *adapter = txr->adapter; struct ifnet *ifp = adapter->ifp; + u32 loop = EM_MAX_LOOP; + bool more; + + if (!EM_TX_TRYLOCK(txr)) + return; + do { + more = em_txeof(txr); + } while (loop-- && more); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - if (!EM_TX_TRYLOCK(adapter)) - return; - em_txeof(adapter); #if __FreeBSD_version >= 800000 - if (!drbr_empty(ifp, adapter->br)) - em_mq_start_locked(ifp, NULL); + if (!drbr_empty(ifp, txr->br)) + em_mq_start_locked(ifp, txr, NULL); #else - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - em_start_locked(ifp); + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + em_start_locked(ifp, txr); #endif - EM_TX_UNLOCK(adapter); - } + E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); + EM_TX_UNLOCK(txr); } -#endif /* EM_FAST_IRQ */ + +static void +em_handle_link(void *context, int pending) +{ + struct adapter *adapter = context; + struct ifnet *ifp = adapter->ifp; + + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + return; + + EM_CORE_LOCK(adapter); + callout_stop(&adapter->timer); + em_update_link_status(adapter); + callout_reset(&adapter->timer, hz, em_local_timer, adapter); + E1000_WRITE_REG(&adapter->hw, E1000_IMS, + EM_MSIX_LINK | E1000_IMS_LSC); + EM_CORE_UNLOCK(adapter); +} + /********************************************************************* * @@ -1869,8 +1591,6 @@ em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { - if (adapter->hw.mac.type == e1000_82545) - fiber_type = IFM_1000_LX; ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { @@ -1962,8 +1682,9 @@ em_media_change(struct ifnet *ifp) **********************************************************************/ static int -em_xmit(struct adapter *adapter, struct mbuf **m_headp) +em_xmit(struct tx_ring *txr, struct mbuf **m_headp) { + struct adapter *adapter = txr->adapter; bus_dma_segment_t segs[EM_MAX_SCATTER]; bus_dmamap_t map; struct em_buffer *tx_buffer, *tx_buffer_mapped; @@ -1972,31 +1693,17 @@ em_xmit(struct adapter *adapter, struct mbuf **m_headp) u32 txd_upper, txd_lower, txd_used, txd_saved; int nsegs, i, j, first, last = 0; int error, do_tso, tso_desc = 0; -#if __FreeBSD_version < 700000 - struct m_tag *mtag; -#endif + m_head = *m_headp; txd_upper = txd_lower = txd_used = txd_saved = 0; - -#if __FreeBSD_version >= 700000 do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0); -#else - do_tso = 0; -#endif /* * Force a cleanup if number of TX descriptors * available hits the threshold */ - if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) { - em_txeof(adapter); - /* Now do we at least have a minimal? */ - if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) { - adapter->no_tx_desc_avail1++; - return (ENOBUFS); - } - } - + if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) + em_txeof(txr); /* * TSO workaround: @@ -2018,12 +1725,12 @@ em_xmit(struct adapter *adapter, struct mbuf **m_headp) * of the EOP which is the only one that * now gets a DONE bit writeback. */ - first = adapter->next_avail_tx_desc; - tx_buffer = &adapter->tx_buffer_area[first]; + first = txr->next_avail_desc; + tx_buffer = &txr->tx_buffers[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; - error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, + error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); /* @@ -2048,7 +1755,7 @@ em_xmit(struct adapter *adapter, struct mbuf **m_headp) *m_headp = m; /* Try it again */ - error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, + error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); if (error) { @@ -2068,15 +1775,15 @@ em_xmit(struct adapter *adapter, struct mbuf **m_headp) * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. */ - if ((do_tso == 0) && (adapter->tx_tso == TRUE)) { + if ((do_tso == 0) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; - adapter->tx_tso = FALSE; + txr->tx_tso = FALSE; } - if (nsegs > (adapter->num_tx_desc_avail - 2)) { - adapter->no_tx_desc_avail2++; - bus_dmamap_unload(adapter->txtag, map); + if (nsegs > (txr->tx_avail - 2)) { + txr->no_desc_avail++; + bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; @@ -2084,7 +1791,7 @@ em_xmit(struct adapter *adapter, struct mbuf **m_headp) /* Do hardware assists */ #if __FreeBSD_version >= 700000 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { - error = em_tso_setup(adapter, m_head, &txd_upper, &txd_lower); + error = em_tso_setup(txr, m_head, &txd_upper, &txd_lower); if (error != TRUE) return (ENXIO); /* something foobar */ /* we need to make a final sentinel transmit desc */ @@ -2092,123 +1799,70 @@ em_xmit(struct adapter *adapter, struct mbuf **m_headp) } else #endif if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) - em_transmit_checksum_setup(adapter, m_head, + em_transmit_checksum_setup(txr, m_head, &txd_upper, &txd_lower); - i = adapter->next_avail_tx_desc; - if (adapter->pcix_82544) - txd_saved = i; + i = txr->next_avail_desc; /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; - /* If adapter is 82544 and on PCIX bus */ - if(adapter->pcix_82544) { - DESC_ARRAY desc_array; - u32 array_elements, counter; - /* - * Check the Address and Length combination and - * split the data accordingly - */ - array_elements = em_fill_descriptors(segs[j].ds_addr, - segs[j].ds_len, &desc_array); - for (counter = 0; counter < array_elements; counter++) { - if (txd_used == adapter->num_tx_desc_avail) { - adapter->next_avail_tx_desc = txd_saved; - adapter->no_tx_desc_avail2++; - bus_dmamap_unload(adapter->txtag, map); - return (ENOBUFS); - } - tx_buffer = &adapter->tx_buffer_area[i]; - ctxd = &adapter->tx_desc_base[i]; - ctxd->buffer_addr = htole64( - desc_array.descriptor[counter].address); - ctxd->lower.data = htole32( - (adapter->txd_cmd | txd_lower | (u16) - desc_array.descriptor[counter].length)); - ctxd->upper.data = - htole32((txd_upper)); - last = i; - if (++i == adapter->num_tx_desc) - i = 0; - tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; - txd_used++; - } + + tx_buffer = &txr->tx_buffers[i]; + ctxd = &txr->tx_base[i]; + seg_addr = segs[j].ds_addr; + seg_len = segs[j].ds_len; + /* + ** TSO Workaround: + ** If this is the last descriptor, we want to + ** split it so we have a small final sentinel + */ + if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) { + seg_len -= 4; + ctxd->buffer_addr = htole64(seg_addr); + ctxd->lower.data = htole32( + adapter->txd_cmd | txd_lower | seg_len); + ctxd->upper.data = + htole32(txd_upper); + if (++i == adapter->num_tx_desc) + i = 0; + /* Now make the sentinel */ + ++txd_used; /* using an extra txd */ + ctxd = &txr->tx_base[i]; + tx_buffer = &txr->tx_buffers[i]; + ctxd->buffer_addr = + htole64(seg_addr + seg_len); + ctxd->lower.data = htole32( + adapter->txd_cmd | txd_lower | 4); + ctxd->upper.data = + htole32(txd_upper); + last = i; + if (++i == adapter->num_tx_desc) + i = 0; } else { - tx_buffer = &adapter->tx_buffer_area[i]; - ctxd = &adapter->tx_desc_base[i]; - seg_addr = segs[j].ds_addr; - seg_len = segs[j].ds_len; - /* - ** TSO Workaround: - ** If this is the last descriptor, we want to - ** split it so we have a small final sentinel - */ - if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) { - seg_len -= 4; - ctxd->buffer_addr = htole64(seg_addr); - ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | seg_len); - ctxd->upper.data = - htole32(txd_upper); - if (++i == adapter->num_tx_desc) - i = 0; - /* Now make the sentinel */ - ++txd_used; /* using an extra txd */ - ctxd = &adapter->tx_desc_base[i]; - tx_buffer = &adapter->tx_buffer_area[i]; - ctxd->buffer_addr = - htole64(seg_addr + seg_len); - ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | 4); - ctxd->upper.data = - htole32(txd_upper); - last = i; - if (++i == adapter->num_tx_desc) - i = 0; - } else { - ctxd->buffer_addr = htole64(seg_addr); - ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | seg_len); - ctxd->upper.data = - htole32(txd_upper); - last = i; - if (++i == adapter->num_tx_desc) - i = 0; - } - tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; + ctxd->buffer_addr = htole64(seg_addr); + ctxd->lower.data = htole32( + adapter->txd_cmd | txd_lower | seg_len); + ctxd->upper.data = + htole32(txd_upper); + last = i; + if (++i == adapter->num_tx_desc) + i = 0; } + tx_buffer->m_head = NULL; + tx_buffer->next_eop = -1; } - adapter->next_avail_tx_desc = i; - if (adapter->pcix_82544) - adapter->num_tx_desc_avail -= txd_used; - else { - adapter->num_tx_desc_avail -= nsegs; - if (tso_desc) /* TSO used an extra for sentinel */ - adapter->num_tx_desc_avail -= txd_used; - } + txr->next_avail_desc = i; + txr->tx_avail -= nsegs; + if (tso_desc) /* TSO used an extra for sentinel */ + txr->tx_avail -= txd_used; - /* - ** Handle VLAN tag, this is the - ** biggest difference between - ** 6.x and 7 - */ -#if __FreeBSD_version < 700000 - /* Find out if we are in vlan mode. */ - mtag = VLAN_OUTPUT_TAG(ifp, m_head); - if (mtag != NULL) { - ctxd->upper.fields.special = - htole16(VLAN_TAG_VALUE(mtag)); -#else /* FreeBSD 7 */ if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. */ ctxd->upper.fields.special = htole16(m_head->m_pkthdr.ether_vtag); -#endif /* Tell hardware to add tag */ ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE); } @@ -2216,7 +1870,7 @@ em_xmit(struct adapter *adapter, struct mbuf **m_headp) tx_buffer->m_head = m_head; tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; - bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE); + bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet @@ -2229,146 +1883,20 @@ em_xmit(struct adapter *adapter, struct mbuf **m_headp) * Keep track in the first buffer which * descriptor will be written back */ - tx_buffer = &adapter->tx_buffer_area[first]; + tx_buffer = &txr->tx_buffers[first]; tx_buffer->next_eop = last; - adapter->watchdog_time = ticks; /* * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 * that this frame is available to transmit. */ - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - if (adapter->hw.mac.type == e1000_82547 && - adapter->link_duplex == HALF_DUPLEX) - em_82547_move_tail(adapter); - else { - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i); - if (adapter->hw.mac.type == e1000_82547) - em_82547_update_fifo_head(adapter, - m_head->m_pkthdr.len); - } + E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } -/********************************************************************* - * - * 82547 workaround to avoid controller hang in half-duplex environment. - * The workaround is to avoid queuing a large packet that would span - * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers - * in this case. We do that only when FIFO is quiescent. - * - **********************************************************************/ -static void -em_82547_move_tail(void *arg) -{ - struct adapter *adapter = arg; - struct e1000_tx_desc *tx_desc; - u16 hw_tdt, sw_tdt, length = 0; - bool eop = 0; - - EM_TX_LOCK_ASSERT(adapter); - - hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0)); - sw_tdt = adapter->next_avail_tx_desc; - - while (hw_tdt != sw_tdt) { - tx_desc = &adapter->tx_desc_base[hw_tdt]; - length += tx_desc->lower.flags.length; - eop = tx_desc->lower.data & E1000_TXD_CMD_EOP; - if (++hw_tdt == adapter->num_tx_desc) - hw_tdt = 0; - - if (eop) { - if (em_82547_fifo_workaround(adapter, length)) { - adapter->tx_fifo_wrk_cnt++; - callout_reset(&adapter->tx_fifo_timer, 1, - em_82547_move_tail, adapter); - break; - } - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt); - em_82547_update_fifo_head(adapter, length); - length = 0; - } - } -} - -static int -em_82547_fifo_workaround(struct adapter *adapter, int len) -{ - int fifo_space, fifo_pkt_len; - - fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR); - - if (adapter->link_duplex == HALF_DUPLEX) { - fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; - - if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) { - if (em_82547_tx_fifo_reset(adapter)) - return (0); - else - return (1); - } - } - - return (0); -} - -static void -em_82547_update_fifo_head(struct adapter *adapter, int len) -{ - int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR); - - /* tx_fifo_head is always 16 byte aligned */ - adapter->tx_fifo_head += fifo_pkt_len; - if (adapter->tx_fifo_head >= adapter->tx_fifo_size) { - adapter->tx_fifo_head -= adapter->tx_fifo_size; - } -} - - -static int -em_82547_tx_fifo_reset(struct adapter *adapter) -{ - u32 tctl; - - if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) == - E1000_READ_REG(&adapter->hw, E1000_TDH(0))) && - (E1000_READ_REG(&adapter->hw, E1000_TDFT) == - E1000_READ_REG(&adapter->hw, E1000_TDFH)) && - (E1000_READ_REG(&adapter->hw, E1000_TDFTS) == - E1000_READ_REG(&adapter->hw, E1000_TDFHS)) && - (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) { - /* Disable TX unit */ - tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); - E1000_WRITE_REG(&adapter->hw, E1000_TCTL, - tctl & ~E1000_TCTL_EN); - - /* Reset FIFO pointers */ - E1000_WRITE_REG(&adapter->hw, E1000_TDFT, - adapter->tx_head_addr); - E1000_WRITE_REG(&adapter->hw, E1000_TDFH, - adapter->tx_head_addr); - E1000_WRITE_REG(&adapter->hw, E1000_TDFTS, - adapter->tx_head_addr); - E1000_WRITE_REG(&adapter->hw, E1000_TDFHS, - adapter->tx_head_addr); - - /* Re-enable TX unit */ - E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); - E1000_WRITE_FLUSH(&adapter->hw); - - adapter->tx_fifo_head = 0; - adapter->tx_fifo_reset_cnt++; - - return (TRUE); - } - else { - return (FALSE); - } -} - static void em_set_promisc(struct adapter *adapter) { @@ -2492,13 +2020,10 @@ em_local_timer(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; + struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); -#ifndef DEVICE_POLLING - taskqueue_enqueue(adapter->tq, - &adapter->rxtx_task); -#endif em_update_link_status(adapter); em_update_stats_counters(adapter); @@ -2509,26 +2034,31 @@ em_local_timer(void *arg) if (em_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING) em_print_hw_stats(adapter); - em_smartspeed(adapter); - /* - * We check the watchdog: the time since - * the last TX descriptor was cleaned. - * This implies a functional TX engine. - */ - if ((adapter->watchdog_check == TRUE) && - (ticks - adapter->watchdog_time > EM_WATCHDOG)) - goto hung; + ** Check for time since any descriptor was cleaned + */ + for (int i = 0; i < adapter->num_queues; i++, txr++) { + EM_TX_LOCK(txr); + if (txr->watchdog_check == FALSE) { + EM_TX_UNLOCK(txr); + continue; + } + if ((ticks - txr->watchdog_time) > EM_WATCHDOG) + goto hung; + EM_TX_UNLOCK(txr); + } callout_reset(&adapter->timer, hz, em_local_timer, adapter); return; hung: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; + EM_TX_UNLOCK(txr); em_init_locked(adapter); } + static void em_update_link_status(struct adapter *adapter) { @@ -2592,7 +2122,8 @@ em_update_link_status(struct adapter *adapter) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* Link down, disable watchdog */ - adapter->watchdog_check = FALSE; + // JFV change later + //adapter->watchdog_check = FALSE; if_link_state_change(ifp, LINK_STATE_DOWN); } } @@ -2611,22 +2142,27 @@ em_stop(void *arg) { struct adapter *adapter = arg; struct ifnet *ifp = adapter->ifp; + struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); - EM_TX_LOCK_ASSERT(adapter); INIT_DEBUGOUT("em_stop: begin"); em_disable_intr(adapter); callout_stop(&adapter->timer); - callout_stop(&adapter->tx_fifo_timer); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + /* Unarm watchdog timer. */ + for (int i = 0; i < adapter->num_queues; i++, txr++) { + EM_TX_LOCK(txr); + txr->watchdog_check = FALSE; + EM_TX_UNLOCK(txr); + } + e1000_reset_hw(&adapter->hw); - if (adapter->hw.mac.type >= e1000_82544) - E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); + E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); } @@ -2672,7 +2208,7 @@ static int em_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; - int val, rid, error = E1000_SUCCESS; + int rid; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, @@ -2687,58 +2223,17 @@ em_allocate_pci_resources(struct adapter *adapter) rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; - /* Only older adapters use IO mapping */ - if ((adapter->hw.mac.type > e1000_82543) && - (adapter->hw.mac.type < e1000_82571)) { - /* Figure our where our IO BAR is ? */ - for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { - val = pci_read_config(dev, rid, 4); - if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { - adapter->io_rid = rid; - break; - } - rid += 4; - /* check for 64bit BAR */ - if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) - rid += 4; - } - if (rid >= PCIR_CIS) { - device_printf(dev, "Unable to locate IO BAR\n"); - return (ENXIO); - } - adapter->ioport = bus_alloc_resource_any(dev, - SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); - if (adapter->ioport == NULL) { - device_printf(dev, "Unable to allocate bus resource: " - "ioport\n"); - return (ENXIO); - } - adapter->hw.io_base = 0; - adapter->osdep.io_bus_space_tag = - rman_get_bustag(adapter->ioport); - adapter->osdep.io_bus_space_handle = - rman_get_bushandle(adapter->ioport); - } - - /* - ** Init the resource arrays - ** used by MSIX setup - */ - for (int i = 0; i < 3; i++) { - adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */ - adapter->tag[i] = NULL; - adapter->res[i] = NULL; - } + /* Default to a single queue */ + adapter->num_queues = 1; /* * Setup MSI/X or MSI if PCI Express */ - if (em_enable_msi) - adapter->msi = em_setup_msix(adapter); + adapter->msix = em_setup_msix(adapter); adapter->hw.back = &adapter->osdep; - return (error); + return (0); } /********************************************************************* @@ -2750,63 +2245,40 @@ int em_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; - int error; + int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); - /* Legacy RID is 0 */ - if (adapter->msi == 0) - adapter->rid[0] = 0; - + if (adapter->msix == 1) /* using MSI */ + rid = 1; /* We allocate a single interrupt resource */ - adapter->res[0] = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE); - if (adapter->res[0] == NULL) { + adapter->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); + if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } -#ifdef EM_LEGACY_IRQ - /* We do Legacy setup */ - if ((error = bus_setup_intr(dev, adapter->res[0], -#if __FreeBSD_version > 700000 - INTR_TYPE_NET | INTR_MPSAFE, NULL, em_intr, adapter, -#else /* 6.X */ - INTR_TYPE_NET | INTR_MPSAFE, em_intr, adapter, -#endif - &adapter->tag[0])) != 0) { - device_printf(dev, "Failed to register interrupt handler"); - return (error); - } - -#else /* FAST_IRQ */ /* - * Try allocating a fast interrupt and the associated deferred - * processing contexts. + * Allocate a fast interrupt and the associated + * deferred processing contexts. */ - TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter); + TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq", device_get_nameunit(adapter->dev)); -#if __FreeBSD_version < 700000 - if ((error = bus_setup_intr(dev, adapter->res[0], - INTR_TYPE_NET | INTR_FAST, em_irq_fast, adapter, -#else - if ((error = bus_setup_intr(dev, adapter->res[0], - INTR_TYPE_NET, em_irq_fast, NULL, adapter, -#endif - &adapter->tag[0])) != 0) { + if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, + em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } -#endif /* EM_LEGACY_IRQ */ return (0); } @@ -2821,80 +2293,109 @@ em_allocate_legacy(struct adapter *adapter) int em_allocate_msix(struct adapter *adapter) { - device_t dev = adapter->dev; - int error; + device_t dev = adapter->dev; + struct tx_ring *txr = adapter->tx_rings; + struct rx_ring *rxr = adapter->rx_rings; + int error, rid, vector = 0; + /* Make sure all interrupts are disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); - /* First get the resources */ - for (int i = 0; i < adapter->msi; i++) { - adapter->res[i] = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &adapter->rid[i], RF_ACTIVE); - if (adapter->res[i] == NULL) { + /* First set up ring resources */ + for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { + + /* RX ring */ + rid = vector + 1; + + rxr->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_ACTIVE); + if (rxr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " - "MSIX Interrupt\n"); + "RX MSIX Interrupt %d\n", i); return (ENXIO); } + if ((error = bus_setup_intr(dev, rxr->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, + rxr, &rxr->tag)) != 0) { + device_printf(dev, "Failed to register RX handler"); + return (error); + } + rxr->msix = vector++; /* NOTE increment vector for TX */ + TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); + rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, + taskqueue_thread_enqueue, &rxr->tq); + taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq", + device_get_nameunit(adapter->dev)); + /* + ** Set the bit to enable interrupt + ** in E1000_IMS -- bits 20 and 21 + ** are for RX0 and RX1, note this has + ** NOTHING to do with the MSIX vector + */ + rxr->ims = 1 << (20 + i); + adapter->ivars |= (8 | rxr->msix) << (i * 4); + + /* TX ring */ + rid = vector + 1; + txr->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_ACTIVE); + if (txr->res == NULL) { + device_printf(dev, + "Unable to allocate bus resource: " + "TX MSIX Interrupt %d\n", i); + return (ENXIO); + } + if ((error = bus_setup_intr(dev, txr->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, + txr, &txr->tag)) != 0) { + device_printf(dev, "Failed to register TX handler"); + return (error); + } + txr->msix = vector++; /* Increment vector for next pass */ + TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); + txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, + taskqueue_thread_enqueue, &txr->tq); + taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", + device_get_nameunit(adapter->dev)); + /* + ** Set the bit to enable interrupt + ** in E1000_IMS -- bits 22 and 23 + ** are for TX0 and TX1, note this has + ** NOTHING to do with the MSIX vector + */ + txr->ims = 1 << (22 + i); + adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); } - /* - * Now allocate deferred processing contexts. - */ - TASK_INIT(&adapter->rx_task, 0, em_handle_rx, adapter); - TASK_INIT(&adapter->tx_task, 0, em_handle_tx, adapter); - /* - * Handle compatibility for msi case for deferral due to - * trylock failure - */ - TASK_INIT(&adapter->rxtx_task, 0, em_handle_tx, adapter); + /* Link interrupt */ + ++rid; + adapter->res = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_ACTIVE); + if (!adapter->res) { + device_printf(dev,"Unable to allocate " + "bus resource: Link interrupt [%d]\n", rid); + return (ENXIO); + } + /* Set the link handler function */ + error = bus_setup_intr(dev, adapter->res, + INTR_TYPE_NET | INTR_MPSAFE, NULL, + em_msix_link, adapter, &adapter->tag); + if (error) { + adapter->res = NULL; + device_printf(dev, "Failed to register LINK handler"); + return (error); + } + adapter->linkvec = vector; + adapter->ivars |= (8 | vector) << 16; + adapter->ivars |= 0x80000000; TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); - adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, + adapter->tq = taskqueue_create_fast("em_link", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq", + taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", device_get_nameunit(adapter->dev)); - /* - * And setup the interrupt handlers - */ - - /* First slot to RX */ - if ((error = bus_setup_intr(dev, adapter->res[0], -#if __FreeBSD_version > 700000 - INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, adapter, -#else /* 6.X */ - INTR_TYPE_NET | INTR_MPSAFE, em_msix_rx, adapter, -#endif - &adapter->tag[0])) != 0) { - device_printf(dev, "Failed to register RX handler"); - return (error); - } - - /* Next TX */ - if ((error = bus_setup_intr(dev, adapter->res[1], -#if __FreeBSD_version > 700000 - INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, adapter, -#else /* 6.X */ - INTR_TYPE_NET | INTR_MPSAFE, em_msix_tx, adapter, -#endif - &adapter->tag[1])) != 0) { - device_printf(dev, "Failed to register TX handler"); - return (error); - } - - /* And Link */ - if ((error = bus_setup_intr(dev, adapter->res[2], -#if __FreeBSD_version > 700000 - INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_link, adapter, -#else /* 6.X */ - INTR_TYPE_NET | INTR_MPSAFE, em_msix_link, adapter, -#endif - &adapter->tag[2])) != 0) { - device_printf(dev, "Failed to register TX handler"); - return (error); - } - return (0); } @@ -2902,36 +2403,56 @@ em_allocate_msix(struct adapter *adapter) static void em_free_pci_resources(struct adapter *adapter) { - device_t dev = adapter->dev; + device_t dev = adapter->dev; + struct tx_ring *txr; + struct rx_ring *rxr; + int rid; - /* Make sure the for loop below runs once */ - if (adapter->msi == 0) - adapter->msi = 1; /* - * First release all the interrupt resources: - * notice that since these are just kept - * in an array we can do the same logic - * whether its MSIX or just legacy. - */ - for (int i = 0; i < adapter->msi; i++) { - if (adapter->tag[i] != NULL) { - bus_teardown_intr(dev, adapter->res[i], - adapter->tag[i]); - adapter->tag[i] = NULL; + ** Release all the queue interrupt resources: + */ + for (int i = 0; i < adapter->num_queues; i++) { + txr = &adapter->tx_rings[i]; + rxr = &adapter->rx_rings[i]; + rid = txr->msix +1; + if (txr->tag != NULL) { + bus_teardown_intr(dev, txr->res, txr->tag); + txr->tag = NULL; } - if (adapter->res[i] != NULL) { + if (txr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, - adapter->rid[i], adapter->res[i]); + rid, txr->res); + rid = rxr->msix +1; + if (rxr->tag != NULL) { + bus_teardown_intr(dev, rxr->res, rxr->tag); + rxr->tag = NULL; } + if (rxr->res != NULL) + bus_release_resource(dev, SYS_RES_IRQ, + rid, rxr->res); } - if (adapter->msi) + if (adapter->linkvec) /* we are doing MSIX */ + rid = adapter->linkvec + 1; + else + (adapter->msix != 0) ? (rid = 1):(rid = 0); + + if (adapter->tag != NULL) { + bus_teardown_intr(dev, adapter->res, adapter->tag); + adapter->tag = NULL; + } + + if (adapter->res != NULL) + bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); + + + if (adapter->msix) pci_release_msi(dev); - if (adapter->msix != NULL) + if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - PCIR_BAR(EM_MSIX_BAR), adapter->msix); + PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); if (adapter->memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, @@ -2940,10 +2461,6 @@ em_free_pci_resources(struct adapter *adapter) if (adapter->flash != NULL) bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); - - if (adapter->ioport != NULL) - bus_release_resource(dev, SYS_RES_IOPORT, - adapter->io_rid, adapter->ioport); } /* @@ -2955,73 +2472,81 @@ em_setup_msix(struct adapter *adapter) device_t dev = adapter->dev; int val = 0; - if (adapter->hw.mac.type < e1000_82571) - return (0); /* Setup MSI/X for Hartwell */ - if (adapter->hw.mac.type == e1000_82574) { + if ((adapter->hw.mac.type == e1000_82574) && + (em_enable_msix == TRUE)) { /* Map the MSIX BAR */ int rid = PCIR_BAR(EM_MSIX_BAR); - adapter->msix = bus_alloc_resource_any(dev, + adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); - if (!adapter->msix) { + if (!adapter->msix_mem) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } val = pci_msix_count(dev); - /* - ** 82574 can be configured for 5 but - ** we limit use to 3. - */ - if (val > 3) val = 3; - if ((val) && pci_alloc_msix(dev, &val) == 0) { - device_printf(adapter->dev,"Using MSIX interrupts\n"); - return (val); + if (val != 5) { + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); + adapter->msix_mem = NULL; + device_printf(adapter->dev, + "MSIX vectors wrong, using MSI \n"); + goto msi; } + if (em_msix_queues == 2) { + val = 5; + adapter->num_queues = 2; + } else { + val = 3; + adapter->num_queues = 1; + } + if (pci_alloc_msix(dev, &val) == 0) { + device_printf(adapter->dev, + "Using MSIX interrupts " + "with %d vectors\n", val); + } + + return (val); } msi: val = pci_msi_count(dev); if (val == 1 && pci_alloc_msi(dev, &val) == 0) { - adapter->msi = 1; + adapter->msix = 1; device_printf(adapter->dev,"Using MSI interrupt\n"); return (val); } + /* Should only happen due to manual invention */ + device_printf(adapter->dev,"Setup MSIX failure\n"); return (0); } + /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. * **********************************************************************/ -static int -em_hardware_init(struct adapter *adapter) +static void +em_reset(struct adapter *adapter) { - device_t dev = adapter->dev; - u16 rx_buffer_size; + device_t dev = adapter->dev; + struct e1000_hw *hw = &adapter->hw; + u16 rx_buffer_size; - INIT_DEBUGOUT("em_hardware_init: begin"); - - /* Issue a global reset */ - e1000_reset_hw(&adapter->hw); - - /* When hardware is reset, fifo_head is also reset */ - adapter->tx_fifo_head = 0; + INIT_DEBUGOUT("em_reset: begin"); /* Set up smart power down as default off on newer adapters. */ - if (!em_smart_pwr_down && (adapter->hw.mac.type == e1000_82571 || - adapter->hw.mac.type == e1000_82572)) { + if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || + hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ - e1000_read_phy_reg(&adapter->hw, - IGP02E1000_PHY_POWER_MGMT, &phy_tmp); + e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; - e1000_write_phy_reg(&adapter->hw, - IGP02E1000_PHY_POWER_MGMT, phy_tmp); + e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* @@ -3038,37 +2563,42 @@ em_hardware_init(struct adapter *adapter) * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ - rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) & - 0xffff) << 10 ); + rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); - adapter->hw.fc.high_water = rx_buffer_size - + hw->fc.high_water = rx_buffer_size - roundup2(adapter->max_frame_size, 1024); - adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500; + hw->fc.low_water = hw->fc.high_water - 1500; - if (adapter->hw.mac.type == e1000_80003es2lan) - adapter->hw.fc.pause_time = 0xFFFF; + if (hw->mac.type == e1000_80003es2lan) + hw->fc.pause_time = 0xFFFF; else - adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME; - adapter->hw.fc.send_xon = TRUE; + hw->fc.pause_time = EM_FC_PAUSE_TIME; + + hw->fc.send_xon = TRUE; /* Set Flow control, use the tunable location if sane */ if ((em_fc_setting >= 0) || (em_fc_setting < 4)) - adapter->hw.fc.requested_mode = em_fc_setting; - else - adapter->hw.fc.requested_mode = e1000_fc_none; + hw->fc.requested_mode = em_fc_setting; + else + hw->fc.requested_mode = e1000_fc_none; /* Override - workaround for PCHLAN issue */ - if (adapter->hw.mac.type == e1000_pchlan) - adapter->hw.fc.requested_mode = e1000_fc_rx_pause; + if (hw->mac.type == e1000_pchlan) + hw->fc.requested_mode = e1000_fc_rx_pause; - if (e1000_init_hw(&adapter->hw) < 0) { + /* Issue a global reset */ + e1000_reset_hw(hw); + E1000_WRITE_REG(hw, E1000_WUC, 0); + + if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); - return (EIO); + return; } - e1000_check_for_link(&adapter->hw); - - return (0); + E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); + e1000_get_phy_info(hw); + e1000_check_for_link(hw); + return; } /********************************************************************* @@ -3105,57 +2635,30 @@ em_setup_interface(device_t dev, struct adapter *adapter) /* Multiqueue tx functions */ ifp->if_transmit = em_mq_start; ifp->if_qflush = em_qflush; - adapter->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &adapter->tx_mtx); #endif - if (adapter->hw.mac.type >= e1000_82543) { - int version_cap; -#if __FreeBSD_version < 700000 - version_cap = IFCAP_HWCSUM; -#else - version_cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; -#endif - ifp->if_capabilities |= version_cap; - ifp->if_capenable |= version_cap; - } -#if __FreeBSD_version >= 700000 - /* Identify TSO capable adapters */ - if ((adapter->hw.mac.type > e1000_82544) && - (adapter->hw.mac.type != e1000_82547)) - ifp->if_capabilities |= IFCAP_TSO4; + ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; + ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; + + /* Enable TSO by default, can disable with ifconfig */ + ifp->if_capabilities |= IFCAP_TSO4; + ifp->if_capenable |= IFCAP_TSO4; + /* - * By default only enable on PCI-E, this - * can be overriden by ifconfig. - */ - if (adapter->hw.mac.type >= e1000_82571) - ifp->if_capenable |= IFCAP_TSO4; -#endif - /* - * Tell the upper layer(s) we - * support full VLAN capability + * Tell the upper layer(s) we support long frames. */ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; - ifp->if_capenable |= (IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING); - - /* - ** Dont turn this on by default, if vlans are - ** created on another pseudo device (eg. lagg) - ** then vlan events are not passed thru, breaking - ** operation, but with HW FILTER off it works. If - ** using vlans directly on the em driver you can - ** enable this and get full hardware tag filtering. - */ - ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; + ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; #ifdef DEVICE_POLLING ifp->if_capabilities |= IFCAP_POLLING; #endif - /* Limit WOL to MAGIC, not clear others are used */ + /* Enable All WOL methods by default */ if (adapter->wol) { - ifp->if_capabilities |= IFCAP_WOL_MAGIC; - ifp->if_capenable |= IFCAP_WOL_MAGIC; + ifp->if_capabilities |= IFCAP_WOL; + ifp->if_capenable |= IFCAP_WOL; } /* @@ -3168,8 +2671,6 @@ em_setup_interface(device_t dev, struct adapter *adapter) (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ - if (adapter->hw.mac.type == e1000_82545) - fiber_type = IFM_1000_LX; ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); @@ -3193,67 +2694,6 @@ em_setup_interface(device_t dev, struct adapter *adapter) } -/********************************************************************* - * - * Workaround for SmartSpeed on 82541 and 82547 controllers - * - **********************************************************************/ -static void -em_smartspeed(struct adapter *adapter) -{ - u16 phy_tmp; - - if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || - adapter->hw.mac.autoneg == 0 || - (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) - return; - - if (adapter->smartspeed == 0) { - /* If Master/Slave config fault is asserted twice, - * we assume back-to-back */ - e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); - if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) - return; - e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); - if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { - e1000_read_phy_reg(&adapter->hw, - PHY_1000T_CTRL, &phy_tmp); - if(phy_tmp & CR_1000T_MS_ENABLE) { - phy_tmp &= ~CR_1000T_MS_ENABLE; - e1000_write_phy_reg(&adapter->hw, - PHY_1000T_CTRL, phy_tmp); - adapter->smartspeed++; - if(adapter->hw.mac.autoneg && - !e1000_copper_link_autoneg(&adapter->hw) && - !e1000_read_phy_reg(&adapter->hw, - PHY_CONTROL, &phy_tmp)) { - phy_tmp |= (MII_CR_AUTO_NEG_EN | - MII_CR_RESTART_AUTO_NEG); - e1000_write_phy_reg(&adapter->hw, - PHY_CONTROL, phy_tmp); - } - } - } - return; - } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { - /* If still no link, perhaps using 2/3 pair cable */ - e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); - phy_tmp |= CR_1000T_MS_ENABLE; - e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); - if(adapter->hw.mac.autoneg && - !e1000_copper_link_autoneg(&adapter->hw) && - !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { - phy_tmp |= (MII_CR_AUTO_NEG_EN | - MII_CR_RESTART_AUTO_NEG); - e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); - } - } - /* Restart process after EM_SMARTSPEED_MAX iterations */ - if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) - adapter->smartspeed = 0; -} - - /* * Manage DMA'able memory. */ @@ -3271,11 +2711,7 @@ em_dma_malloc(struct adapter *adapter, bus_size_t size, { int error; -#if __FreeBSD_version >= 700000 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ -#else - error = bus_dma_tag_create(NULL, /* parent */ -#endif EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ @@ -3346,97 +2782,243 @@ em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) /********************************************************************* * - * Allocate memory for tx_buffer structures. The tx_buffer stores all - * the information needed to transmit a packet on the wire. + * Allocate memory for the transmit and receive rings, and then + * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int -em_allocate_transmit_structures(struct adapter *adapter) +em_allocate_queues(struct adapter *adapter) { - device_t dev = adapter->dev; - struct em_buffer *tx_buffer; - int error; + device_t dev = adapter->dev; + struct tx_ring *txr = NULL; + struct rx_ring *rxr = NULL; + int rsize, tsize, error = E1000_SUCCESS; + int txconf = 0, rxconf = 0; - /* - * Create DMA tags for tx descriptors - */ -#if __FreeBSD_version >= 700000 - if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ -#else - if ((error = bus_dma_tag_create(NULL, /* parent */ -#endif - 1, 0, /* alignment, bounds */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - EM_TSO_SIZE, /* maxsize */ - EM_MAX_SCATTER, /* nsegments */ - EM_TSO_SEG_SIZE, /* maxsegsize */ - 0, /* flags */ - NULL, /* lockfunc */ - NULL, /* lockarg */ - &adapter->txtag)) != 0) { - device_printf(dev, "Unable to allocate TX DMA tag\n"); + + /* Allocate the TX ring struct memory */ + if (!(adapter->tx_rings = + (struct tx_ring *) malloc(sizeof(struct tx_ring) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate TX ring memory\n"); + error = ENOMEM; goto fail; } - adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) * - adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->tx_buffer_area == NULL) { + /* Now allocate the RX */ + if (!(adapter->rx_rings = + (struct rx_ring *) malloc(sizeof(struct rx_ring) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate RX ring memory\n"); + error = ENOMEM; + goto rx_fail; + } + + tsize = roundup2(adapter->num_tx_desc * + sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); + /* + * Now set up the TX queues, txconf is needed to handle the + * possibility that things fail midcourse and we need to + * undo memory gracefully + */ + for (int i = 0; i < adapter->num_queues; i++, txconf++) { + /* Set up some basics */ + txr = &adapter->tx_rings[i]; + txr->adapter = adapter; + txr->me = i; + + /* Initialize the TX lock */ + snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", + device_get_nameunit(dev), txr->me); + mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); + + if (em_dma_malloc(adapter, tsize, + &txr->txdma, BUS_DMA_NOWAIT)) { + device_printf(dev, + "Unable to allocate TX Descriptor memory\n"); + error = ENOMEM; + goto err_tx_desc; + } + txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; + bzero((void *)txr->tx_base, tsize); + + if (em_allocate_transmit_buffers(txr)) { + device_printf(dev, + "Critical Failure setting up transmit buffers\n"); + error = ENOMEM; + goto err_tx_desc; + } +#if __FreeBSD_version >= 800000 + /* Allocate a buf ring */ + txr->br = buf_ring_alloc(4096, M_DEVBUF, + M_WAITOK, &txr->tx_mtx); +#endif + } + + /* + * Next the RX queues... + */ + rsize = roundup2(adapter->num_rx_desc * + sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); + for (int i = 0; i < adapter->num_queues; i++, rxconf++) { + rxr = &adapter->rx_rings[i]; + rxr->adapter = adapter; + rxr->me = i; + + /* Initialize the RX lock */ + snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", + device_get_nameunit(dev), txr->me); + mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); + + if (em_dma_malloc(adapter, rsize, + &rxr->rxdma, BUS_DMA_NOWAIT)) { + device_printf(dev, + "Unable to allocate RxDescriptor memory\n"); + error = ENOMEM; + goto err_rx_desc; + } + rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr; + bzero((void *)rxr->rx_base, rsize); + + /* Allocate receive buffers for the ring*/ + if (em_allocate_receive_buffers(rxr)) { + device_printf(dev, + "Critical Failure setting up receive buffers\n"); + error = ENOMEM; + goto err_rx_desc; + } + } + + return (0); + +err_rx_desc: + for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) + em_dma_free(adapter, &rxr->rxdma); +err_tx_desc: + for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) + em_dma_free(adapter, &txr->txdma); + free(adapter->rx_rings, M_DEVBUF); +rx_fail: + buf_ring_free(txr->br, M_DEVBUF); + free(adapter->tx_rings, M_DEVBUF); +fail: + return (error); +} + + +/********************************************************************* + * + * Allocate memory for tx_buffer structures. The tx_buffer stores all + * the information needed to transmit a packet on the wire. This is + * called only once at attach, setup is done every reset. + * + **********************************************************************/ +static int +em_allocate_transmit_buffers(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + device_t dev = adapter->dev; + struct em_buffer *txbuf; + int error, i; + + /* + * Setup DMA descriptor areas. + */ + if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + EM_TSO_SIZE, /* maxsize */ + EM_MAX_SCATTER, /* nsegments */ + PAGE_SIZE, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockfuncarg */ + &txr->txtag))) { + device_printf(dev,"Unable to allocate TX DMA tag\n"); + goto fail; + } + + if (!(txr->tx_buffers = + (struct em_buffer *) malloc(sizeof(struct em_buffer) * + adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } - /* Create the descriptor buffer dma maps */ - for (int i = 0; i < adapter->num_tx_desc; i++) { - tx_buffer = &adapter->tx_buffer_area[i]; - error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map); + /* Create the descriptor buffer dma maps */ + txbuf = txr->tx_buffers; + for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { + error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } - tx_buffer->next_eop = -1; } - return (0); + return 0; fail: + /* We free all, it handles case where we are in the middle */ em_free_transmit_structures(adapter); return (error); } /********************************************************************* * - * (Re)Initialize transmit structures. + * Initialize a transmit ring. + * + **********************************************************************/ +static void +em_setup_transmit_ring(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + struct em_buffer *txbuf; + int i; + + /* Clear the old descriptor contents */ + EM_TX_LOCK(txr); + bzero((void *)txr->tx_base, + (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); + /* Reset indices */ + txr->next_avail_desc = 0; + txr->next_to_clean = 0; + + /* Free any existing tx buffers. */ + txbuf = txr->tx_buffers; + for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { + if (txbuf->m_head != NULL) { + bus_dmamap_sync(txr->txtag, txbuf->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, txbuf->map); + m_freem(txbuf->m_head); + txbuf->m_head = NULL; + } + /* clear the watch index */ + txbuf->next_eop = -1; + } + + /* Set number of descriptors available */ + txr->tx_avail = adapter->num_tx_desc; + + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + EM_TX_UNLOCK(txr); +} + +/********************************************************************* + * + * Initialize all transmit rings. * **********************************************************************/ static void em_setup_transmit_structures(struct adapter *adapter) { - struct em_buffer *tx_buffer; + struct tx_ring *txr = adapter->tx_rings; - /* Clear the old ring contents */ - bzero(adapter->tx_desc_base, - (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); - - /* Free any existing TX buffers */ - for (int i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { - tx_buffer = &adapter->tx_buffer_area[i]; - bus_dmamap_sync(adapter->txtag, tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(adapter->txtag, tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - tx_buffer->next_eop = -1; - } - - /* Reset state */ - adapter->next_avail_tx_desc = 0; - adapter->next_tx_to_clean = 0; - adapter->num_tx_desc_avail = adapter->num_tx_desc; - - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + for (int i = 0; i < adapter->num_queues; i++, txr++) + em_setup_transmit_ring(txr); return; } @@ -3449,25 +3031,31 @@ em_setup_transmit_structures(struct adapter *adapter) static void em_initialize_transmit_unit(struct adapter *adapter) { + struct tx_ring *txr = adapter->tx_rings; + struct e1000_hw *hw = &adapter->hw; u32 tctl, tarc, tipg = 0; - u64 bus_addr; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); - /* Setup the Base and Length of the Tx Descriptor Ring */ - bus_addr = adapter->txdma.dma_paddr; - E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0), - adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); - E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0), - (u32)(bus_addr >> 32)); - E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0), - (u32)bus_addr); - /* Setup the HW Tx Head and Tail descriptor pointers */ - E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0); - E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0); - HW_DEBUGOUT2("Base = %x, Length = %x\n", - E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)), - E1000_READ_REG(&adapter->hw, E1000_TDLEN(0))); + for (int i = 0; i < adapter->num_queues; i++, txr++) { + u64 bus_addr = txr->txdma.dma_paddr; + /* Base and Len of TX Ring */ + E1000_WRITE_REG(hw, E1000_TDLEN(i), + adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); + E1000_WRITE_REG(hw, E1000_TDBAH(i), + (u32)(bus_addr >> 32)); + E1000_WRITE_REG(hw, E1000_TDBAL(i), + (u32)bus_addr); + /* Init the HEAD/TAIL indices */ + E1000_WRITE_REG(hw, E1000_TDT(i), 0); + E1000_WRITE_REG(hw, E1000_TDH(i), 0); + + HW_DEBUGOUT2("Base = %x, Length = %x\n", + E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), + E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); + + txr->watchdog_check = FALSE; + } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { @@ -3494,6 +3082,7 @@ em_initialize_transmit_unit(struct adapter *adapter) E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); + if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); @@ -3512,6 +3101,10 @@ em_initialize_transmit_unit(struct adapter *adapter) E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } + adapter->txd_cmd = E1000_TXD_CMD_IFCS; + if (adapter->tx_int_delay.value > 0) + adapter->txd_cmd |= E1000_TXD_CMD_IDE; + /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; @@ -3524,59 +3117,84 @@ em_initialize_transmit_unit(struct adapter *adapter) /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); - /* Setup Transmit Descriptor Base Settings */ - adapter->txd_cmd = E1000_TXD_CMD_IFCS; - - if (adapter->tx_int_delay.value > 0) - adapter->txd_cmd |= E1000_TXD_CMD_IDE; } + /********************************************************************* * - * Free all transmit related data structures. + * Free all transmit rings. * **********************************************************************/ static void em_free_transmit_structures(struct adapter *adapter) { - struct em_buffer *tx_buffer; + struct tx_ring *txr = adapter->tx_rings; - INIT_DEBUGOUT("free_transmit_structures: begin"); + for (int i = 0; i < adapter->num_queues; i++, txr++) { + EM_TX_LOCK(txr); + em_free_transmit_buffers(txr); + em_dma_free(adapter, &txr->txdma); + EM_TX_UNLOCK(txr); + EM_TX_LOCK_DESTROY(txr); + } - if (adapter->tx_buffer_area != NULL) { - for (int i = 0; i < adapter->num_tx_desc; i++) { - tx_buffer = &adapter->tx_buffer_area[i]; - if (tx_buffer->m_head != NULL) { - bus_dmamap_sync(adapter->txtag, tx_buffer->map, - BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(adapter->txtag, - tx_buffer->map); - m_freem(tx_buffer->m_head); - tx_buffer->m_head = NULL; - } else if (tx_buffer->map != NULL) - bus_dmamap_unload(adapter->txtag, - tx_buffer->map); - if (tx_buffer->map != NULL) { - bus_dmamap_destroy(adapter->txtag, - tx_buffer->map); - tx_buffer->map = NULL; + free(adapter->tx_rings, M_DEVBUF); +} + +/********************************************************************* + * + * Free transmit ring related data structures. + * + **********************************************************************/ +static void +em_free_transmit_buffers(struct tx_ring *txr) +{ + struct adapter *adapter = txr->adapter; + struct em_buffer *txbuf; + + INIT_DEBUGOUT("free_transmit_ring: begin"); + + if (txr->tx_buffers == NULL) + return; + + for (int i = 0; i < adapter->num_tx_desc; i++) { + txbuf = &txr->tx_buffers[i]; + if (txbuf->m_head != NULL) { + bus_dmamap_sync(txr->txtag, txbuf->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(txr->txtag, + txbuf->map); + m_freem(txbuf->m_head); + txbuf->m_head = NULL; + if (txbuf->map != NULL) { + bus_dmamap_destroy(txr->txtag, + txbuf->map); + txbuf->map = NULL; } + } else if (txbuf->map != NULL) { + bus_dmamap_unload(txr->txtag, + txbuf->map); + bus_dmamap_destroy(txr->txtag, + txbuf->map); + txbuf->map = NULL; } } - if (adapter->tx_buffer_area != NULL) { - free(adapter->tx_buffer_area, M_DEVBUF); - adapter->tx_buffer_area = NULL; - } - if (adapter->txtag != NULL) { - bus_dma_tag_destroy(adapter->txtag); - adapter->txtag = NULL; - } #if __FreeBSD_version >= 800000 - if (adapter->br != NULL) - buf_ring_free(adapter->br, M_DEVBUF); + if (txr->br != NULL) + buf_ring_free(txr->br, M_DEVBUF); #endif + if (txr->tx_buffers != NULL) { + free(txr->tx_buffers, M_DEVBUF); + txr->tx_buffers = NULL; + } + if (txr->txtag != NULL) { + bus_dma_tag_destroy(txr->txtag); + txr->txtag = NULL; + } + return; } + /********************************************************************* * * The offload context needs to be set when we transfer the first @@ -3588,22 +3206,23 @@ em_free_transmit_structures(struct adapter *adapter) * big performance win. -jfv **********************************************************************/ static void -em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp, +em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper, u32 *txd_lower) { - struct e1000_context_desc *TXD = NULL; + struct adapter *adapter = txr->adapter; + struct e1000_context_desc *TXD = NULL; struct em_buffer *tx_buffer; struct ether_vlan_header *eh; struct ip *ip = NULL; struct ip6_hdr *ip6; - int curr_txd, ehdrlen; + int cur, ehdrlen; u32 cmd, hdr_len, ip_hlen; u16 etype; u8 ipproto; cmd = hdr_len = ipproto = 0; - curr_txd = adapter->next_avail_tx_desc; + cur = txr->next_avail_desc; /* * Determine where frame payload starts. @@ -3636,7 +3255,7 @@ em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp, * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *) - &adapter->tx_desc_base[curr_txd]; + &txr->tx_base[cur]; TXD->lower_setup.ip_fields.ipcss = ehdrlen; TXD->lower_setup.ip_fields.ipcse = htole16(ehdrlen + ip_hlen); @@ -3678,16 +3297,16 @@ em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp, *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; /* no need for context if already set */ - if (adapter->last_hw_offload == CSUM_TCP) + if (txr->last_hw_offload == CSUM_TCP) return; - adapter->last_hw_offload = CSUM_TCP; + txr->last_hw_offload = CSUM_TCP; /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *) - &adapter->tx_desc_base[curr_txd]; + &txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = @@ -3701,16 +3320,16 @@ em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp, *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; /* no need for context if already set */ - if (adapter->last_hw_offload == CSUM_UDP) + if (txr->last_hw_offload == CSUM_UDP) return; - adapter->last_hw_offload = CSUM_UDP; + txr->last_hw_offload = CSUM_UDP; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *) - &adapter->tx_desc_base[curr_txd]; + &txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = @@ -3725,35 +3344,35 @@ em_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp, TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); - tx_buffer = &adapter->tx_buffer_area[curr_txd]; + tx_buffer = &txr->tx_buffers[cur]; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; - if (++curr_txd == adapter->num_tx_desc) - curr_txd = 0; + if (++cur == adapter->num_tx_desc) + cur = 0; - adapter->num_tx_desc_avail--; - adapter->next_avail_tx_desc = curr_txd; + txr->tx_avail--; + txr->next_avail_desc = cur; } -#if __FreeBSD_version >= 700000 /********************************************************************** * * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ static bool -em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper, +em_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *txd_upper, u32 *txd_lower) { - struct e1000_context_desc *TXD; - struct em_buffer *tx_buffer; - struct ether_vlan_header *eh; - struct ip *ip; - struct ip6_hdr *ip6; - struct tcphdr *th; - int curr_txd, ehdrlen, hdr_len, ip_hlen, isip6; + struct adapter *adapter = txr->adapter; + struct e1000_context_desc *TXD; + struct em_buffer *tx_buffer; + struct ether_vlan_header *eh; + struct ip *ip; + struct ip6_hdr *ip6; + struct tcphdr *th; + int cur, ehdrlen, hdr_len, ip_hlen, isip6; u16 etype; /* @@ -3833,9 +3452,9 @@ em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper, *txd_upper = ((isip6 ? 0 : E1000_TXD_POPTS_IXSM) | E1000_TXD_POPTS_TXSM) << 8; - curr_txd = adapter->next_avail_tx_desc; - tx_buffer = &adapter->tx_buffer_area[curr_txd]; - TXD = (struct e1000_context_desc *) &adapter->tx_desc_base[curr_txd]; + cur = txr->next_avail_desc; + tx_buffer = &txr->tx_buffers[cur]; + TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; /* IPv6 doesn't have a header checksum. */ if (!isip6) { @@ -3870,24 +3489,23 @@ em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper, TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ - (isip6 ? 0 : E1000_TXD_CMD_IP) | /* Do IP csum */ + (isip6 ? 0 : E1000_TXD_CMD_IP) | E1000_TXD_CMD_TCP | /* Do TCP checksum */ (mp->m_pkthdr.len - (hdr_len))); /* Total len */ tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; - if (++curr_txd == adapter->num_tx_desc) - curr_txd = 0; + if (++cur == adapter->num_tx_desc) + cur = 0; - adapter->num_tx_desc_avail--; - adapter->next_avail_tx_desc = curr_txd; - adapter->tx_tso = TRUE; + txr->tx_avail--; + txr->next_avail_desc = cur; + txr->tx_tso = TRUE; return TRUE; } -#endif /* __FreeBSD_version >= 700000 */ /********************************************************************** * @@ -3896,25 +3514,26 @@ em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper, * tx_buffer is put back on the free queue. * **********************************************************************/ -static void -em_txeof(struct adapter *adapter) +static bool +em_txeof(struct tx_ring *txr) { + struct adapter *adapter = txr->adapter; int first, last, done, num_avail; struct em_buffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; struct ifnet *ifp = adapter->ifp; - EM_TX_LOCK_ASSERT(adapter); + EM_TX_LOCK_ASSERT(txr); - if (adapter->num_tx_desc_avail == adapter->num_tx_desc) - return; + if (txr->tx_avail == adapter->num_tx_desc) + return (FALSE); - num_avail = adapter->num_tx_desc_avail; - first = adapter->next_tx_to_clean; - tx_desc = &adapter->tx_desc_base[first]; - tx_buffer = &adapter->tx_buffer_area[first]; + num_avail = txr->tx_avail; + first = txr->next_to_clean; + tx_desc = &txr->tx_base[first]; + tx_buffer = &txr->tx_buffers[first]; last = tx_buffer->next_eop; - eop_desc = &adapter->tx_desc_base[last]; + eop_desc = &txr->tx_base[last]; /* * What this does is get the index of the @@ -3926,7 +3545,7 @@ em_txeof(struct adapter *adapter) last = 0; done = last; - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { @@ -3939,38 +3558,38 @@ em_txeof(struct adapter *adapter) if (tx_buffer->m_head) { ifp->if_opackets++; - bus_dmamap_sync(adapter->txtag, + bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(adapter->txtag, + bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; - adapter->watchdog_time = ticks; + txr->watchdog_time = ticks; if (++first == adapter->num_tx_desc) first = 0; - tx_buffer = &adapter->tx_buffer_area[first]; - tx_desc = &adapter->tx_desc_base[first]; + tx_buffer = &txr->tx_buffers[first]; + tx_desc = &txr->tx_base[first]; } /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { - eop_desc = &adapter->tx_desc_base[last]; + eop_desc = &txr->tx_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } - bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, + bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - adapter->next_tx_to_clean = first; + txr->next_to_clean = first; /* * If we have enough room, clear IFF_DRV_OACTIVE to @@ -3980,88 +3599,85 @@ em_txeof(struct adapter *adapter) if (num_avail > EM_TX_CLEANUP_THRESHOLD) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (num_avail == adapter->num_tx_desc) { - adapter->watchdog_check = FALSE; - adapter->num_tx_desc_avail = num_avail; - return; + txr->watchdog_check = FALSE; + txr->tx_avail = num_avail; + return (FALSE); } } - adapter->num_tx_desc_avail = num_avail; - return; + txr->tx_avail = num_avail; + return (TRUE); } + /********************************************************************* * - * When Link is lost sometimes there is work still in the TX ring - * which may result in a watchdog, rather than allow that we do an - * attempted cleanup and then reinit here. Note that this has been - * seens mostly with fiber adapters. + * Refresh RX descriptor mbufs from system mbuf buffer pool. * **********************************************************************/ static void -em_tx_purge(struct adapter *adapter) -{ - if ((!adapter->link_active) && (adapter->watchdog_check)) { - EM_TX_LOCK(adapter); - em_txeof(adapter); - EM_TX_UNLOCK(adapter); - if (adapter->watchdog_check) /* Still outstanding? */ - em_init_locked(adapter); - } -} - -/********************************************************************* - * - * Get a buffer from system mbuf buffer pool. - * - **********************************************************************/ -static int -em_get_buf(struct adapter *adapter, int i) +em_refresh_mbufs(struct rx_ring *rxr, int limit) { + struct adapter *adapter = rxr->adapter; struct mbuf *m; bus_dma_segment_t segs[1]; bus_dmamap_t map; - struct em_buffer *rx_buffer; - int error, nsegs; + struct em_buffer *rxbuf; + int i, error, nsegs, cleaned; - m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); - if (m == NULL) { - adapter->mbuf_cluster_failed++; - return (ENOBUFS); + i = rxr->next_to_refresh; + cleaned = -1; + while (i != limit) { + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + if (m == NULL) + goto update; + m->m_len = m->m_pkthdr.len = MCLBYTES; + + if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) + m_adj(m, ETHER_ALIGN); + + /* + * Using memory from the mbuf cluster pool, invoke the + * bus_dma machinery to arrange the memory mapping. + */ + error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxr->rx_sparemap, + m, segs, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + m_free(m); + goto update; + } + + /* If nsegs is wrong then the stack is corrupt. */ + KASSERT(nsegs == 1, ("Too many segments returned!")); + + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->m_head != NULL) + bus_dmamap_unload(rxr->rxtag, rxbuf->map); + + map = rxbuf->map; + rxbuf->map = rxr->rx_sparemap; + rxr->rx_sparemap = map; + bus_dmamap_sync(rxr->rxtag, + rxbuf->map, BUS_DMASYNC_PREREAD); + rxbuf->m_head = m; + rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr); + + cleaned = i; + /* Calculate next index */ + if (++i == adapter->num_rx_desc) + i = 0; + /* This is the work marker for refresh */ + rxr->next_to_refresh = i; } - m->m_len = m->m_pkthdr.len = MCLBYTES; +update: + if (cleaned != -1) /* Update tail index */ + E1000_WRITE_REG(&adapter->hw, + E1000_RDT(rxr->me), cleaned); - if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) - m_adj(m, ETHER_ALIGN); - - /* - * Using memory from the mbuf cluster pool, invoke the - * bus_dma machinery to arrange the memory mapping. - */ - error = bus_dmamap_load_mbuf_sg(adapter->rxtag, - adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - m_free(m); - return (error); - } - - /* If nsegs is wrong then the stack is corrupt. */ - KASSERT(nsegs == 1, ("Too many segments returned!")); - - rx_buffer = &adapter->rx_buffer_area[i]; - if (rx_buffer->m_head != NULL) - bus_dmamap_unload(adapter->rxtag, rx_buffer->map); - - map = rx_buffer->map; - rx_buffer->map = adapter->rx_sparemap; - adapter->rx_sparemap = map; - bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD); - rx_buffer->m_head = m; - - adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr); - return (0); + return; } + /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one @@ -4071,24 +3687,21 @@ em_get_buf(struct adapter *adapter, int i) * **********************************************************************/ static int -em_allocate_receive_structures(struct adapter *adapter) +em_allocate_receive_buffers(struct rx_ring *rxr) { - device_t dev = adapter->dev; - struct em_buffer *rx_buffer; - int i, error; + struct adapter *adapter = rxr->adapter; + device_t dev = adapter->dev; + struct em_buffer *rxbuf; + int error; - adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * + rxr->rx_buffers = malloc(sizeof(struct em_buffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); - if (adapter->rx_buffer_area == NULL) { + if (rxr->rx_buffers == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } -#if __FreeBSD_version >= 700000 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ -#else - error = bus_dma_tag_create(NULL, /* parent */ -#endif 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ @@ -4099,7 +3712,7 @@ em_allocate_receive_structures(struct adapter *adapter) 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ - &adapter->rxtag); + &rxr->rxtag); if (error) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, error); @@ -4107,18 +3720,19 @@ em_allocate_receive_structures(struct adapter *adapter) } /* Create the spare map (used by getbuf) */ - error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT, - &adapter->rx_sparemap); + error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT, + &rxr->rx_sparemap); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } - rx_buffer = adapter->rx_buffer_area; - for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { - error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT, - &rx_buffer->map); + rxbuf = rxr->rx_buffers; + for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { + rxbuf = &rxr->rx_buffers[i]; + error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT, + &rxbuf->map); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); @@ -4133,48 +3747,182 @@ fail: return (error); } + /********************************************************************* * - * (Re)initialize receive structures. + * Initialize a receive ring and its buffers. + * + **********************************************************************/ +static int +em_setup_receive_ring(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + struct em_buffer *rxbuf; + bus_dma_segment_t seg[1]; + int rsize, nsegs, error; + + + /* Clear the ring contents */ + EM_RX_LOCK(rxr); + rsize = roundup2(adapter->num_rx_desc * + sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); + bzero((void *)rxr->rx_base, rsize); + + /* + ** Free current RX buffer structs and their mbufs + */ + for (int i = 0; i < adapter->num_rx_desc; i++) { + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->m_head != NULL) { + bus_dmamap_sync(rxr->rxtag, rxbuf->map, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->rxtag, rxbuf->map); + m_freem(rxbuf->m_head); + } + } + + /* Now replenish the mbufs */ + for (int j = 0; j != adapter->num_rx_desc; ++j) { + + rxbuf = &rxr->rx_buffers[j]; + rxbuf->m_head = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + if (rxbuf->m_head == NULL) + panic("RX ring hdr initialization failed!\n"); + rxbuf->m_head->m_len = MCLBYTES; + rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ + rxbuf->m_head->m_pkthdr.len = MCLBYTES; + + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->rxtag, + rxbuf->map, rxbuf->m_head, seg, + &nsegs, BUS_DMA_NOWAIT); + if (error != 0) + panic("RX ring dma initialization failed!\n"); + bus_dmamap_sync(rxr->rxtag, + rxbuf->map, BUS_DMASYNC_PREREAD); + + /* Update descriptor */ + rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr); + } + + + /* Setup our descriptor indices */ + rxr->next_to_check = 0; + rxr->next_to_refresh = 0; + + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + EM_RX_UNLOCK(rxr); + return (0); +} + +/********************************************************************* + * + * Initialize all receive rings. * **********************************************************************/ static int em_setup_receive_structures(struct adapter *adapter) { - struct em_buffer *rx_buffer; - int i, error; + struct rx_ring *rxr = adapter->rx_rings; + int j; - /* Reset descriptor ring */ - bzero(adapter->rx_desc_base, - (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc); - - /* Free current RX buffers. */ - rx_buffer = adapter->rx_buffer_area; - for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { - if (rx_buffer->m_head != NULL) { - bus_dmamap_sync(adapter->rxtag, rx_buffer->map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(adapter->rxtag, rx_buffer->map); - m_freem(rx_buffer->m_head); - rx_buffer->m_head = NULL; - } - } - - /* Allocate new ones. */ - for (i = 0; i < adapter->num_rx_desc; i++) { - error = em_get_buf(adapter, i); - if (error) - return (error); - } - - /* Setup our descriptor pointers */ - adapter->next_rx_desc_to_check = 0; - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + for (j = 0; j < adapter->num_queues; j++, rxr++) + if (em_setup_receive_ring(rxr)) + goto fail; return (0); +fail: + /* + * Free RX buffers allocated so far, we will only handle + * the rings that completed, the failing case will have + * cleaned up for itself. 'j' failed, so its the terminus. + */ + for (int i = 0; i < j; ++i) { + rxr = &adapter->rx_rings[i]; + for (int n = 0; n < adapter->num_rx_desc; n++) { + struct em_buffer *rxbuf; + rxbuf = &rxr->rx_buffers[n]; + if (rxbuf->m_head != NULL) { + bus_dmamap_sync(rxr->rxtag, rxbuf->map, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->rxtag, rxbuf->map); + m_freem(rxbuf->m_head); + rxbuf->m_head = NULL; + } + } + } + + return (ENOBUFS); } +/********************************************************************* + * + * Free all receive rings. + * + **********************************************************************/ +static void +em_free_receive_structures(struct adapter *adapter) +{ + struct rx_ring *rxr = adapter->rx_rings; + + for (int i = 0; i < adapter->num_queues; i++, rxr++) { + em_free_receive_buffers(rxr); + /* Free the ring memory as well */ + em_dma_free(adapter, &rxr->rxdma); + EM_RX_LOCK_DESTROY(rxr); + } + + free(adapter->rx_rings, M_DEVBUF); +} + + +/********************************************************************* + * + * Free receive ring data structures + * + **********************************************************************/ +static void +em_free_receive_buffers(struct rx_ring *rxr) +{ + struct adapter *adapter = rxr->adapter; + struct em_buffer *rxbuf = NULL; + + INIT_DEBUGOUT("free_receive_buffers: begin"); + + if (rxr->rx_sparemap) { + bus_dmamap_destroy(rxr->rxtag, rxr->rx_sparemap); + rxr->rx_sparemap = NULL; + } + + if (rxr->rx_buffers != NULL) { + for (int i = 0; i < adapter->num_rx_desc; i++) { + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->map != NULL) { + bus_dmamap_sync(rxr->rxtag, rxbuf->map, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(rxr->rxtag, rxbuf->map); + bus_dmamap_destroy(rxr->rxtag, rxbuf->map); + } + if (rxbuf->m_head != NULL) { + m_freem(rxbuf->m_head); + rxbuf->m_head = NULL; + } + } + free(rxr->rx_buffers, M_DEVBUF); + rxr->rx_buffers = NULL; + } + + if (rxr->rxtag != NULL) { + bus_dma_tag_destroy(rxr->rxtag); + rxr->rxtag = NULL; + } + + return; +} + + /********************************************************************* * * Enable receive unit. @@ -4186,28 +3934,28 @@ em_setup_receive_structures(struct adapter *adapter) static void em_initialize_receive_unit(struct adapter *adapter) { + struct rx_ring *rxr = adapter->rx_rings; struct ifnet *ifp = adapter->ifp; + struct e1000_hw *hw = &adapter->hw; u64 bus_addr; u32 rctl, rxcsum; - INIT_DEBUGOUT("em_initialize_receive_unit: begin"); + INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ - rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); + rctl = E1000_READ_REG(hw, E1000_RCTL); + E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); - if (adapter->hw.mac.type >= e1000_82540) { - E1000_WRITE_REG(&adapter->hw, E1000_RADV, - adapter->rx_abs_int_delay.value); - /* - * Set the interrupt throttling rate. Value is calculated - * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) - */ - E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR); - } + E1000_WRITE_REG(&adapter->hw, E1000_RADV, + adapter->rx_abs_int_delay.value); + /* + * Set the interrupt throttling rate. Value is calculated + * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) + */ + E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); /* ** When using MSIX interrupts we need to throttle @@ -4215,67 +3963,17 @@ em_initialize_receive_unit(struct adapter *adapter) */ if (adapter->msix) for (int i = 0; i < 4; i++) - E1000_WRITE_REG(&adapter->hw, - E1000_EITR_82574(i), DEFAULT_ITR); + E1000_WRITE_REG(hw, E1000_EITR_82574(i), + DEFAULT_ITR); /* Disable accelerated ackknowledge */ if (adapter->hw.mac.type == e1000_82574) - E1000_WRITE_REG(&adapter->hw, - E1000_RFCTL, E1000_RFCTL_ACK_DIS); + E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS); - /* Setup the Base and Length of the Rx Descriptor Ring */ - bus_addr = adapter->rxdma.dma_paddr; - E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0), - adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); - E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0), - (u32)(bus_addr >> 32)); - E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0), - (u32)bus_addr); - - /* Setup the Receive Control Register */ - rctl &= ~(3 << E1000_RCTL_MO_SHIFT); - rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | - E1000_RCTL_RDMTS_HALF | - (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - - /* Make sure VLAN Filters are off */ - rctl &= ~E1000_RCTL_VFE; - - if (e1000_tbi_sbp_enabled_82543(&adapter->hw)) - rctl |= E1000_RCTL_SBP; - else - rctl &= ~E1000_RCTL_SBP; - - switch (adapter->rx_buffer_len) { - default: - case 2048: - rctl |= E1000_RCTL_SZ_2048; - break; - case 4096: - rctl |= E1000_RCTL_SZ_4096 | - E1000_RCTL_BSEX | E1000_RCTL_LPE; - break; - case 8192: - rctl |= E1000_RCTL_SZ_8192 | - E1000_RCTL_BSEX | E1000_RCTL_LPE; - break; - case 16384: - rctl |= E1000_RCTL_SZ_16384 | - E1000_RCTL_BSEX | E1000_RCTL_LPE; - break; - } - - if (ifp->if_mtu > ETHERMTU) - rctl |= E1000_RCTL_LPE; - else - rctl &= ~E1000_RCTL_LPE; - - /* Enable 82543 Receive Checksum Offload for TCP and UDP */ - if ((adapter->hw.mac.type >= e1000_82543) && - (ifp->if_capenable & IFCAP_RXCSUM)) { - rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM); + if (ifp->if_capenable & IFCAP_RXCSUM) { + rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); - E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum); + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); } /* @@ -4285,72 +3983,42 @@ em_initialize_receive_unit(struct adapter *adapter) ** values in RDTR is a known source of problems on other ** platforms another solution is being sought. */ - if (adapter->hw.mac.type == e1000_82573) - E1000_WRITE_REG(&adapter->hw, E1000_RDTR, 0x20); + if (hw->mac.type == e1000_82573) + E1000_WRITE_REG(hw, E1000_RDTR, 0x20); - /* Enable Receives */ - E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); + for (int i = 0; i < adapter->num_queues; i++, rxr++) { + /* Setup the Base and Length of the Rx Descriptor Ring */ + bus_addr = rxr->rxdma.dma_paddr; + E1000_WRITE_REG(hw, E1000_RDLEN(i), + adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); + E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); + E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); + /* Setup the Head and Tail Descriptor Pointers */ + E1000_WRITE_REG(hw, E1000_RDH(i), 0); + E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1); + } - /* - * Setup the HW Rx Head and - * Tail Descriptor Pointers - */ - E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), adapter->num_rx_desc - 1); + /* Setup the Receive Control Register */ + rctl &= ~(3 << E1000_RCTL_MO_SHIFT); + rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | + E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | + (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + + /* Make sure VLAN Filters are off */ + rctl &= ~E1000_RCTL_VFE; + rctl &= ~E1000_RCTL_SBP; + rctl |= E1000_RCTL_SZ_2048; + if (ifp->if_mtu > ETHERMTU) + rctl |= E1000_RCTL_LPE; + else + rctl &= ~E1000_RCTL_LPE; + + /* Write out the settings */ + E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } -/********************************************************************* - * - * Free receive related data structures. - * - **********************************************************************/ -static void -em_free_receive_structures(struct adapter *adapter) -{ - struct em_buffer *rx_buffer; - int i; - - INIT_DEBUGOUT("free_receive_structures: begin"); - - if (adapter->rx_sparemap) { - bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap); - adapter->rx_sparemap = NULL; - } - - /* Cleanup any existing buffers */ - if (adapter->rx_buffer_area != NULL) { - rx_buffer = adapter->rx_buffer_area; - for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { - if (rx_buffer->m_head != NULL) { - bus_dmamap_sync(adapter->rxtag, rx_buffer->map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(adapter->rxtag, - rx_buffer->map); - m_freem(rx_buffer->m_head); - rx_buffer->m_head = NULL; - } else if (rx_buffer->map != NULL) - bus_dmamap_unload(adapter->rxtag, - rx_buffer->map); - if (rx_buffer->map != NULL) { - bus_dmamap_destroy(adapter->rxtag, - rx_buffer->map); - rx_buffer->map = NULL; - } - } - } - - if (adapter->rx_buffer_area != NULL) { - free(adapter->rx_buffer_area, M_DEVBUF); - adapter->rx_buffer_area = NULL; - } - - if (adapter->rxtag != NULL) { - bus_dma_tag_destroy(adapter->rxtag); - adapter->rxtag = NULL; - } -} /********************************************************************* * @@ -4364,184 +4032,127 @@ em_free_receive_structures(struct adapter *adapter) * For polling we also now return the number of cleaned packets *********************************************************************/ static int -em_rxeof(struct adapter *adapter, int count) +em_rxeof(struct rx_ring *rxr, int count) { - struct ifnet *ifp = adapter->ifp;; - struct mbuf *mp; - u8 status, accept_frame = 0, eop = 0; - u16 len, desc_len, prev_len_adj; - int i, rx_sent = 0; - struct e1000_rx_desc *current_desc; + struct adapter *adapter = rxr->adapter; + struct ifnet *ifp = adapter->ifp;; + struct mbuf *mp, *sendmp; + u8 status; + u16 len; + int i, processed, rxdone = 0; + bool eop; + struct e1000_rx_desc *cur; - EM_RX_LOCK(adapter); - i = adapter->next_rx_desc_to_check; - current_desc = &adapter->rx_desc_base[i]; - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_POSTREAD); + EM_RX_LOCK(rxr); - if (!((current_desc->status) & E1000_RXD_STAT_DD)) { - EM_RX_UNLOCK(adapter); - return (rx_sent); - } + for (i = rxr->next_to_check, processed = 0; count != 0;) { - while ((current_desc->status & E1000_RXD_STAT_DD) && - (count != 0) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) { - struct mbuf *m = NULL; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; - mp = adapter->rx_buffer_area[i].m_head; - /* - * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT - * needs to access the last received byte in the mbuf. - */ - bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map, - BUS_DMASYNC_POSTREAD); + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - accept_frame = 1; - prev_len_adj = 0; - desc_len = le16toh(current_desc->length); - status = current_desc->status; - if (status & E1000_RXD_STAT_EOP) { - count--; - eop = 1; - if (desc_len < ETHER_CRC_LEN) { - len = 0; - prev_len_adj = ETHER_CRC_LEN - desc_len; - } else - len = desc_len - ETHER_CRC_LEN; - } else { - eop = 0; - len = desc_len; - } + cur = &rxr->rx_base[i]; + status = cur->status; + mp = sendmp = NULL; - if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) { - u8 last_byte; - u32 pkt_len = desc_len; + if ((status & E1000_RXD_STAT_DD) == 0) + break; - if (adapter->fmp != NULL) - pkt_len += adapter->fmp->m_pkthdr.len; + len = le16toh(cur->length); + eop = (status & E1000_RXD_STAT_EOP) != 0; + count--; - last_byte = *(mtod(mp, caddr_t) + desc_len - 1); - if (TBI_ACCEPT(&adapter->hw, status, - current_desc->errors, pkt_len, last_byte, - adapter->min_frame_size, adapter->max_frame_size)) { - e1000_tbi_adjust_stats_82543(&adapter->hw, - &adapter->stats, pkt_len, - adapter->hw.mac.addr, - adapter->max_frame_size); - if (len > 0) - len--; - } else - accept_frame = 0; - } - - if (accept_frame) { - if (em_get_buf(adapter, i) != 0) { - ifp->if_iqdrops++; - goto discard; - } + if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) == 0) { /* Assign correct length to the current fragment */ + mp = rxr->rx_buffers[i].m_head; mp->m_len = len; - if (adapter->fmp == NULL) { + if (rxr->fmp == NULL) { mp->m_pkthdr.len = len; - adapter->fmp = mp; /* Store the first mbuf */ - adapter->lmp = mp; + rxr->fmp = mp; /* Store the first mbuf */ + rxr->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; - /* - * Adjust length of previous mbuf in chain if - * we received less than 4 bytes in the last - * descriptor. - */ - if (prev_len_adj > 0) { - adapter->lmp->m_len -= prev_len_adj; - adapter->fmp->m_pkthdr.len -= - prev_len_adj; - } - adapter->lmp->m_next = mp; - adapter->lmp = adapter->lmp->m_next; - adapter->fmp->m_pkthdr.len += len; + rxr->lmp->m_next = mp; + rxr->lmp = rxr->lmp->m_next; + rxr->fmp->m_pkthdr.len += len; } if (eop) { - adapter->fmp->m_pkthdr.rcvif = ifp; + rxr->fmp->m_pkthdr.rcvif = ifp; ifp->if_ipackets++; - em_receive_checksum(adapter, current_desc, - adapter->fmp); + em_receive_checksum(cur, rxr->fmp); #ifndef __NO_STRICT_ALIGNMENT if (adapter->max_frame_size > (MCLBYTES - ETHER_ALIGN) && - em_fixup_rx(adapter) != 0) + em_fixup_rx(rxr) != 0) goto skip; #endif if (status & E1000_RXD_STAT_VP) { -#if __FreeBSD_version < 700000 - VLAN_INPUT_TAG_NEW(ifp, adapter->fmp, - (le16toh(current_desc->special) & - E1000_RXD_SPC_VLAN_MASK)); -#else - adapter->fmp->m_pkthdr.ether_vtag = - (le16toh(current_desc->special) & + rxr->fmp->m_pkthdr.ether_vtag = + (le16toh(cur->special) & E1000_RXD_SPC_VLAN_MASK); - adapter->fmp->m_flags |= M_VLANTAG; -#endif + rxr->fmp->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT skip: #endif - m = adapter->fmp; - adapter->fmp = NULL; - adapter->lmp = NULL; + sendmp = rxr->fmp; + rxr->fmp = NULL; + rxr->lmp = NULL; } } else { ifp->if_ierrors++; -discard: /* Reuse loaded DMA map and just update mbuf chain */ - mp = adapter->rx_buffer_area[i].m_head; + mp = rxr->rx_buffers[i].m_head; mp->m_len = mp->m_pkthdr.len = MCLBYTES; mp->m_data = mp->m_ext.ext_buf; mp->m_next = NULL; if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) m_adj(mp, ETHER_ALIGN); - if (adapter->fmp != NULL) { - m_freem(adapter->fmp); - adapter->fmp = NULL; - adapter->lmp = NULL; + if (rxr->fmp != NULL) { + m_freem(rxr->fmp); + rxr->fmp = NULL; + rxr->lmp = NULL; } - m = NULL; + sendmp = NULL; } /* Zero out the receive descriptors status. */ - current_desc->status = 0; - bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + cur->status = 0; + ++rxdone; /* cumulative for POLL */ + ++processed; /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; - /* Call into the stack */ - if (m != NULL) { - adapter->next_rx_desc_to_check = i; - EM_RX_UNLOCK(adapter); - (*ifp->if_input)(ifp, m); - EM_RX_LOCK(adapter); - rx_sent++; - i = adapter->next_rx_desc_to_check; - } - current_desc = &adapter->rx_desc_base[i]; - } - adapter->next_rx_desc_to_check = i; - /* Advance the E1000's Receive Queue #0 "Tail Pointer". */ - if (--i < 0) - i = adapter->num_rx_desc - 1; - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); - EM_RX_UNLOCK(adapter); - return (rx_sent); + /* Send to the stack */ + if (sendmp != NULL) + (*ifp->if_input)(ifp, sendmp); + + /* Only refresh mbufs every 8 descriptors */ + if (processed == 8) { + em_refresh_mbufs(rxr, i); + processed = 0; + } + } + + /* Catch any remaining refresh work */ + if (processed != 0) { + em_refresh_mbufs(rxr, i); + processed = 0; + } + + rxr->next_to_check = i; + + EM_RX_UNLOCK(rxr); + return (rxdone); } #ifndef __NO_STRICT_ALIGNMENT @@ -4560,13 +4171,14 @@ discard: * not used at all on architectures with strict alignment. */ static int -em_fixup_rx(struct adapter *adapter) +em_fixup_rx(struct rx_ring *rxr) { + struct adapter *adapter = rxr->adapter; struct mbuf *m, *n; int error; error = 0; - m = adapter->fmp; + m = rxr->fmp; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; @@ -4579,11 +4191,11 @@ em_fixup_rx(struct adapter *adapter) n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; - adapter->fmp = n; + rxr->fmp = n; } else { adapter->dropped_pkts++; - m_freem(adapter->fmp); - adapter->fmp = NULL; + m_freem(rxr->fmp); + rxr->fmp = NULL; error = ENOMEM; } } @@ -4600,13 +4212,10 @@ em_fixup_rx(struct adapter *adapter) * *********************************************************************/ static void -em_receive_checksum(struct adapter *adapter, - struct e1000_rx_desc *rx_desc, struct mbuf *mp) +em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp) { - /* 82543 or newer only */ - if ((adapter->hw.mac.type < e1000_82543) || - /* Ignore Checksum bit is set */ - (rx_desc->status & E1000_RXD_STAT_IXSM)) { + /* Ignore Checksum bit is set */ + if (rx_desc->status & E1000_RXD_STAT_IXSM) { mp->m_pkthdr.csum_flags = 0; return; } @@ -4633,7 +4242,6 @@ em_receive_checksum(struct adapter *adapter, } } -#if __FreeBSD_version >= 700029 /* * This routine is run via an vlan * config EVENT @@ -4720,7 +4328,6 @@ em_setup_vlan_hw_support(struct adapter *adapter) E1000_WRITE_REG(&adapter->hw, E1000_RLPML, adapter->max_frame_size + VLAN_TAG_SIZE); } -#endif static void em_enable_intr(struct adapter *adapter) @@ -4763,15 +4370,12 @@ em_init_manageability(struct adapter *adapter) manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ - if (adapter->hw.mac.type >= e1000_82571) { - manc |= E1000_MANC_EN_MNG2HOST; + manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) - manc2h |= E1000_MNG2HOST_PORT_623; - manc2h |= E1000_MNG2HOST_PORT_664; - E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); - } - + manc2h |= E1000_MNG2HOST_PORT_623; + manc2h |= E1000_MNG2HOST_PORT_664; + E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } @@ -4788,9 +4392,7 @@ em_release_manageability(struct adapter *adapter) /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; - - if (adapter->hw.mac.type >= e1000_82571) - manc &= ~E1000_MANC_EN_MNG2HOST; + manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } @@ -4874,20 +4476,10 @@ em_get_wakeup(device_t dev) apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { - case e1000_82542: - case e1000_82543: - break; - case e1000_82544: - e1000_read_nvm(&adapter->hw, - NVM_INIT_CONTROL2_REG, 1, &eeprom_data); - apme_mask = EM_82544_APME; - break; case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* Falls thru */ - case e1000_82546: - case e1000_82546_rev_3: case e1000_82571: case e1000_82572: case e1000_80003es2lan: @@ -4921,11 +4513,6 @@ em_get_wakeup(device_t dev) */ device_id = pci_get_device(dev); switch (device_id) { - case E1000_DEV_ID_82546GB_PCIE: - adapter->wol = 0; - break; - case E1000_DEV_ID_82546EB_FIBER: - case E1000_DEV_ID_82546GB_FIBER: case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ @@ -4933,7 +4520,6 @@ em_get_wakeup(device_t dev) E1000_STATUS_FUNC_1) adapter->wol = 0; break; - case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: @@ -4969,7 +4555,6 @@ em_enable_wakeup(device_t dev) E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); - /* ICH workaround code */ if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || @@ -5105,62 +4690,6 @@ out: return ret; } - -/********************************************************************* -* 82544 Coexistence issue workaround. -* There are 2 issues. -* 1. Transmit Hang issue. -* To detect this issue, following equation can be used... -* SIZE[3:0] + ADDR[2:0] = SUM[3:0]. -* If SUM[3:0] is in between 1 to 4, we will have this issue. -* -* 2. DAC issue. -* To detect this issue, following equation can be used... -* SIZE[3:0] + ADDR[2:0] = SUM[3:0]. -* If SUM[3:0] is in between 9 to c, we will have this issue. -* -* -* WORKAROUND: -* Make sure we do not have ending address -* as 1,2,3,4(Hang) or 9,a,b,c (DAC) -* -*************************************************************************/ -static u32 -em_fill_descriptors (bus_addr_t address, u32 length, - PDESC_ARRAY desc_array) -{ - u32 safe_terminator; - - /* Since issue is sensitive to length and address.*/ - /* Let us first check the address...*/ - if (length <= 4) { - desc_array->descriptor[0].address = address; - desc_array->descriptor[0].length = length; - desc_array->elements = 1; - return (desc_array->elements); - } - safe_terminator = (u32)((((u32)address & 0x7) + - (length & 0xF)) & 0xF); - /* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */ - if (safe_terminator == 0 || - (safe_terminator > 4 && - safe_terminator < 9) || - (safe_terminator > 0xC && - safe_terminator <= 0xF)) { - desc_array->descriptor[0].address = address; - desc_array->descriptor[0].length = length; - desc_array->elements = 1; - return (desc_array->elements); - } - - desc_array->descriptor[0].address = address; - desc_array->descriptor[0].length = length - 4; - desc_array->descriptor[1].address = address + (length - 4); - desc_array->descriptor[1].length = 4; - desc_array->elements = 2; - return (desc_array->elements); -} - /********************************************************************** * * Update the board statistics counters. @@ -5270,6 +4799,8 @@ em_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; u8 *hw_addr = adapter->hw.hw_addr; + struct rx_ring *rxr = adapter->rx_rings; + struct tx_ring *txr = adapter->tx_rings; device_printf(dev, "Adapter hardware address = %p \n", hw_addr); device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n", @@ -5287,29 +4818,31 @@ em_print_debug_info(struct adapter *adapter) device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDTR), E1000_READ_REG(&adapter->hw, E1000_RADV)); - device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n", - (long long)adapter->tx_fifo_wrk_cnt, - (long long)adapter->tx_fifo_reset_cnt); - device_printf(dev, "hw tdh = %d, hw tdt = %d\n", - E1000_READ_REG(&adapter->hw, E1000_TDH(0)), - E1000_READ_REG(&adapter->hw, E1000_TDT(0))); - device_printf(dev, "hw rdh = %d, hw rdt = %d\n", - E1000_READ_REG(&adapter->hw, E1000_RDH(0)), - E1000_READ_REG(&adapter->hw, E1000_RDT(0))); - device_printf(dev, "Num Tx descriptors avail = %d\n", - adapter->num_tx_desc_avail); - device_printf(dev, "Tx Descriptors not avail1 = %ld\n", - adapter->no_tx_desc_avail1); - device_printf(dev, "Tx Descriptors not avail2 = %ld\n", - adapter->no_tx_desc_avail2); + + for (int i = 0; i < adapter->num_queues; i++, txr++) { + device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i, + E1000_READ_REG(&adapter->hw, E1000_TDH(i)), + E1000_READ_REG(&adapter->hw, E1000_TDT(i))); + device_printf(dev, "TX(%d) no descriptors avail event = %lld\n", + txr->me, (long long)txr->no_desc_avail); + device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me, + (long long)txr->tx_irq); + device_printf(dev, "Num Tx descriptors avail = %d\n", + txr->tx_avail); + device_printf(dev, "Tx Descriptors not avail1 = %ld\n", + txr->no_desc_avail); + } + for (int i = 0; i < adapter->num_queues; i++, rxr++) { + device_printf(dev, "hw rdh = %d, hw rdt = %d\n", + E1000_READ_REG(&adapter->hw, E1000_RDH(i)), + E1000_READ_REG(&adapter->hw, E1000_RDT(i))); + } device_printf(dev, "Std mbuf failed = %ld\n", adapter->mbuf_alloc_failed); device_printf(dev, "Std mbuf cluster failed = %ld\n", adapter->mbuf_cluster_failed); device_printf(dev, "Driver dropped packets = %ld\n", adapter->dropped_pkts); - device_printf(dev, "Driver tx dma failure in encap = %ld\n", - adapter->no_tx_dma_setup); } static void @@ -5342,12 +4875,8 @@ em_print_hw_stats(struct adapter *adapter) (long long)adapter->stats.algnerrc); device_printf(dev, "Collision/Carrier extension errors = %lld\n", (long long)adapter->stats.cexterr); - device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns); device_printf(dev, "watchdog timeouts = %ld\n", adapter->watchdog_events); - device_printf(dev, "RX MSIX IRQ = %ld TX MSIX IRQ = %ld" - " LINK MSIX IRQ = %ld\n", adapter->rx_irq, - adapter->tx_irq , adapter->link_irq); device_printf(dev, "XON Rcvd = %lld\n", (long long)adapter->stats.xonrxc); device_printf(dev, "XON Xmtd = %lld\n", @@ -5451,9 +4980,7 @@ em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) struct em_int_delay_info *info; struct adapter *adapter; u32 regval; - int error; - int usecs; - int ticks; + int error, usecs, ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; @@ -5502,7 +5029,6 @@ em_add_int_delay_sysctl(struct adapter *adapter, const char *name, info, 0, em_sysctl_int_delay, "I", description); } -#ifndef EM_LEGACY_IRQ static void em_add_rx_process_limit(struct adapter *adapter, const char *name, const char *description, int *limit, int value) @@ -5512,6 +5038,5 @@ em_add_rx_process_limit(struct adapter *adapter, const char *name, SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); } -#endif diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index fe5a99b3b99..d3a1019133c 100644 --- a/sys/dev/e1000/if_em.h +++ b/sys/dev/e1000/if_em.h @@ -52,7 +52,6 @@ * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_TXD 80 -#define EM_MAX_TXD_82543 256 #define EM_MAX_TXD 4096 #define EM_DEFAULT_TXD 1024 @@ -70,7 +69,6 @@ * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ #define EM_MIN_RXD 80 -#define EM_MAX_RXD_82543 256 #define EM_MAX_RXD 4096 #define EM_DEFAULT_RXD 1024 @@ -144,7 +142,6 @@ * transmit descriptors. */ #define EM_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8) -#define EM_TX_OP_THRESHOLD (adapter->num_tx_desc / 32) /* * This parameter controls whether or not autonegotation is enabled. @@ -182,7 +179,7 @@ #define EM_DEFAULT_PBA 0x00000030 #define EM_SMARTSPEED_DOWNSHIFT 3 #define EM_SMARTSPEED_MAX 15 -#define EM_MAX_INTR 10 +#define EM_MAX_LOOP 10 #define MAX_NUM_MULTICAST_ADDRESSES 128 #define PCI_ANY_ID (~0U) @@ -191,11 +188,6 @@ #define EM_EEPROM_APME 0x400; #define EM_82544_APME 0x0004; -/* Code compatilbility between 6 and 7 */ -#ifndef ETHER_BPF_MTAP -#define ETHER_BPF_MTAP BPF_MTAP -#endif - /* * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will @@ -209,7 +201,6 @@ #define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK) #define EM_BAR_TYPE_MASK 0x00000001 #define EM_BAR_TYPE_MMEM 0x00000000 -#define EM_BAR_TYPE_IO 0x00000001 #define EM_BAR_TYPE_FLASH 0x0014 #define EM_BAR_MEM_TYPE(v) ((v) & EM_BAR_MEM_TYPE_MASK) #define EM_BAR_MEM_TYPE_MASK 0x00000006 @@ -237,6 +228,7 @@ #define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define EM_MSIX_MASK 0x01F00000 /* For 82574 use */ +#define EM_MSIX_LINK 0x01000000 /* For 82574 use */ #define ETH_ZLEN 60 #define ETH_ADDR_LEN 6 #define CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ @@ -249,18 +241,6 @@ */ #define EM_EIAC 0x000DC -/* Used in for 82547 10Mb Half workaround */ -#define EM_PBA_BYTES_SHIFT 0xA -#define EM_TX_HEAD_ADDR_SHIFT 7 -#define EM_PBA_TX_MASK 0xFFFF0000 -#define EM_FIFO_HDR 0x10 -#define EM_82547_PKT_THRESH 0x3e0 - -/* Precision Time Sync (IEEE 1588) defines */ -#define ETHERTYPE_IEEE1588 0x88F7 -#define PICOSECS_PER_TICK 20833 -#define TSYNC_PORT 319 /* UDP port for the protocol */ - /* * Bus dma allocation structure used by * e1000_dma_malloc and e1000_dma_free. @@ -282,12 +262,76 @@ struct em_int_delay_info { int value; /* Current value in usecs */ }; +/* + * The transmit ring, one per tx queue + */ +struct tx_ring { + struct adapter *adapter; + struct mtx tx_mtx; + char mtx_name[16]; + u32 me; + u32 msix; + u32 ims; + bool watchdog_check; + int watchdog_time; + struct em_dma_alloc txdma; + struct e1000_tx_desc *tx_base; + struct task tx_task; + struct taskqueue *tq; + u32 next_avail_desc; + u32 next_to_clean; + struct em_buffer *tx_buffers; + volatile u16 tx_avail; + u32 tx_tso; /* last tx was tso */ + u16 last_hw_offload; +#if __FreeBSD_version >= 800000 + struct buf_ring *br; +#endif + /* Interrupt resources */ + bus_dma_tag_t txtag; + void *tag; + struct resource *res; + u64 tx_irq; + u64 no_desc_avail; +}; + +/* + * The Receive ring, one per rx queue + */ +struct rx_ring { + struct adapter *adapter; + u32 me; + u32 msix; + u32 ims; + struct mtx rx_mtx; + char mtx_name[16]; + u32 payload; + struct task rx_task; + struct taskqueue *tq; + struct e1000_rx_desc *rx_base; + struct em_dma_alloc rxdma; + unsigned int next_to_refresh; + unsigned int next_to_check; + struct em_buffer *rx_buffers; + struct mbuf *fmp; + struct mbuf *lmp; + + /* Interrupt resources */ + void *tag; + struct resource *res; + bus_dma_tag_t rxtag; + bus_dmamap_t rx_sparemap; + + /* Soft stats */ + u64 rx_irq; + u64 rx_packets; + u64 rx_bytes; +}; + + /* Our adapter structure */ struct adapter { struct ifnet *ifp; -#if __FreeBSD_version >= 800000 - struct buf_ring *br; -#endif struct e1000_hw hw; /* FreeBSD operating-system-specific structures. */ @@ -296,42 +340,50 @@ struct adapter { struct resource *memory; struct resource *flash; - struct resource *msix; + struct resource *msix_mem; - struct resource *ioport; - int io_rid; - - /* 82574 may use 3 int vectors */ - struct resource *res[3]; - void *tag[3]; - int rid[3]; + struct resource *res; + void *tag; + u32 linkvec; + u32 ivars; struct ifmedia media; struct callout timer; - struct callout tx_fifo_timer; - bool watchdog_check; - int watchdog_time; - int msi; + int msix; int if_flags; int max_frame_size; int min_frame_size; struct mtx core_mtx; - struct mtx tx_mtx; - struct mtx rx_mtx; int em_insert_vlan_header; + u32 ims; + bool in_detach; /* Task for FAST handling */ struct task link_task; - struct task rxtx_task; - struct task rx_task; - struct task tx_task; + struct task que_task; struct taskqueue *tq; /* private task queue */ -#if __FreeBSD_version >= 700029 eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; - u32 num_vlans; -#endif + + u16 num_vlans; + u16 num_queues; + + /* + * Transmit rings: + * Allocated at run time, an array of rings. + */ + struct tx_ring *tx_rings; + int num_tx_desc; + u32 txd_cmd; + + /* + * Receive rings: + * Allocated at run time, an array of rings. + */ + struct rx_ring *rx_rings; + int num_rx_desc; + u32 rx_process_limit; /* Management and WOL features */ u32 wol; @@ -348,96 +400,26 @@ struct adapter { struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; - /* - * Transmit definitions - * - * We have an array of num_tx_desc descriptors (handled - * by the controller) paired with an array of tx_buffers - * (at tx_buffer_area). - * The index of the next available descriptor is next_avail_tx_desc. - * The number of remaining tx_desc is num_tx_desc_avail. - */ - struct em_dma_alloc txdma; /* bus_dma glue for tx desc */ - struct e1000_tx_desc *tx_desc_base; - uint32_t next_avail_tx_desc; - uint32_t next_tx_to_clean; - volatile uint16_t num_tx_desc_avail; - uint16_t num_tx_desc; - uint16_t last_hw_offload; - uint32_t txd_cmd; - struct em_buffer *tx_buffer_area; - bus_dma_tag_t txtag; /* dma tag for tx */ - uint32_t tx_tso; /* last tx was tso */ - - /* - * Receive definitions - * - * we have an array of num_rx_desc rx_desc (handled by the - * controller), and paired with an array of rx_buffers - * (at rx_buffer_area). - * The next pair to check on receive is at offset next_rx_desc_to_check - */ - struct em_dma_alloc rxdma; /* bus_dma glue for rx desc */ - struct e1000_rx_desc *rx_desc_base; - uint32_t next_rx_desc_to_check; - uint32_t rx_buffer_len; - uint16_t num_rx_desc; - int rx_process_limit; - struct em_buffer *rx_buffer_area; - bus_dma_tag_t rxtag; - bus_dmamap_t rx_sparemap; - - /* - * First/last mbuf pointers, for - * collecting multisegment RX packets. - */ - struct mbuf *fmp; - struct mbuf *lmp; - /* Misc stats maintained by the driver */ unsigned long dropped_pkts; unsigned long mbuf_alloc_failed; unsigned long mbuf_cluster_failed; - unsigned long no_tx_desc_avail1; - unsigned long no_tx_desc_avail2; unsigned long no_tx_map_avail; unsigned long no_tx_dma_setup; - unsigned long watchdog_events; unsigned long rx_overruns; - unsigned long rx_irq; - unsigned long tx_irq; + unsigned long watchdog_events; unsigned long link_irq; - /* 82547 workaround */ - uint32_t tx_fifo_size; - uint32_t tx_fifo_head; - uint32_t tx_fifo_head_addr; - uint64_t tx_fifo_reset_cnt; - uint64_t tx_fifo_wrk_cnt; - uint32_t tx_head_addr; - - /* For 82544 PCIX Workaround */ - boolean_t pcix_82544; - boolean_t in_detach; - -#ifdef EM_IEEE1588 - /* IEEE 1588 precision time support */ - struct cyclecounter cycles; - struct nettimer clock; - struct nettime_compare compare; - struct hwtstamp_ctrl hwtstamp; -#endif - struct e1000_hw_stats stats; }; -/* ****************************************************************************** +/******************************************************************************** * vendor_info_array * * This array contains the list of Subvendor/Subdevice IDs on which the driver * should load. * - * ******************************************************************************/ + ********************************************************************************/ typedef struct _em_vendor_info_t { unsigned int vendor_id; unsigned int device_id; @@ -452,19 +434,6 @@ struct em_buffer { bus_dmamap_t map; /* bus_dma map for packet */ }; -/* For 82544 PCIX Workaround */ -typedef struct _ADDRESS_LENGTH_PAIR -{ - uint64_t address; - uint32_t length; -} ADDRESS_LENGTH_PAIR, *PADDRESS_LENGTH_PAIR; - -typedef struct _DESCRIPTOR_PAIR -{ - ADDRESS_LENGTH_PAIR descriptor[4]; - uint32_t elements; -} DESC_ARRAY, *PDESC_ARRAY; - #define EM_CORE_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF) #define EM_TX_LOCK_INIT(_sc, _name) \ diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 2eb113aa4d7..e7305d7b4e3 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -63,10 +63,6 @@ #include #include -#ifdef IGB_IEEE1588 -#include -#endif - #include #include #include @@ -102,7 +98,7 @@ int igb_display_debug_stats = 0; /********************************************************************* * Driver version: *********************************************************************/ -char igb_driver_version[] = "version - 1.9.1"; +char igb_driver_version[] = "version - 1.9.3"; /********************************************************************* @@ -204,12 +200,11 @@ static void igb_disable_intr(struct adapter *); static void igb_update_stats_counters(struct adapter *); static bool igb_txeof(struct tx_ring *); -static __inline void igb_rx_discard(struct rx_ring *, - union e1000_adv_rx_desc *, int); +static __inline void igb_rx_discard(struct rx_ring *, int); static __inline void igb_rx_input(struct rx_ring *, struct ifnet *, struct mbuf *, u32); -static bool igb_rxeof(struct rx_ring *, int); +static bool igb_rxeof(struct igb_queue *, int); static void igb_rx_checksum(u32, struct mbuf *, u32); static int igb_tx_ctx_setup(struct tx_ring *, struct mbuf *); static bool igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *); @@ -218,7 +213,7 @@ static void igb_disable_promisc(struct adapter *); static void igb_set_multi(struct adapter *); static void igb_print_hw_stats(struct adapter *); static void igb_update_link_status(struct adapter *); -static int igb_get_buf(struct rx_ring *, int, u8); +static void igb_refresh_mbufs(struct rx_ring *, int); static void igb_register_vlan(void *, struct ifnet *, u16); static void igb_unregister_vlan(void *, struct ifnet *, u16); @@ -814,6 +809,9 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m) /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; + else + i = curcpu % adapter->num_queues; + txr = &adapter->tx_rings[i]; if (IGB_TX_TRYLOCK(txr)) { @@ -1192,15 +1190,15 @@ igb_init(void *arg) static void igb_handle_rxtx(void *context, int pending) { - struct adapter *adapter = context; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - struct ifnet *ifp; + struct igb_queue *que = context; + struct adapter *adapter = que->adapter; + struct tx_ring *txr = adapter->tx_rings; + struct ifnet *ifp; ifp = adapter->ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - if (igb_rxeof(rxr, adapter->rx_process_limit)) + if (igb_rxeof(que, adapter->rx_process_limit)) taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); IGB_TX_LOCK(txr); igb_txeof(txr); @@ -1224,14 +1222,13 @@ igb_handle_que(void *context, int pending) struct igb_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; - struct rx_ring *rxr = que->rxr; struct ifnet *ifp = adapter->ifp; u32 loop = IGB_MAX_LOOP; bool more; /* RX first */ do { - more = igb_rxeof(rxr, -1); + more = igb_rxeof(que, -1); } while (loop-- && more); if (IGB_TX_TRYLOCK(txr)) { @@ -1388,7 +1385,7 @@ igb_msix_que(void *arg) more_tx = igb_txeof(txr); IGB_TX_UNLOCK(txr); - more_rx = igb_rxeof(rxr, adapter->rx_process_limit); + more_rx = igb_rxeof(que, adapter->rx_process_limit); if (igb_enable_aim == FALSE) goto no_calc; @@ -1955,6 +1952,7 @@ igb_update_link_status(struct adapter *adapter) "Full Duplex" : "Half Duplex")); adapter->link_active = 1; ifp->if_baudrate = adapter->link_speed * 1000000; + /* This can sleep */ if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { ifp->if_baudrate = adapter->link_speed = 0; @@ -1962,6 +1960,7 @@ igb_update_link_status(struct adapter *adapter) if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; + /* This can sleep */ if_link_state_change(ifp, LINK_STATE_DOWN); /* Turn off watchdogs */ for (int i = 0; i < adapter->num_queues; i++, txr++) @@ -2080,8 +2079,9 @@ igb_allocate_pci_resources(struct adapter *adapter) static int igb_allocate_legacy(struct adapter *adapter) { - device_t dev = adapter->dev; - int error, rid = 0; + device_t dev = adapter->dev; + struct igb_queue *que = adapter->queues; + int error, rid = 0; /* Turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); @@ -2103,7 +2103,7 @@ igb_allocate_legacy(struct adapter *adapter) * Try allocating a fast interrupt and the associated deferred * processing contexts. */ - TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter); + TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que); /* Make tasklet for deferred link handling */ TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, @@ -2433,22 +2433,19 @@ igb_setup_msix(struct adapter *adapter) /* Figure out a reasonable auto config value */ queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; - /* Can have max of 4 queues on 82575 */ - if (adapter->hw.mac.type == e1000_82575) { - if (queues > 4) - queues = 4; - if (igb_num_queues > 4) - igb_num_queues = 4; - } + /* Manual override */ + if (igb_num_queues != 0) + queues = igb_num_queues; - if (igb_num_queues == 0) - igb_num_queues = queues; + /* Can have max of 4 queues on 82575 */ + if ((adapter->hw.mac.type == e1000_82575) && (queues > 4)) + queues = 4; /* ** One vector (RX/TX pair) per queue ** plus an additional for Link interrupt */ - want = igb_num_queues + 1; + want = queues + 1; if (msgs >= want) msgs = want; else { @@ -2461,7 +2458,7 @@ igb_setup_msix(struct adapter *adapter) if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) { device_printf(adapter->dev, "Using MSIX interrupts with %d vectors\n", msgs); - adapter->num_queues = igb_num_queues; + adapter->num_queues = queues; return (msgs); } msi: @@ -2922,9 +2919,7 @@ err_tx_desc: igb_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: -#if __FreeBSD_version >= 800000 buf_ring_free(txr->br, M_DEVBUF); -#endif free(adapter->tx_rings, M_DEVBUF); tx_fail: free(adapter->queues, M_DEVBUF); @@ -3502,111 +3497,88 @@ igb_txeof(struct tx_ring *txr) /********************************************************************* * - * Setup descriptor buffer(s) from system mbuf buffer pools. - * i - designates the ring index - * clean - tells the function whether to update - * the header, the packet buffer, or both. + * Refresh mbuf buffers for RX descriptor rings + * - now keeps its own state so discards due to resource + * exhaustion are unnecessary, if an mbuf cannot be obtained + * it just returns, keeping its placeholder, thus it can simply + * be recalled to try again. * **********************************************************************/ -static int -igb_get_buf(struct rx_ring *rxr, int i, u8 clean) +static void +igb_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; - struct igb_rx_buf *rxbuf; - struct mbuf *mh, *mp; bus_dma_segment_t hseg[1]; bus_dma_segment_t pseg[1]; - bus_dmamap_t map; - int nsegs, error; + struct igb_rx_buf *rxbuf; + struct mbuf *mh, *mp; + int i, nsegs, error, cleaned; - - rxbuf = &rxr->rx_buffers[i]; - mh = mp = NULL; - if ((clean & IGB_CLEAN_HEADER) != 0) { - mh = m_gethdr(M_DONTWAIT, MT_DATA); - if (mh == NULL) { - adapter->mbuf_header_failed++; - return (ENOBUFS); - } - mh->m_pkthdr.len = mh->m_len = MHLEN; - /* - * Because IGB_HDR_BUF size is less than MHLEN - * and we configure controller to split headers - * we can align mbuf on ETHER_ALIGN boundary. - */ - m_adj(mh, ETHER_ALIGN); - error = bus_dmamap_load_mbuf_sg(rxr->rx_htag, - rxr->rx_hspare_map, mh, hseg, &nsegs, 0); - if (error != 0) { - m_freem(mh); - return (error); - } - mh->m_flags &= ~M_PKTHDR; - } - if ((clean & IGB_CLEAN_PAYLOAD) != 0) { - mp = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, - adapter->rx_mbuf_sz); - if (mp == NULL) { - if (mh != NULL) { - adapter->mbuf_packet_failed++; - bus_dmamap_unload(rxr->rx_htag, - rxbuf->head_map); - mh->m_flags |= M_PKTHDR; - m_freem(mh); + i = rxr->next_to_refresh; + cleaned = -1; /* Signify no completions */ + while (i != limit) { + rxbuf = &rxr->rx_buffers[i]; + if (rxbuf->m_head == NULL) { + mh = m_gethdr(M_DONTWAIT, MT_DATA); + if (mh == NULL) + goto update; + mh->m_pkthdr.len = mh->m_len = MHLEN; + mh->m_len = MHLEN; + mh->m_flags |= M_PKTHDR; + m_adj(mh, ETHER_ALIGN); + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->htag, + rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + printf("GET BUF: dmamap load" + " failure - %d\n", error); + m_free(mh); + goto update; } - return (ENOBUFS); + rxbuf->m_head = mh; + bus_dmamap_sync(rxr->htag, rxbuf->hmap, + BUS_DMASYNC_PREREAD); + rxr->rx_base[i].read.hdr_addr = + htole64(hseg[0].ds_addr); } - mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; - error = bus_dmamap_load_mbuf_sg(rxr->rx_ptag, - rxr->rx_pspare_map, mp, pseg, &nsegs, 0); - if (error != 0) { - if (mh != NULL) { - bus_dmamap_unload(rxr->rx_htag, - rxbuf->head_map); - mh->m_flags |= M_PKTHDR; - m_freem(mh); + + if (rxbuf->m_pack == NULL) { + mp = m_getjcl(M_DONTWAIT, MT_DATA, + M_PKTHDR, adapter->rx_mbuf_sz); + if (mp == NULL) + goto update; + mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->ptag, + rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + printf("GET BUF: dmamap load" + " failure - %d\n", error); + m_free(mp); + goto update; } - m_freem(mp); - return (error); + rxbuf->m_pack = mp; + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_PREREAD); + rxr->rx_base[i].read.pkt_addr = + htole64(pseg[0].ds_addr); } - mp->m_flags &= ~M_PKTHDR; - } - /* Loading new DMA maps complete, unload maps for received buffers. */ - if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map); + cleaned = i; + /* Calculate next index */ + if (++i == adapter->num_rx_desc) + i = 0; + /* This is the work marker for refresh */ + rxr->next_to_refresh = i; } - if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) { - bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map); - } - - /* Reflect loaded dmamaps. */ - if ((clean & IGB_CLEAN_HEADER) != 0) { - map = rxbuf->head_map; - rxbuf->head_map = rxr->rx_hspare_map; - rxr->rx_hspare_map = map; - rxbuf->m_head = mh; - bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr); - } - if ((clean & IGB_CLEAN_PAYLOAD) != 0) { - map = rxbuf->pack_map; - rxbuf->pack_map = rxr->rx_pspare_map; - rxr->rx_pspare_map = map; - rxbuf->m_pack = mp; - bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr); - } - - return (0); +update: + if (cleaned != -1) /* If we refreshed some, bump tail */ + E1000_WRITE_REG(&adapter->hw, + E1000_RDT(rxr->me), cleaned); + return; } + /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one @@ -3643,7 +3615,7 @@ igb_allocate_receive_buffers(struct rx_ring *rxr) 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ - &rxr->rx_htag))) { + &rxr->htag))) { device_printf(dev, "Unable to create RX DMA tag\n"); goto fail; } @@ -3659,40 +3631,22 @@ igb_allocate_receive_buffers(struct rx_ring *rxr) 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ - &rxr->rx_ptag))) { + &rxr->ptag))) { device_printf(dev, "Unable to create RX payload DMA tag\n"); goto fail; } - /* Create the spare maps (used by getbuf) */ - error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT, - &rxr->rx_hspare_map); - if (error) { - device_printf(dev, - "%s: bus_dmamap_create header spare failed: %d\n", - __func__, error); - goto fail; - } - error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT, - &rxr->rx_pspare_map); - if (error) { - device_printf(dev, - "%s: bus_dmamap_create packet spare failed: %d\n", - __func__, error); - goto fail; - } - for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; - error = bus_dmamap_create(rxr->rx_htag, - BUS_DMA_NOWAIT, &rxbuf->head_map); + error = bus_dmamap_create(rxr->htag, + BUS_DMA_NOWAIT, &rxbuf->hmap); if (error) { device_printf(dev, "Unable to create RX head DMA maps\n"); goto fail; } - error = bus_dmamap_create(rxr->rx_ptag, - BUS_DMA_NOWAIT, &rxbuf->pack_map); + error = bus_dmamap_create(rxr->ptag, + BUS_DMA_NOWAIT, &rxbuf->pmap); if (error) { device_printf(dev, "Unable to create RX packet DMA maps\n"); @@ -3720,16 +3674,16 @@ igb_free_receive_ring(struct rx_ring *rxr) for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map, + bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map); + bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { - bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map, + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map); + bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } @@ -3750,8 +3704,10 @@ igb_setup_receive_ring(struct rx_ring *rxr) struct adapter *adapter; struct ifnet *ifp; device_t dev; + struct igb_rx_buf *rxbuf; + bus_dma_segment_t pseg[1], hseg[1]; struct lro_ctrl *lro = &rxr->lro; - int j, rsize, error = 0; + int rsize, nsegs, error = 0; adapter = rxr->adapter; dev = adapter->dev; @@ -3768,15 +3724,50 @@ igb_setup_receive_ring(struct rx_ring *rxr) */ igb_free_receive_ring(rxr); - /* Now replenish the ring mbufs */ - for (j = 0; j < adapter->num_rx_desc; j++) { - if ((error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH)) != 0) - goto fail; - } + /* Now replenish the ring mbufs */ + for (int j = 0; j != adapter->num_rx_desc; ++j) { + struct mbuf *mh, *mp; - /* Setup our descriptor indices */ - rxr->next_to_check = 0; - rxr->last_cleaned = 0; + rxbuf = &rxr->rx_buffers[j]; + + /* First the header */ + rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA); + if (rxbuf->m_head == NULL) + goto fail; + m_adj(rxbuf->m_head, ETHER_ALIGN); + mh = rxbuf->m_head; + mh->m_len = mh->m_pkthdr.len = MHLEN; + mh->m_flags |= M_PKTHDR; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->htag, + rxbuf->hmap, rxbuf->m_head, hseg, + &nsegs, BUS_DMA_NOWAIT); + if (error != 0) /* Nothing elegant to do here */ + goto fail; + bus_dmamap_sync(rxr->htag, + rxbuf->hmap, BUS_DMASYNC_PREREAD); + /* Update descriptor */ + rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); + + /* Now the payload cluster */ + rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA, + M_PKTHDR, adapter->rx_mbuf_sz); + if (rxbuf->m_pack == NULL) + goto fail; + mp = rxbuf->m_pack; + mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->ptag, + rxbuf->pmap, mp, pseg, + &nsegs, BUS_DMA_NOWAIT); + if (error != 0) + goto fail; + bus_dmamap_sync(rxr->ptag, + rxbuf->pmap, BUS_DMASYNC_PREREAD); + /* Update descriptor */ + rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); + } + rxr->next_to_refresh = 0; rxr->lro_enabled = FALSE; if (igb_header_split) @@ -4049,47 +4040,33 @@ igb_free_receive_buffers(struct rx_ring *rxr) INIT_DEBUGOUT("free_receive_structures: begin"); - if (rxr->rx_hspare_map != NULL) { - bus_dmamap_destroy(rxr->rx_htag, rxr->rx_hspare_map); - rxr->rx_hspare_map = NULL; - } - - if (rxr->rx_hspare_map != NULL) { - bus_dmamap_destroy(rxr->rx_ptag, rxr->rx_pspare_map); - rxr->rx_pspare_map = NULL; - } - /* Cleanup any existing buffers */ if (rxr->rx_buffers != NULL) { for (i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map, + bus_dmamap_sync(rxr->htag, rxbuf->hmap, BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rx_htag, - rxbuf->head_map); + bus_dmamap_unload(rxr->htag, rxbuf->hmap); rxbuf->m_head->m_flags |= M_PKTHDR; m_freem(rxbuf->m_head); } if (rxbuf->m_pack != NULL) { - bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map, + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rx_ptag, - rxbuf->pack_map); + bus_dmamap_unload(rxr->ptag, rxbuf->pmap); rxbuf->m_pack->m_flags |= M_PKTHDR; m_freem(rxbuf->m_pack); } rxbuf->m_head = NULL; rxbuf->m_pack = NULL; - if (rxbuf->head_map != NULL) { - bus_dmamap_destroy(rxr->rx_htag, - rxbuf->head_map); - rxbuf->head_map = NULL; + if (rxbuf->hmap != NULL) { + bus_dmamap_destroy(rxr->htag, rxbuf->hmap); + rxbuf->hmap = NULL; } - if (rxbuf->pack_map != NULL) { - bus_dmamap_destroy(rxr->rx_ptag, - rxbuf->pack_map); - rxbuf->pack_map = NULL; + if (rxbuf->pmap != NULL) { + bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); + rxbuf->pmap = NULL; } } if (rxr->rx_buffers != NULL) { @@ -4098,26 +4075,43 @@ igb_free_receive_buffers(struct rx_ring *rxr) } } - if (rxr->rx_htag != NULL) { - bus_dma_tag_destroy(rxr->rx_htag); - rxr->rx_htag = NULL; + if (rxr->htag != NULL) { + bus_dma_tag_destroy(rxr->htag); + rxr->htag = NULL; } - if (rxr->rx_ptag != NULL) { - bus_dma_tag_destroy(rxr->rx_ptag); - rxr->rx_ptag = NULL; + if (rxr->ptag != NULL) { + bus_dma_tag_destroy(rxr->ptag); + rxr->ptag = NULL; } } static __inline void -igb_rx_discard(struct rx_ring *rxr, union e1000_adv_rx_desc *cur, int i) +igb_rx_discard(struct rx_ring *rxr, int i) { + struct adapter *adapter = rxr->adapter; + struct igb_rx_buf *rbuf; + struct mbuf *mh, *mp; + rbuf = &rxr->rx_buffers[i]; if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } + + mh = rbuf->m_head; + mp = rbuf->m_pack; + + /* Reuse loaded DMA map and just update mbuf chain */ + mh->m_len = MHLEN; + mh->m_flags |= M_PKTHDR; + mh->m_next = NULL; + + mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz; + mp->m_data = mp->m_ext.ext_buf; + mp->m_next = NULL; + return; } static __inline void @@ -4161,28 +4155,29 @@ igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) * Return TRUE if more to clean, FALSE otherwise *********************************************************************/ static bool -igb_rxeof(struct rx_ring *rxr, int count) +igb_rxeof(struct igb_queue *que, int count) { - struct adapter *adapter = rxr->adapter; + struct adapter *adapter = que->adapter; + struct rx_ring *rxr = que->rxr; struct ifnet *ifp = adapter->ifp; struct lro_ctrl *lro = &rxr->lro; struct lro_entry *queued; - int i, prog = 0; + int i, processed = 0; u32 ptype, staterr = 0; union e1000_adv_rx_desc *cur; IGB_RX_LOCK(rxr); + /* Sync the ring. */ + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Main clean loop */ - for (i = rxr->next_to_check; count > 0; prog++) { - struct mbuf *sendmp, *mh, *mp; - u16 hlen, plen, hdr, vtag; - bool eop = FALSE; - u8 dopayload; + for (i = rxr->next_to_check; count != 0;) { + struct mbuf *sendmp, *mh, *mp; + struct igb_rx_buf *rxbuf; + u16 hlen, plen, hdr, vtag; + bool eop = FALSE; - /* Sync the ring. */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); cur = &rxr->rx_base[i]; staterr = le32toh(cur->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) @@ -4192,8 +4187,10 @@ igb_rxeof(struct rx_ring *rxr, int count) count--; sendmp = mh = mp = NULL; cur->wb.upper.status_error = 0; + rxbuf = &rxr->rx_buffers[i]; plen = le16toh(cur->wb.upper.length); ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; + vtag = le16toh(cur->wb.upper.vlan); hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); @@ -4206,7 +4203,7 @@ igb_rxeof(struct rx_ring *rxr, int count) rxr->discard = TRUE; else rxr->discard = FALSE; - igb_rx_discard(rxr, cur, i); + igb_rx_discard(rxr, i); goto next_desc; } @@ -4229,7 +4226,8 @@ igb_rxeof(struct rx_ring *rxr, int count) /* Handle the header mbuf */ mh = rxr->rx_buffers[i].m_head; mh->m_len = hlen; - dopayload = IGB_CLEAN_HEADER; + /* clear buf info for refresh */ + rxbuf->m_head = NULL; /* ** Get the payload length, this ** could be zero if its a small @@ -4239,7 +4237,8 @@ igb_rxeof(struct rx_ring *rxr, int count) mp = rxr->rx_buffers[i].m_pack; mp->m_len = plen; mh->m_next = mp; - dopayload = IGB_CLEAN_BOTH; + /* clear buf info for refresh */ + rxbuf->m_pack = NULL; rxr->rx_split_packets++; } } else { @@ -4250,26 +4249,11 @@ igb_rxeof(struct rx_ring *rxr, int count) */ mh = rxr->rx_buffers[i].m_pack; mh->m_len = plen; - dopayload = IGB_CLEAN_PAYLOAD; + /* clear buf info for refresh */ + rxbuf->m_pack = NULL; } - /* - ** get_buf will overwrite the writeback - ** descriptor so save the VLAN tag now. - */ - vtag = le16toh(cur->wb.upper.vlan); - if (igb_get_buf(rxr, i, dopayload) != 0) { - ifp->if_iqdrops++; - /* - * We've dropped a frame due to lack of resources - * so we should drop entire multi-segmented - * frames until we encounter EOP. - */ - if ((staterr & E1000_RXD_STAT_EOP) != 0) - rxr->discard = TRUE; - igb_rx_discard(rxr, cur, i); - goto next_desc; - } + ++processed; /* So we know when to refresh */ /* Initial frame - setup */ if (rxr->fmp == NULL) { @@ -4300,14 +4284,14 @@ igb_rxeof(struct rx_ring *rxr, int count) if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) igb_rx_checksum(staterr, rxr->fmp, ptype); - /* XXX igb(4) always strips VLAN. */ + if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && (staterr & E1000_RXD_STAT_VP) != 0) { rxr->fmp->m_pkthdr.ether_vtag = vtag; rxr->fmp->m_flags |= M_VLANTAG; } #if __FreeBSD_version >= 800000 - rxr->fmp->m_pkthdr.flowid = curcpu; + rxr->fmp->m_pkthdr.flowid = que->msix; rxr->fmp->m_flags |= M_FLOWID; #endif sendmp = rxr->fmp; @@ -4321,31 +4305,30 @@ next_desc: bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - rxr->last_cleaned = i; /* For updating tail */ - /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; - /* - ** Note that we hold the RX lock thru - ** the following call so this ring's - ** next_to_check is not gonna change. + ** Send to the stack or LRO */ if (sendmp != NULL) igb_rx_input(rxr, ifp, sendmp, ptype); + + /* Every 8 descriptors we go to refresh mbufs */ + if (processed == 8) { + igb_refresh_mbufs(rxr, i); + processed = 0; + } } - if (prog == 0) { - IGB_RX_UNLOCK(rxr); - return (FALSE); + /* Catch any remainders */ + if (processed != 0) { + igb_refresh_mbufs(rxr, i); + processed = 0; } rxr->next_to_check = i; - /* Advance the E1000's Receive Queue "Tail Pointer". */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned); - /* * Flush any outstanding LRO work */ diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h index fbcf4a03f85..334c2899069 100644 --- a/sys/dev/e1000/if_igb.h +++ b/sys/dev/e1000/if_igb.h @@ -47,8 +47,8 @@ * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ -#define IGB_MIN_TXD 80 -#define IGB_DEFAULT_TXD 256 +#define IGB_MIN_TXD 256 +#define IGB_DEFAULT_TXD 1024 #define IGB_MAX_TXD 4096 /* @@ -62,8 +62,8 @@ * desscriptors should meet the following condition. * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 */ -#define IGB_MIN_RXD 80 -#define IGB_DEFAULT_RXD 256 +#define IGB_MIN_RXD 256 +#define IGB_DEFAULT_RXD 1024 #define IGB_MAX_RXD 4096 /* @@ -333,13 +333,11 @@ struct rx_ring { bool discard; struct mtx rx_mtx; char mtx_name[16]; - u32 last_cleaned; + u32 next_to_refresh; u32 next_to_check; struct igb_rx_buf *rx_buffers; - bus_dma_tag_t rx_htag; /* dma tag for rx head */ - bus_dmamap_t rx_hspare_map; - bus_dma_tag_t rx_ptag; /* dma tag for rx packet */ - bus_dmamap_t rx_pspare_map; + bus_dma_tag_t htag; /* dma tag for rx head */ + bus_dma_tag_t ptag; /* dma tag for rx packet */ /* * First/last mbuf pointers, for * collecting multisegment RX packets. @@ -468,8 +466,8 @@ struct igb_tx_buffer { struct igb_rx_buf { struct mbuf *m_head; struct mbuf *m_pack; - bus_dmamap_t head_map; /* bus_dma map for packet */ - bus_dmamap_t pack_map; /* bus_dma map for packet */ + bus_dmamap_t hmap; /* bus_dma map for header */ + bus_dmamap_t pmap; /* bus_dma map for packet */ }; #define IGB_CORE_LOCK_INIT(_sc, _name) \ diff --git a/sys/dev/e1000/if_lem.c b/sys/dev/e1000/if_lem.c new file mode 100644 index 00000000000..03a679d0ee7 --- /dev/null +++ b/sys/dev/e1000/if_lem.c @@ -0,0 +1,4680 @@ +/****************************************************************************** + + Copyright (c) 2001-2010, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/*$FreeBSD$*/ + +#ifdef HAVE_KERNEL_OPTION_HEADERS +#include "opt_device_polling.h" +#include "opt_inet.h" +#endif + +#include +#include +#if __FreeBSD_version >= 800000 +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __FreeBSD_version >= 700029 +#include +#endif +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "e1000_api.h" +#include "if_lem.h" + +/********************************************************************* + * Set this to one to display debug statistics + *********************************************************************/ +int lem_display_debug_stats = 0; + +/********************************************************************* + * Legacy Em Driver version: + *********************************************************************/ +char lem_driver_version[] = "1.0.0"; + + +/********************************************************************* + * PCI Device ID Table + * + * Used by probe to select devices to load on + * Last field stores an index into e1000_strings + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } + *********************************************************************/ + +static em_vendor_info_t lem_vendor_info_array[] = +{ + /* Intel(R) PRO/1000 Network Connection */ + { 0x8086, E1000_DEV_ID_82540EM, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82540EM_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82540EP, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82540EP_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82540EP_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, + + { 0x8086, E1000_DEV_ID_82541EI, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82541ER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82541ER_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82541EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82541GI, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82541GI_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82541GI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, + + { 0x8086, E1000_DEV_ID_82542, PCI_ANY_ID, PCI_ANY_ID, 0}, + + { 0x8086, E1000_DEV_ID_82543GC_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82543GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + + { 0x8086, E1000_DEV_ID_82544EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82544EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82544GC_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82544GC_LOM, PCI_ANY_ID, PCI_ANY_ID, 0}, + + { 0x8086, E1000_DEV_ID_82545EM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82545EM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82545GM_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82545GM_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82545GM_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, + + { 0x8086, E1000_DEV_ID_82546EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82546EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82546GB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82546GB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82546GB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82546GB_PCIE, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, + PCI_ANY_ID, PCI_ANY_ID, 0}, + + { 0x8086, E1000_DEV_ID_82547EI, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82547EI_MOBILE, PCI_ANY_ID, PCI_ANY_ID, 0}, + { 0x8086, E1000_DEV_ID_82547GI, PCI_ANY_ID, PCI_ANY_ID, 0}, + /* required last entry */ + { 0, 0, 0, 0, 0} +}; + +/********************************************************************* + * Table of branding strings for all supported NICs. + *********************************************************************/ + +static char *lem_strings[] = { + "Intel(R) PRO/1000 Legacy Network Connection" +}; + +/********************************************************************* + * Function prototypes + *********************************************************************/ +static int lem_probe(device_t); +static int lem_attach(device_t); +static int lem_detach(device_t); +static int lem_shutdown(device_t); +static int lem_suspend(device_t); +static int lem_resume(device_t); +static void lem_start(struct ifnet *); +static void lem_start_locked(struct ifnet *ifp); +#if __FreeBSD_version >= 800000 +static int lem_mq_start(struct ifnet *, struct mbuf *); +static int lem_mq_start_locked(struct ifnet *, struct mbuf *); +static void lem_qflush(struct ifnet *); +#endif +static int lem_ioctl(struct ifnet *, u_long, caddr_t); +static void lem_init(void *); +static void lem_init_locked(struct adapter *); +static void lem_stop(void *); +static void lem_media_status(struct ifnet *, struct ifmediareq *); +static int lem_media_change(struct ifnet *); +static void lem_identify_hardware(struct adapter *); +static int lem_allocate_pci_resources(struct adapter *); +static int lem_allocate_irq(struct adapter *adapter); +static void lem_free_pci_resources(struct adapter *); +static void lem_local_timer(void *); +static int lem_hardware_init(struct adapter *); +static void lem_setup_interface(device_t, struct adapter *); +static void lem_setup_transmit_structures(struct adapter *); +static void lem_initialize_transmit_unit(struct adapter *); +static int lem_setup_receive_structures(struct adapter *); +static void lem_initialize_receive_unit(struct adapter *); +static void lem_enable_intr(struct adapter *); +static void lem_disable_intr(struct adapter *); +static void lem_free_transmit_structures(struct adapter *); +static void lem_free_receive_structures(struct adapter *); +static void lem_update_stats_counters(struct adapter *); +static void lem_txeof(struct adapter *); +static void lem_tx_purge(struct adapter *); +static int lem_allocate_receive_structures(struct adapter *); +static int lem_allocate_transmit_structures(struct adapter *); +static int lem_rxeof(struct adapter *, int); +#ifndef __NO_STRICT_ALIGNMENT +static int lem_fixup_rx(struct adapter *); +#endif +static void lem_receive_checksum(struct adapter *, struct e1000_rx_desc *, + struct mbuf *); +static void lem_transmit_checksum_setup(struct adapter *, struct mbuf *, + u32 *, u32 *); +static void lem_set_promisc(struct adapter *); +static void lem_disable_promisc(struct adapter *); +static void lem_set_multi(struct adapter *); +static void lem_print_hw_stats(struct adapter *); +static void lem_update_link_status(struct adapter *); +static int lem_get_buf(struct adapter *, int); +#if __FreeBSD_version >= 700029 +static void lem_register_vlan(void *, struct ifnet *, u16); +static void lem_unregister_vlan(void *, struct ifnet *, u16); +static void lem_setup_vlan_hw_support(struct adapter *); +#endif +static int lem_xmit(struct adapter *, struct mbuf **); +static void lem_smartspeed(struct adapter *); +static int lem_82547_fifo_workaround(struct adapter *, int); +static void lem_82547_update_fifo_head(struct adapter *, int); +static int lem_82547_tx_fifo_reset(struct adapter *); +static void lem_82547_move_tail(void *); +static int lem_dma_malloc(struct adapter *, bus_size_t, + struct em_dma_alloc *, int); +static void lem_dma_free(struct adapter *, struct em_dma_alloc *); +static void lem_print_debug_info(struct adapter *); +static void lem_print_nvm_info(struct adapter *); +static int lem_is_valid_ether_addr(u8 *); +static int lem_sysctl_stats(SYSCTL_HANDLER_ARGS); +static int lem_sysctl_debug_info(SYSCTL_HANDLER_ARGS); +static u32 lem_fill_descriptors (bus_addr_t address, u32 length, + PDESC_ARRAY desc_array); +static int lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS); +static void lem_add_int_delay_sysctl(struct adapter *, const char *, + const char *, struct em_int_delay_info *, int, int); +/* Management and WOL Support */ +static void lem_init_manageability(struct adapter *); +static void lem_release_manageability(struct adapter *); +static void lem_get_hw_control(struct adapter *); +static void lem_release_hw_control(struct adapter *); +static void lem_get_wakeup(device_t); +static void lem_enable_wakeup(device_t); +static int lem_enable_phy_wakeup(struct adapter *); + +#ifdef EM_LEGACY_IRQ +static void lem_intr(void *); +#else /* FAST IRQ */ +#if __FreeBSD_version < 700000 +static void lem_irq_fast(void *); +#else +static int lem_irq_fast(void *); +#endif +static void lem_handle_rxtx(void *context, int pending); +static void lem_handle_link(void *context, int pending); +static void lem_add_rx_process_limit(struct adapter *, const char *, + const char *, int *, int); +#endif /* ~EM_LEGACY_IRQ */ + +#ifdef DEVICE_POLLING +static poll_handler_t em_poll; +#endif /* POLLING */ + +/********************************************************************* + * FreeBSD Device Interface Entry Points + *********************************************************************/ + +static device_method_t lem_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, lem_probe), + DEVMETHOD(device_attach, lem_attach), + DEVMETHOD(device_detach, lem_detach), + DEVMETHOD(device_shutdown, lem_shutdown), + DEVMETHOD(device_suspend, lem_suspend), + DEVMETHOD(device_resume, lem_resume), + {0, 0} +}; + +static driver_t lem_driver = { + "em", lem_methods, sizeof(struct adapter), +}; + +extern devclass_t em_devclass; +DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0); +MODULE_DEPEND(lem, pci, 1, 1, 1); +MODULE_DEPEND(lem, ether, 1, 1, 1); + +/********************************************************************* + * Tunable default values. + *********************************************************************/ + +#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) +#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) +#define M_TSO_LEN 66 + +/* Allow common code without TSO */ +#ifndef CSUM_TSO +#define CSUM_TSO 0 +#endif + +static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); +static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); +static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); +static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); +static int lem_rxd = EM_DEFAULT_RXD; +static int lem_txd = EM_DEFAULT_TXD; +static int lem_smart_pwr_down = FALSE; + +/* Controls whether promiscuous also shows bad packets */ +static int lem_debug_sbp = FALSE; + +TUNABLE_INT("hw.em.tx_int_delay", &lem_tx_int_delay_dflt); +TUNABLE_INT("hw.em.rx_int_delay", &lem_rx_int_delay_dflt); +TUNABLE_INT("hw.em.tx_abs_int_delay", &lem_tx_abs_int_delay_dflt); +TUNABLE_INT("hw.em.rx_abs_int_delay", &lem_rx_abs_int_delay_dflt); +TUNABLE_INT("hw.em.rxd", &lem_rxd); +TUNABLE_INT("hw.em.txd", &lem_txd); +TUNABLE_INT("hw.em.smart_pwr_down", &lem_smart_pwr_down); +TUNABLE_INT("hw.em.sbp", &lem_debug_sbp); + +#ifndef EM_LEGACY_IRQ +/* How many packets rxeof tries to clean at a time */ +static int lem_rx_process_limit = 100; +TUNABLE_INT("hw.em.rx_process_limit", &lem_rx_process_limit); +#endif + +/* Flow control setting - default to FULL */ +static int lem_fc_setting = e1000_fc_full; +TUNABLE_INT("hw.em.fc_setting", &lem_fc_setting); + +/* +** Shadow VFTA table, this is needed because +** the real vlan filter table gets cleared during +** a soft reset and the driver needs to be able +** to repopulate it. +*/ +static u32 lem_shadow_vfta[EM_VFTA_SIZE]; + +/* Global used in WOL setup with multiport cards */ +static int global_quad_port_a = 0; + +/********************************************************************* + * Device identification routine + * + * em_probe determines if the driver should be loaded on + * adapter based on PCI vendor/device id of the adapter. + * + * return BUS_PROBE_DEFAULT on success, positive on failure + *********************************************************************/ + +static int +lem_probe(device_t dev) +{ + char adapter_name[60]; + u16 pci_vendor_id = 0; + u16 pci_device_id = 0; + u16 pci_subvendor_id = 0; + u16 pci_subdevice_id = 0; + em_vendor_info_t *ent; + + INIT_DEBUGOUT("em_probe: begin"); + + pci_vendor_id = pci_get_vendor(dev); + if (pci_vendor_id != EM_VENDOR_ID) + return (ENXIO); + + pci_device_id = pci_get_device(dev); + pci_subvendor_id = pci_get_subvendor(dev); + pci_subdevice_id = pci_get_subdevice(dev); + + ent = lem_vendor_info_array; + while (ent->vendor_id != 0) { + if ((pci_vendor_id == ent->vendor_id) && + (pci_device_id == ent->device_id) && + + ((pci_subvendor_id == ent->subvendor_id) || + (ent->subvendor_id == PCI_ANY_ID)) && + + ((pci_subdevice_id == ent->subdevice_id) || + (ent->subdevice_id == PCI_ANY_ID))) { + sprintf(adapter_name, "%s %s", + lem_strings[ent->index], + lem_driver_version); + device_set_desc_copy(dev, adapter_name); + return (BUS_PROBE_DEFAULT); + } + ent++; + } + + return (ENXIO); +} + +/********************************************************************* + * Device initialization routine + * + * The attach entry point is called when the driver is being loaded. + * This routine identifies the type of hardware, allocates all resources + * and initializes the hardware. + * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +lem_attach(device_t dev) +{ + struct adapter *adapter; + int tsize, rsize; + int error = 0; + + INIT_DEBUGOUT("lem_attach: begin"); + + adapter = device_get_softc(dev); + adapter->dev = adapter->osdep.dev = dev; + EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); + EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev)); + EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev)); + + /* SYSCTL stuff */ + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, + lem_sysctl_debug_info, "I", "Debug Information"); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, + lem_sysctl_stats, "I", "Statistics"); + + callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); + callout_init_mtx(&adapter->tx_fifo_timer, &adapter->tx_mtx, 0); + + /* Determine hardware and mac info */ + lem_identify_hardware(adapter); + + /* Setup PCI resources */ + if (lem_allocate_pci_resources(adapter)) { + device_printf(dev, "Allocation of PCI resources failed\n"); + error = ENXIO; + goto err_pci; + } + + /* Do Shared Code initialization */ + if (e1000_setup_init_funcs(&adapter->hw, TRUE)) { + device_printf(dev, "Setup of Shared code failed\n"); + error = ENXIO; + goto err_pci; + } + + e1000_get_bus_info(&adapter->hw); + + /* Set up some sysctls for the tunable interrupt delays */ + lem_add_int_delay_sysctl(adapter, "rx_int_delay", + "receive interrupt delay in usecs", &adapter->rx_int_delay, + E1000_REGISTER(&adapter->hw, E1000_RDTR), lem_rx_int_delay_dflt); + lem_add_int_delay_sysctl(adapter, "tx_int_delay", + "transmit interrupt delay in usecs", &adapter->tx_int_delay, + E1000_REGISTER(&adapter->hw, E1000_TIDV), lem_tx_int_delay_dflt); + if (adapter->hw.mac.type >= e1000_82540) { + lem_add_int_delay_sysctl(adapter, "rx_abs_int_delay", + "receive interrupt delay limit in usecs", + &adapter->rx_abs_int_delay, + E1000_REGISTER(&adapter->hw, E1000_RADV), + lem_rx_abs_int_delay_dflt); + lem_add_int_delay_sysctl(adapter, "tx_abs_int_delay", + "transmit interrupt delay limit in usecs", + &adapter->tx_abs_int_delay, + E1000_REGISTER(&adapter->hw, E1000_TADV), + lem_tx_abs_int_delay_dflt); + } + +#ifndef EM_LEGACY_IRQ + /* Sysctls for limiting the amount of work done in the taskqueue */ + lem_add_rx_process_limit(adapter, "rx_processing_limit", + "max number of rx packets to process", &adapter->rx_process_limit, + lem_rx_process_limit); +#endif + + /* + * Validate number of transmit and receive descriptors. It + * must not exceed hardware maximum, and must be multiple + * of E1000_DBA_ALIGN. + */ + if (((lem_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || + (adapter->hw.mac.type >= e1000_82544 && lem_txd > EM_MAX_TXD) || + (adapter->hw.mac.type < e1000_82544 && lem_txd > EM_MAX_TXD_82543) || + (lem_txd < EM_MIN_TXD)) { + device_printf(dev, "Using %d TX descriptors instead of %d!\n", + EM_DEFAULT_TXD, lem_txd); + adapter->num_tx_desc = EM_DEFAULT_TXD; + } else + adapter->num_tx_desc = lem_txd; + if (((lem_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 || + (adapter->hw.mac.type >= e1000_82544 && lem_rxd > EM_MAX_RXD) || + (adapter->hw.mac.type < e1000_82544 && lem_rxd > EM_MAX_RXD_82543) || + (lem_rxd < EM_MIN_RXD)) { + device_printf(dev, "Using %d RX descriptors instead of %d!\n", + EM_DEFAULT_RXD, lem_rxd); + adapter->num_rx_desc = EM_DEFAULT_RXD; + } else + adapter->num_rx_desc = lem_rxd; + + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_wait_to_complete = FALSE; + adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; + adapter->rx_buffer_len = 2048; + + e1000_init_script_state_82541(&adapter->hw, TRUE); + e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); + + /* Copper options */ + if (adapter->hw.phy.media_type == e1000_media_type_copper) { + adapter->hw.phy.mdix = AUTO_ALL_MODES; + adapter->hw.phy.disable_polarity_correction = FALSE; + adapter->hw.phy.ms_type = EM_MASTER_SLAVE; + } + + /* + * Set the frame limits assuming + * standard ethernet sized frames. + */ + adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; + adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE; + + /* + * This controls when hardware reports transmit completion + * status. + */ + adapter->hw.mac.report_tx_early = 1; + + tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), + EM_DBA_ALIGN); + + /* Allocate Transmit Descriptor ring */ + if (lem_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) { + device_printf(dev, "Unable to allocate tx_desc memory\n"); + error = ENOMEM; + goto err_tx_desc; + } + adapter->tx_desc_base = + (struct e1000_tx_desc *)adapter->txdma.dma_vaddr; + + rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc), + EM_DBA_ALIGN); + + /* Allocate Receive Descriptor ring */ + if (lem_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) { + device_printf(dev, "Unable to allocate rx_desc memory\n"); + error = ENOMEM; + goto err_rx_desc; + } + adapter->rx_desc_base = + (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr; + + /* + ** Start from a known state, this is + ** important in reading the nvm and + ** mac from that. + */ + e1000_reset_hw(&adapter->hw); + + /* Make sure we have a good EEPROM before we read from it */ + if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { + /* + ** Some PCI-E parts fail the first check due to + ** the link being in sleep state, call it again, + ** if it fails a second time its a real issue. + */ + if (e1000_validate_nvm_checksum(&adapter->hw) < 0) { + device_printf(dev, + "The EEPROM Checksum Is Not Valid\n"); + error = EIO; + goto err_hw_init; + } + } + + /* Copy the permanent MAC address out of the EEPROM */ + if (e1000_read_mac_addr(&adapter->hw) < 0) { + device_printf(dev, "EEPROM read error while reading MAC" + " address\n"); + error = EIO; + goto err_hw_init; + } + + if (!lem_is_valid_ether_addr(adapter->hw.mac.addr)) { + device_printf(dev, "Invalid MAC address\n"); + error = EIO; + goto err_hw_init; + } + + /* Initialize the hardware */ + if (lem_hardware_init(adapter)) { + device_printf(dev, "Unable to initialize the hardware\n"); + error = EIO; + goto err_hw_init; + } + + /* Allocate transmit descriptors and buffers */ + if (lem_allocate_transmit_structures(adapter)) { + device_printf(dev, "Could not setup transmit structures\n"); + error = ENOMEM; + goto err_tx_struct; + } + + /* Allocate receive descriptors and buffers */ + if (lem_allocate_receive_structures(adapter)) { + device_printf(dev, "Could not setup receive structures\n"); + error = ENOMEM; + goto err_rx_struct; + } + + /* + ** Do interrupt configuration + */ + error = lem_allocate_irq(adapter); + if (error) + goto err_rx_struct; + + /* + * Get Wake-on-Lan and Management info for later use + */ + lem_get_wakeup(dev); + + /* Setup OS specific network interface */ + lem_setup_interface(dev, adapter); + + /* Initialize statistics */ + lem_update_stats_counters(adapter); + + adapter->hw.mac.get_link_status = 1; + lem_update_link_status(adapter); + + /* Indicate SOL/IDER usage */ + if (e1000_check_reset_block(&adapter->hw)) + device_printf(dev, + "PHY reset is blocked due to SOL/IDER session.\n"); + + /* Do we need workaround for 82544 PCI-X adapter? */ + if (adapter->hw.bus.type == e1000_bus_type_pcix && + adapter->hw.mac.type == e1000_82544) + adapter->pcix_82544 = TRUE; + else + adapter->pcix_82544 = FALSE; + +#if __FreeBSD_version >= 700029 + /* Register for VLAN events */ + adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, + lem_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); + adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, + lem_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); +#endif + + /* Non-AMT based hardware can now take control from firmware */ + if (adapter->has_manage && !adapter->has_amt) + lem_get_hw_control(adapter); + + /* Tell the stack that the interface is not active */ + adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + + INIT_DEBUGOUT("lem_attach: end"); + + return (0); + +err_rx_struct: + lem_free_transmit_structures(adapter); +err_tx_struct: +err_hw_init: + lem_release_hw_control(adapter); + lem_dma_free(adapter, &adapter->rxdma); +err_rx_desc: + lem_dma_free(adapter, &adapter->txdma); +err_tx_desc: +err_pci: + lem_free_pci_resources(adapter); + EM_TX_LOCK_DESTROY(adapter); + EM_RX_LOCK_DESTROY(adapter); + EM_CORE_LOCK_DESTROY(adapter); + + return (error); +} + +/********************************************************************* + * Device removal routine + * + * The detach entry point is called when the driver is being removed. + * This routine stops the adapter and deallocates all the resources + * that were allocated for driver operation. + * + * return 0 on success, positive on failure + *********************************************************************/ + +static int +lem_detach(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + struct ifnet *ifp = adapter->ifp; + + INIT_DEBUGOUT("em_detach: begin"); + + /* Make sure VLANS are not using driver */ +#if __FreeBSD_version >= 700000 + if (adapter->ifp->if_vlantrunk != NULL) { +#else + if (adapter->ifp->if_nvlans != 0) { +#endif + device_printf(dev,"Vlan in use, detach first\n"); + return (EBUSY); + } + +#ifdef DEVICE_POLLING + if (ifp->if_capenable & IFCAP_POLLING) + ether_poll_deregister(ifp); +#endif + + EM_CORE_LOCK(adapter); + EM_TX_LOCK(adapter); + adapter->in_detach = 1; + lem_stop(adapter); + e1000_phy_hw_reset(&adapter->hw); + + lem_release_manageability(adapter); + + EM_TX_UNLOCK(adapter); + EM_CORE_UNLOCK(adapter); + +#if __FreeBSD_version >= 700029 + /* Unregister VLAN events */ + if (adapter->vlan_attach != NULL) + EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); + if (adapter->vlan_detach != NULL) + EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); +#endif + + ether_ifdetach(adapter->ifp); + callout_drain(&adapter->timer); + callout_drain(&adapter->tx_fifo_timer); + + lem_free_pci_resources(adapter); + bus_generic_detach(dev); + if_free(ifp); + + lem_free_transmit_structures(adapter); + lem_free_receive_structures(adapter); + + /* Free Transmit Descriptor ring */ + if (adapter->tx_desc_base) { + lem_dma_free(adapter, &adapter->txdma); + adapter->tx_desc_base = NULL; + } + + /* Free Receive Descriptor ring */ + if (adapter->rx_desc_base) { + lem_dma_free(adapter, &adapter->rxdma); + adapter->rx_desc_base = NULL; + } + + lem_release_hw_control(adapter); + EM_TX_LOCK_DESTROY(adapter); + EM_RX_LOCK_DESTROY(adapter); + EM_CORE_LOCK_DESTROY(adapter); + + return (0); +} + +/********************************************************************* + * + * Shutdown entry point + * + **********************************************************************/ + +static int +lem_shutdown(device_t dev) +{ + return lem_suspend(dev); +} + +/* + * Suspend/resume device methods. + */ +static int +lem_suspend(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + + EM_CORE_LOCK(adapter); + + lem_release_manageability(adapter); + lem_release_hw_control(adapter); + lem_enable_wakeup(dev); + + EM_CORE_UNLOCK(adapter); + + return bus_generic_suspend(dev); +} + +static int +lem_resume(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + struct ifnet *ifp = adapter->ifp; + + EM_CORE_LOCK(adapter); + lem_init_locked(adapter); + lem_init_manageability(adapter); + EM_CORE_UNLOCK(adapter); + lem_start(ifp); + + return bus_generic_resume(dev); +} + + +/********************************************************************* + * Transmit entry point + * + * em_start is called by the stack to initiate a transmit. + * The driver will remain in this routine as long as there are + * packets to transmit and transmit resources are available. + * In case resources are not available stack is notified and + * the packet is requeued. + **********************************************************************/ + +#if __FreeBSD_version >= 800000 +static int +lem_mq_start_locked(struct ifnet *ifp, struct mbuf *m) +{ + struct adapter *adapter = ifp->if_softc; + struct mbuf *next; + int error = E1000_SUCCESS; + + EM_TX_LOCK_ASSERT(adapter); + /* To allow being called from a tasklet */ + if (m == NULL) + goto process; + + if (((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + || (!adapter->link_active)) { + error = drbr_enqueue(ifp, adapter->br, m); + return (error); + } else if (drbr_empty(ifp, adapter->br) && + (adapter->num_tx_desc_avail > EM_TX_OP_THRESHOLD)) { + if ((error = lem_xmit(adapter, &m)) != 0) { + if (m) + error = drbr_enqueue(ifp, adapter->br, m); + return (error); + } else { + /* + * We've bypassed the buf ring so we need to update + * ifp directly + */ + drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags); + /* + ** Send a copy of the frame to the BPF + ** listener and set the watchdog on. + */ + ETHER_BPF_MTAP(ifp, m); + adapter->watchdog_check = TRUE; + } + } else if ((error = drbr_enqueue(ifp, adapter->br, m)) != 0) + return (error); + +process: + if (drbr_empty(ifp, adapter->br)) + return(error); + /* Process the queue */ + while (TRUE) { + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; + next = drbr_dequeue(ifp, adapter->br); + if (next == NULL) + break; + if ((error = lem_xmit(adapter, &next)) != 0) { + if (next != NULL) + error = drbr_enqueue(ifp, adapter->br, next); + break; + } + drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); + ETHER_BPF_MTAP(ifp, next); + /* Set the watchdog */ + adapter->watchdog_check = TRUE; + } + + if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + + return (error); +} + +/* +** Multiqueue capable stack interface, this is not +** yet truely multiqueue, but that is coming... +*/ +static int +lem_mq_start(struct ifnet *ifp, struct mbuf *m) +{ + + struct adapter *adapter = ifp->if_softc; + int error = 0; + + if (EM_TX_TRYLOCK(adapter)) { + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + error = lem_mq_start_locked(ifp, m); + EM_TX_UNLOCK(adapter); + } else + error = drbr_enqueue(ifp, adapter->br, m); + + return (error); +} + +static void +lem_qflush(struct ifnet *ifp) +{ + struct mbuf *m; + struct adapter *adapter = (struct adapter *)ifp->if_softc; + + EM_TX_LOCK(adapter); + while ((m = buf_ring_dequeue_sc(adapter->br)) != NULL) + m_freem(m); + if_qflush(ifp); + EM_TX_UNLOCK(adapter); +} +#endif /* FreeBSD_version */ + +static void +lem_start_locked(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct mbuf *m_head; + + EM_TX_LOCK_ASSERT(adapter); + + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return; + if (!adapter->link_active) + return; + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + + IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); + if (m_head == NULL) + break; + /* + * Encapsulation can modify our pointer, and or make it + * NULL on failure. In that event, we can't requeue. + */ + if (lem_xmit(adapter, &m_head)) { + if (m_head == NULL) + break; + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + break; + } + + /* Send a copy of the frame to the BPF listener */ + ETHER_BPF_MTAP(ifp, m_head); + + /* Set timeout in case hardware has problems transmitting. */ + adapter->watchdog_check = TRUE; + } + if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + + return; +} + +static void +lem_start(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + + EM_TX_LOCK(adapter); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + lem_start_locked(ifp); + EM_TX_UNLOCK(adapter); +} + +/********************************************************************* + * Ioctl entry point + * + * em_ioctl is called when the user wants to configure the + * interface. + * + * return 0 on success, positive on failure + **********************************************************************/ + +static int +lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data) +{ + struct adapter *adapter = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; +#ifdef INET + struct ifaddr *ifa = (struct ifaddr *)data; +#endif + int error = 0; + + if (adapter->in_detach) + return (error); + + switch (command) { + case SIOCSIFADDR: +#ifdef INET + if (ifa->ifa_addr->sa_family == AF_INET) { + /* + * XXX + * Since resetting hardware takes a very long time + * and results in link renegotiation we only + * initialize the hardware only when it is absolutely + * required. + */ + ifp->if_flags |= IFF_UP; + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { + EM_CORE_LOCK(adapter); + lem_init_locked(adapter); + EM_CORE_UNLOCK(adapter); + } + arp_ifinit(ifp, ifa); + } else +#endif + error = ether_ioctl(ifp, command, data); + break; + case SIOCSIFMTU: + { + int max_frame_size; + + IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); + + EM_CORE_LOCK(adapter); + switch (adapter->hw.mac.type) { + case e1000_82542: + max_frame_size = ETHER_MAX_LEN; + break; + default: + max_frame_size = MAX_JUMBO_FRAME_SIZE; + } + if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - + ETHER_CRC_LEN) { + EM_CORE_UNLOCK(adapter); + error = EINVAL; + break; + } + + ifp->if_mtu = ifr->ifr_mtu; + adapter->max_frame_size = + ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; + lem_init_locked(adapter); + EM_CORE_UNLOCK(adapter); + break; + } + case SIOCSIFFLAGS: + IOCTL_DEBUGOUT("ioctl rcv'd:\ + SIOCSIFFLAGS (Set Interface Flags)"); + EM_CORE_LOCK(adapter); + if (ifp->if_flags & IFF_UP) { + if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if ((ifp->if_flags ^ adapter->if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + lem_disable_promisc(adapter); + lem_set_promisc(adapter); + } + } else + lem_init_locked(adapter); + } else + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + EM_TX_LOCK(adapter); + lem_stop(adapter); + EM_TX_UNLOCK(adapter); + } + adapter->if_flags = ifp->if_flags; + EM_CORE_UNLOCK(adapter); + break; + case SIOCADDMULTI: + case SIOCDELMULTI: + IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + EM_CORE_LOCK(adapter); + lem_disable_intr(adapter); + lem_set_multi(adapter); + if (adapter->hw.mac.type == e1000_82542 && + adapter->hw.revision_id == E1000_REVISION_2) { + lem_initialize_receive_unit(adapter); + } +#ifdef DEVICE_POLLING + if (!(ifp->if_capenable & IFCAP_POLLING)) +#endif + lem_enable_intr(adapter); + EM_CORE_UNLOCK(adapter); + } + break; + case SIOCSIFMEDIA: + /* Check SOL/IDER usage */ + EM_CORE_LOCK(adapter); + if (e1000_check_reset_block(&adapter->hw)) { + EM_CORE_UNLOCK(adapter); + device_printf(adapter->dev, "Media change is" + " blocked due to SOL/IDER session.\n"); + break; + } + EM_CORE_UNLOCK(adapter); + case SIOCGIFMEDIA: + IOCTL_DEBUGOUT("ioctl rcv'd: \ + SIOCxIFMEDIA (Get/Set Interface Media)"); + error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); + break; + case SIOCSIFCAP: + { + int mask, reinit; + + IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); + reinit = 0; + mask = ifr->ifr_reqcap ^ ifp->if_capenable; +#ifdef DEVICE_POLLING + if (mask & IFCAP_POLLING) { + if (ifr->ifr_reqcap & IFCAP_POLLING) { + error = ether_poll_register(lem_poll, ifp); + if (error) + return (error); + EM_CORE_LOCK(adapter); + lem_disable_intr(adapter); + ifp->if_capenable |= IFCAP_POLLING; + EM_CORE_UNLOCK(adapter); + } else { + error = ether_poll_deregister(ifp); + /* Enable interrupt even in error case */ + EM_CORE_LOCK(adapter); + lem_enable_intr(adapter); + ifp->if_capenable &= ~IFCAP_POLLING; + EM_CORE_UNLOCK(adapter); + } + } +#endif + if (mask & IFCAP_HWCSUM) { + ifp->if_capenable ^= IFCAP_HWCSUM; + reinit = 1; + } +#if __FreeBSD_version >= 700000 + if (mask & IFCAP_TSO4) { + ifp->if_capenable ^= IFCAP_TSO4; + reinit = 1; + } +#endif + if (mask & IFCAP_VLAN_HWTAGGING) { + ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; + reinit = 1; + } + if ((mask & IFCAP_WOL) && + (ifp->if_capabilities & IFCAP_WOL) != 0) { + if (mask & IFCAP_WOL_MCAST) + ifp->if_capenable ^= IFCAP_WOL_MCAST; + if (mask & IFCAP_WOL_MAGIC) + ifp->if_capenable ^= IFCAP_WOL_MAGIC; + } + if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) + lem_init(adapter); +#if __FreeBSD_version >= 700000 + VLAN_CAPABILITIES(ifp); +#endif + break; + } + + default: + error = ether_ioctl(ifp, command, data); + break; + } + + return (error); +} + + +/********************************************************************* + * Init entry point + * + * This routine is used in two ways. It is used by the stack as + * init entry point in network interface structure. It is also used + * by the driver as a hw/sw initialization routine to get to a + * consistent state. + * + * return 0 on success, positive on failure + **********************************************************************/ + +static void +lem_init_locked(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + device_t dev = adapter->dev; + u32 pba; + + INIT_DEBUGOUT("lem_init: begin"); + + EM_CORE_LOCK_ASSERT(adapter); + + EM_TX_LOCK(adapter); + lem_stop(adapter); + EM_TX_UNLOCK(adapter); + + /* + * Packet Buffer Allocation (PBA) + * Writing PBA sets the receive portion of the buffer + * the remainder is used for the transmit buffer. + * + * Devices before the 82547 had a Packet Buffer of 64K. + * Default allocation: PBA=48K for Rx, leaving 16K for Tx. + * After the 82547 the buffer was reduced to 40K. + * Default allocation: PBA=30K for Rx, leaving 10K for Tx. + * Note: default does not leave enough room for Jumbo Frame >10k. + */ + switch (adapter->hw.mac.type) { + case e1000_82547: + case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */ + if (adapter->max_frame_size > 8192) + pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */ + else + pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */ + adapter->tx_fifo_head = 0; + adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT; + adapter->tx_fifo_size = + (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT; + break; + default: + /* Devices before 82547 had a Packet Buffer of 64K. */ + if (adapter->max_frame_size > 8192) + pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ + else + pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ + } + + INIT_DEBUGOUT1("lem_init: pba=%dK",pba); + E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); + + /* Get the latest mac address, User can use a LAA */ + bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr, + ETHER_ADDR_LEN); + + /* Put the address into the Receive Address Array */ + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + + /* Initialize the hardware */ + if (lem_hardware_init(adapter)) { + device_printf(dev, "Unable to initialize the hardware\n"); + return; + } + lem_update_link_status(adapter); + + /* Setup VLAN support, basic and offload if available */ + E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); + +#if __FreeBSD_version < 700029 + if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { + u32 ctrl; + ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); + ctrl |= E1000_CTRL_VME; + E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); + } +#else + /* Use real VLAN Filter support */ + lem_setup_vlan_hw_support(adapter); +#endif + + /* Set hardware offload abilities */ + ifp->if_hwassist = 0; + if (adapter->hw.mac.type >= e1000_82543) { + if (ifp->if_capenable & IFCAP_TXCSUM) + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); +#if __FreeBSD_version >= 700000 + if (ifp->if_capenable & IFCAP_TSO4) + ifp->if_hwassist |= CSUM_TSO; +#endif + } + + /* Configure for OS presence */ + lem_init_manageability(adapter); + + /* Prepare transmit descriptors and buffers */ + lem_setup_transmit_structures(adapter); + lem_initialize_transmit_unit(adapter); + + /* Setup Multicast table */ + lem_set_multi(adapter); + + /* Prepare receive descriptors and buffers */ + if (lem_setup_receive_structures(adapter)) { + device_printf(dev, "Could not setup receive structures\n"); + EM_TX_LOCK(adapter); + lem_stop(adapter); + EM_TX_UNLOCK(adapter); + return; + } + lem_initialize_receive_unit(adapter); + + /* Don't lose promiscuous settings */ + lem_set_promisc(adapter); + + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + + callout_reset(&adapter->timer, hz, lem_local_timer, adapter); + e1000_clear_hw_cntrs_base_generic(&adapter->hw); + + /* MSI/X configuration for 82574 */ + if (adapter->hw.mac.type == e1000_82574) { + int tmp; + tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + tmp |= E1000_CTRL_EXT_PBA_CLR; + E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); + /* + ** Set the IVAR - interrupt vector routing. + ** Each nibble represents a vector, high bit + ** is enable, other 3 bits are the MSIX table + ** entry, we map RXQ0 to 0, TXQ0 to 1, and + ** Link (other) to 2, hence the magic number. + */ + E1000_WRITE_REG(&adapter->hw, E1000_IVAR, 0x800A0908); + } + +#ifdef DEVICE_POLLING + /* + * Only enable interrupts if we are not polling, make sure + * they are off otherwise. + */ + if (ifp->if_capenable & IFCAP_POLLING) + lem_disable_intr(adapter); + else +#endif /* DEVICE_POLLING */ + lem_enable_intr(adapter); + + /* AMT based hardware can now take control from firmware */ + if (adapter->has_manage && adapter->has_amt) + lem_get_hw_control(adapter); + + /* Don't reset the phy next time init gets called */ + adapter->hw.phy.reset_disable = TRUE; +} + +static void +lem_init(void *arg) +{ + struct adapter *adapter = arg; + + EM_CORE_LOCK(adapter); + lem_init_locked(adapter); + EM_CORE_UNLOCK(adapter); +} + + +#ifdef DEVICE_POLLING +/********************************************************************* + * + * Legacy polling routine + * + *********************************************************************/ +static int +lem_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) +{ + struct adapter *adapter = ifp->if_softc; + u32 reg_icr, rx_done = 0; + + EM_CORE_LOCK(adapter); + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + EM_CORE_UNLOCK(adapter); + return (rx_done); + } + + if (cmd == POLL_AND_CHECK_STATUS) { + reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + callout_stop(&adapter->timer); + adapter->hw.mac.get_link_status = 1; + lem_update_link_status(adapter); + callout_reset(&adapter->timer, hz, + lem_local_timer, adapter); + } + } + EM_CORE_UNLOCK(adapter); + + rx_done = lem_rxeof(adapter, count); + + EM_TX_LOCK(adapter); + lem_txeof(adapter); +#if __FreeBSD_version >= 800000 + if (!drbr_empty(ifp, adapter->br)) + lem_mq_start_locked(ifp, NULL); +#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + lem_start_locked(ifp); +#endif + EM_TX_UNLOCK(adapter); + return (rx_done); +} +#endif /* DEVICE_POLLING */ + +#ifdef EM_LEGACY_IRQ +/********************************************************************* + * + * Legacy Interrupt Service routine + * + *********************************************************************/ + +static void +lem_intr(void *arg) +{ + struct adapter *adapter = arg; + struct ifnet *ifp = adapter->ifp; + u32 reg_icr; + + + if (ifp->if_capenable & IFCAP_POLLING) + return; + + EM_CORE_LOCK(adapter); + reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); + if (reg_icr & E1000_ICR_RXO) + adapter->rx_overruns++; + + if ((reg_icr == 0xffffffff) || (reg_icr == 0)) + goto out; + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + goto out; + + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + callout_stop(&adapter->timer); + adapter->hw.mac.get_link_status = 1; + lem_update_link_status(adapter); + /* Deal with TX cruft when link lost */ + lem_tx_purge(adapter); + callout_reset(&adapter->timer, hz, + lem_local_timer, adapter); + goto out; + } + + EM_TX_LOCK(adapter); + lem_txeof(adapter); + lem_rxeof(adapter, -1); + lem_txeof(adapter); + if (ifp->if_drv_flags & IFF_DRV_RUNNING && + !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + lem_start_locked(ifp); + EM_TX_UNLOCK(adapter); + +out: + EM_CORE_UNLOCK(adapter); + return; +} + +#else /* EM_FAST_IRQ, then fast interrupt routines only */ + +static void +lem_handle_link(void *context, int pending) +{ + struct adapter *adapter = context; + struct ifnet *ifp = adapter->ifp; + + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + return; + + EM_CORE_LOCK(adapter); + callout_stop(&adapter->timer); + lem_update_link_status(adapter); + /* Deal with TX cruft when link lost */ + lem_tx_purge(adapter); + callout_reset(&adapter->timer, hz, lem_local_timer, adapter); + EM_CORE_UNLOCK(adapter); +} + + +/* Combined RX/TX handler, used by Legacy and MSI */ +static void +lem_handle_rxtx(void *context, int pending) +{ + struct adapter *adapter = context; + struct ifnet *ifp = adapter->ifp; + + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + if (lem_rxeof(adapter, adapter->rx_process_limit) != 0) + taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); + EM_TX_LOCK(adapter); + lem_txeof(adapter); + +#if __FreeBSD_version >= 800000 + if (!drbr_empty(ifp, adapter->br)) + lem_mq_start_locked(ifp, NULL); +#else + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + lem_start_locked(ifp); +#endif + EM_TX_UNLOCK(adapter); + } + + lem_enable_intr(adapter); +} + +/********************************************************************* + * + * Fast Legacy/MSI Combined Interrupt Service routine + * + *********************************************************************/ +#if __FreeBSD_version < 700000 +#define FILTER_STRAY +#define FILTER_HANDLED +static void +#else +static int +#endif +lem_irq_fast(void *arg) +{ + struct adapter *adapter = arg; + struct ifnet *ifp; + u32 reg_icr; + + ifp = adapter->ifp; + + reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); + + /* Hot eject? */ + if (reg_icr == 0xffffffff) + return FILTER_STRAY; + + /* Definitely not our interrupt. */ + if (reg_icr == 0x0) + return FILTER_STRAY; + + /* + * Mask interrupts until the taskqueue is finished running. This is + * cheap, just assume that it is needed. This also works around the + * MSI message reordering errata on certain systems. + */ + lem_disable_intr(adapter); + taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); + + /* Link status change */ + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + adapter->hw.mac.get_link_status = 1; + taskqueue_enqueue(taskqueue_fast, &adapter->link_task); + } + + if (reg_icr & E1000_ICR_RXO) + adapter->rx_overruns++; + return FILTER_HANDLED; +} +#endif /* ~EM_LEGACY_IRQ */ + + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called whenever the user queries the status of + * the interface using ifconfig. + * + **********************************************************************/ +static void +lem_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) +{ + struct adapter *adapter = ifp->if_softc; + u_char fiber_type = IFM_1000_SX; + + INIT_DEBUGOUT("lem_media_status: begin"); + + EM_CORE_LOCK(adapter); + lem_update_link_status(adapter); + + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + + if (!adapter->link_active) { + EM_CORE_UNLOCK(adapter); + return; + } + + ifmr->ifm_status |= IFM_ACTIVE; + + if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || + (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { + if (adapter->hw.mac.type == e1000_82545) + fiber_type = IFM_1000_LX; + ifmr->ifm_active |= fiber_type | IFM_FDX; + } else { + switch (adapter->link_speed) { + case 10: + ifmr->ifm_active |= IFM_10_T; + break; + case 100: + ifmr->ifm_active |= IFM_100_TX; + break; + case 1000: + ifmr->ifm_active |= IFM_1000_T; + break; + } + if (adapter->link_duplex == FULL_DUPLEX) + ifmr->ifm_active |= IFM_FDX; + else + ifmr->ifm_active |= IFM_HDX; + } + EM_CORE_UNLOCK(adapter); +} + +/********************************************************************* + * + * Media Ioctl callback + * + * This routine is called when the user changes speed/duplex using + * media/mediopt option with ifconfig. + * + **********************************************************************/ +static int +lem_media_change(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct ifmedia *ifm = &adapter->media; + + INIT_DEBUGOUT("lem_media_change: begin"); + + if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) + return (EINVAL); + + EM_CORE_LOCK(adapter); + switch (IFM_SUBTYPE(ifm->ifm_media)) { + case IFM_AUTO: + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; + break; + case IFM_1000_LX: + case IFM_1000_SX: + case IFM_1000_T: + adapter->hw.mac.autoneg = DO_AUTO_NEG; + adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; + break; + case IFM_100_TX: + adapter->hw.mac.autoneg = FALSE; + adapter->hw.phy.autoneg_advertised = 0; + if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) + adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; + else + adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; + break; + case IFM_10_T: + adapter->hw.mac.autoneg = FALSE; + adapter->hw.phy.autoneg_advertised = 0; + if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) + adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; + else + adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; + break; + default: + device_printf(adapter->dev, "Unsupported media type\n"); + } + + /* As the speed/duplex settings my have changed we need to + * reset the PHY. + */ + adapter->hw.phy.reset_disable = FALSE; + + lem_init_locked(adapter); + EM_CORE_UNLOCK(adapter); + + return (0); +} + +/********************************************************************* + * + * This routine maps the mbufs to tx descriptors. + * + * return 0 on success, positive on failure + **********************************************************************/ + +static int +lem_xmit(struct adapter *adapter, struct mbuf **m_headp) +{ + bus_dma_segment_t segs[EM_MAX_SCATTER]; + bus_dmamap_t map; + struct em_buffer *tx_buffer, *tx_buffer_mapped; + struct e1000_tx_desc *ctxd = NULL; + struct mbuf *m_head; + u32 txd_upper, txd_lower, txd_used, txd_saved; + int error, nsegs, i, j, first, last = 0; +#if __FreeBSD_version < 700000 + struct m_tag *mtag; +#endif + m_head = *m_headp; + txd_upper = txd_lower = txd_used = txd_saved = 0; + + /* + * Force a cleanup if number of TX descriptors + * available hits the threshold + */ + if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) { + lem_txeof(adapter); + /* Now do we at least have a minimal? */ + if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) { + adapter->no_tx_desc_avail1++; + return (ENOBUFS); + } + } + + /* + * Map the packet for DMA + * + * Capture the first descriptor index, + * this descriptor will have the index + * of the EOP which is the only one that + * now gets a DONE bit writeback. + */ + first = adapter->next_avail_tx_desc; + tx_buffer = &adapter->tx_buffer_area[first]; + tx_buffer_mapped = tx_buffer; + map = tx_buffer->map; + + error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, + *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); + + /* + * There are two types of errors we can (try) to handle: + * - EFBIG means the mbuf chain was too long and bus_dma ran + * out of segments. Defragment the mbuf chain and try again. + * - ENOMEM means bus_dma could not obtain enough bounce buffers + * at this point in time. Defer sending and try again later. + * All other errors, in particular EINVAL, are fatal and prevent the + * mbuf chain from ever going through. Drop it and report error. + */ + if (error == EFBIG) { + struct mbuf *m; + + m = m_defrag(*m_headp, M_DONTWAIT); + if (m == NULL) { + adapter->mbuf_alloc_failed++; + m_freem(*m_headp); + *m_headp = NULL; + return (ENOBUFS); + } + *m_headp = m; + + /* Try it again */ + error = bus_dmamap_load_mbuf_sg(adapter->txtag, map, + *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); + + if (error) { + adapter->no_tx_dma_setup++; + m_freem(*m_headp); + *m_headp = NULL; + return (error); + } + } else if (error != 0) { + adapter->no_tx_dma_setup++; + return (error); + } + + if (nsegs > (adapter->num_tx_desc_avail - 2)) { + adapter->no_tx_desc_avail2++; + bus_dmamap_unload(adapter->txtag, map); + return (ENOBUFS); + } + m_head = *m_headp; + + /* Do hardware assists */ + if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) + lem_transmit_checksum_setup(adapter, m_head, + &txd_upper, &txd_lower); + + i = adapter->next_avail_tx_desc; + if (adapter->pcix_82544) + txd_saved = i; + + /* Set up our transmit descriptors */ + for (j = 0; j < nsegs; j++) { + bus_size_t seg_len; + bus_addr_t seg_addr; + /* If adapter is 82544 and on PCIX bus */ + if(adapter->pcix_82544) { + DESC_ARRAY desc_array; + u32 array_elements, counter; + /* + * Check the Address and Length combination and + * split the data accordingly + */ + array_elements = lem_fill_descriptors(segs[j].ds_addr, + segs[j].ds_len, &desc_array); + for (counter = 0; counter < array_elements; counter++) { + if (txd_used == adapter->num_tx_desc_avail) { + adapter->next_avail_tx_desc = txd_saved; + adapter->no_tx_desc_avail2++; + bus_dmamap_unload(adapter->txtag, map); + return (ENOBUFS); + } + tx_buffer = &adapter->tx_buffer_area[i]; + ctxd = &adapter->tx_desc_base[i]; + ctxd->buffer_addr = htole64( + desc_array.descriptor[counter].address); + ctxd->lower.data = htole32( + (adapter->txd_cmd | txd_lower | (u16) + desc_array.descriptor[counter].length)); + ctxd->upper.data = + htole32((txd_upper)); + last = i; + if (++i == adapter->num_tx_desc) + i = 0; + tx_buffer->m_head = NULL; + tx_buffer->next_eop = -1; + txd_used++; + } + } else { + tx_buffer = &adapter->tx_buffer_area[i]; + ctxd = &adapter->tx_desc_base[i]; + seg_addr = segs[j].ds_addr; + seg_len = segs[j].ds_len; + ctxd->buffer_addr = htole64(seg_addr); + ctxd->lower.data = htole32( + adapter->txd_cmd | txd_lower | seg_len); + ctxd->upper.data = + htole32(txd_upper); + last = i; + if (++i == adapter->num_tx_desc) + i = 0; + tx_buffer->m_head = NULL; + tx_buffer->next_eop = -1; + } + } + + adapter->next_avail_tx_desc = i; + + if (adapter->pcix_82544) + adapter->num_tx_desc_avail -= txd_used; + else + adapter->num_tx_desc_avail -= nsegs; + + /* + ** Handle VLAN tag, this is the + ** biggest difference between + ** 6.x and 7 + */ +#if __FreeBSD_version < 700000 + /* Find out if we are in vlan mode. */ + mtag = VLAN_OUTPUT_TAG(ifp, m_head); + if (mtag != NULL) { + ctxd->upper.fields.special = + htole16(VLAN_TAG_VALUE(mtag)); +#else /* FreeBSD 7 */ + if (m_head->m_flags & M_VLANTAG) { + /* Set the vlan id. */ + ctxd->upper.fields.special = + htole16(m_head->m_pkthdr.ether_vtag); +#endif + /* Tell hardware to add tag */ + ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE); + } + + tx_buffer->m_head = m_head; + tx_buffer_mapped->map = tx_buffer->map; + tx_buffer->map = map; + bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE); + + /* + * Last Descriptor of Packet + * needs End Of Packet (EOP) + * and Report Status (RS) + */ + ctxd->lower.data |= + htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); + /* + * Keep track in the first buffer which + * descriptor will be written back + */ + tx_buffer = &adapter->tx_buffer_area[first]; + tx_buffer->next_eop = last; + + /* + * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 + * that this frame is available to transmit. + */ + bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + if (adapter->hw.mac.type == e1000_82547 && + adapter->link_duplex == HALF_DUPLEX) + lem_82547_move_tail(adapter); + else { + E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i); + if (adapter->hw.mac.type == e1000_82547) + lem_82547_update_fifo_head(adapter, + m_head->m_pkthdr.len); + } + + return (0); +} + +/********************************************************************* + * + * 82547 workaround to avoid controller hang in half-duplex environment. + * The workaround is to avoid queuing a large packet that would span + * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers + * in this case. We do that only when FIFO is quiescent. + * + **********************************************************************/ +static void +lem_82547_move_tail(void *arg) +{ + struct adapter *adapter = arg; + struct e1000_tx_desc *tx_desc; + u16 hw_tdt, sw_tdt, length = 0; + bool eop = 0; + + EM_TX_LOCK_ASSERT(adapter); + + hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0)); + sw_tdt = adapter->next_avail_tx_desc; + + while (hw_tdt != sw_tdt) { + tx_desc = &adapter->tx_desc_base[hw_tdt]; + length += tx_desc->lower.flags.length; + eop = tx_desc->lower.data & E1000_TXD_CMD_EOP; + if (++hw_tdt == adapter->num_tx_desc) + hw_tdt = 0; + + if (eop) { + if (lem_82547_fifo_workaround(adapter, length)) { + adapter->tx_fifo_wrk_cnt++; + callout_reset(&adapter->tx_fifo_timer, 1, + lem_82547_move_tail, adapter); + break; + } + E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt); + lem_82547_update_fifo_head(adapter, length); + length = 0; + } + } +} + +static int +lem_82547_fifo_workaround(struct adapter *adapter, int len) +{ + int fifo_space, fifo_pkt_len; + + fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR); + + if (adapter->link_duplex == HALF_DUPLEX) { + fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head; + + if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) { + if (lem_82547_tx_fifo_reset(adapter)) + return (0); + else + return (1); + } + } + + return (0); +} + +static void +lem_82547_update_fifo_head(struct adapter *adapter, int len) +{ + int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR); + + /* tx_fifo_head is always 16 byte aligned */ + adapter->tx_fifo_head += fifo_pkt_len; + if (adapter->tx_fifo_head >= adapter->tx_fifo_size) { + adapter->tx_fifo_head -= adapter->tx_fifo_size; + } +} + + +static int +lem_82547_tx_fifo_reset(struct adapter *adapter) +{ + u32 tctl; + + if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) == + E1000_READ_REG(&adapter->hw, E1000_TDH(0))) && + (E1000_READ_REG(&adapter->hw, E1000_TDFT) == + E1000_READ_REG(&adapter->hw, E1000_TDFH)) && + (E1000_READ_REG(&adapter->hw, E1000_TDFTS) == + E1000_READ_REG(&adapter->hw, E1000_TDFHS)) && + (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) { + /* Disable TX unit */ + tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); + E1000_WRITE_REG(&adapter->hw, E1000_TCTL, + tctl & ~E1000_TCTL_EN); + + /* Reset FIFO pointers */ + E1000_WRITE_REG(&adapter->hw, E1000_TDFT, + adapter->tx_head_addr); + E1000_WRITE_REG(&adapter->hw, E1000_TDFH, + adapter->tx_head_addr); + E1000_WRITE_REG(&adapter->hw, E1000_TDFTS, + adapter->tx_head_addr); + E1000_WRITE_REG(&adapter->hw, E1000_TDFHS, + adapter->tx_head_addr); + + /* Re-enable TX unit */ + E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); + E1000_WRITE_FLUSH(&adapter->hw); + + adapter->tx_fifo_head = 0; + adapter->tx_fifo_reset_cnt++; + + return (TRUE); + } + else { + return (FALSE); + } +} + +static void +lem_set_promisc(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + u32 reg_rctl; + + reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + + if (ifp->if_flags & IFF_PROMISC) { + reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); + /* Turn this on if you want to see bad packets */ + if (lem_debug_sbp) + reg_rctl |= E1000_RCTL_SBP; + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); + } else if (ifp->if_flags & IFF_ALLMULTI) { + reg_rctl |= E1000_RCTL_MPE; + reg_rctl &= ~E1000_RCTL_UPE; + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); + } +} + +static void +lem_disable_promisc(struct adapter *adapter) +{ + u32 reg_rctl; + + reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + + reg_rctl &= (~E1000_RCTL_UPE); + reg_rctl &= (~E1000_RCTL_MPE); + reg_rctl &= (~E1000_RCTL_SBP); + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); +} + + +/********************************************************************* + * Multicast Update + * + * This routine is called whenever multicast address list is updated. + * + **********************************************************************/ + +static void +lem_set_multi(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + struct ifmultiaddr *ifma; + u32 reg_rctl = 0; + u8 *mta; /* Multicast array memory */ + int mcnt = 0; + + IOCTL_DEBUGOUT("lem_set_multi: begin"); + + if (adapter->hw.mac.type == e1000_82542 && + adapter->hw.revision_id == E1000_REVISION_2) { + reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) + e1000_pci_clear_mwi(&adapter->hw); + reg_rctl |= E1000_RCTL_RST; + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); + msec_delay(5); + } + + /* Allocate temporary memory to setup array */ + mta = malloc(sizeof(u8) * + (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (mta == NULL) + panic("lem_set_multi memory failure\n"); + +#if __FreeBSD_version < 800000 + IF_ADDR_LOCK(ifp); +#else + if_maddr_rlock(ifp); +#endif + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + + if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) + break; + + bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), + &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); + mcnt++; + } +#if __FreeBSD_version < 800000 + IF_ADDR_UNLOCK(ifp); +#else + if_maddr_runlock(ifp); +#endif + if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { + reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + reg_rctl |= E1000_RCTL_MPE; + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); + } else + e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); + + if (adapter->hw.mac.type == e1000_82542 && + adapter->hw.revision_id == E1000_REVISION_2) { + reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + reg_rctl &= ~E1000_RCTL_RST; + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); + msec_delay(5); + if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) + e1000_pci_set_mwi(&adapter->hw); + } + free(mta, M_DEVBUF); +} + + +/********************************************************************* + * Timer routine + * + * This routine checks for link status and updates statistics. + * + **********************************************************************/ + +static void +lem_local_timer(void *arg) +{ + struct adapter *adapter = arg; + struct ifnet *ifp = adapter->ifp; + + EM_CORE_LOCK_ASSERT(adapter); + + taskqueue_enqueue(adapter->tq, + &adapter->rxtx_task); + lem_update_link_status(adapter); + lem_update_stats_counters(adapter); + + if (lem_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING) + lem_print_hw_stats(adapter); + + lem_smartspeed(adapter); + + /* + * We check the watchdog: the time since + * the last TX descriptor was cleaned. + * This implies a functional TX engine. + */ + if ((adapter->watchdog_check == TRUE) && + (ticks - adapter->watchdog_time > EM_WATCHDOG)) + goto hung; + + callout_reset(&adapter->timer, hz, lem_local_timer, adapter); + return; +hung: + device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); + adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + adapter->watchdog_events++; + lem_init_locked(adapter); +} + +static void +lem_update_link_status(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct ifnet *ifp = adapter->ifp; + device_t dev = adapter->dev; + u32 link_check = 0; + + /* Get the cached link value or read phy for real */ + switch (hw->phy.media_type) { + case e1000_media_type_copper: + if (hw->mac.get_link_status) { + /* Do the work to read phy */ + e1000_check_for_link(hw); + link_check = !hw->mac.get_link_status; + if (link_check) /* ESB2 fix */ + e1000_cfg_on_link_up(hw); + } else + link_check = TRUE; + break; + case e1000_media_type_fiber: + e1000_check_for_link(hw); + link_check = (E1000_READ_REG(hw, E1000_STATUS) & + E1000_STATUS_LU); + break; + case e1000_media_type_internal_serdes: + e1000_check_for_link(hw); + link_check = adapter->hw.mac.serdes_has_link; + break; + default: + case e1000_media_type_unknown: + break; + } + + /* Now check for a transition */ + if (link_check && (adapter->link_active == 0)) { + e1000_get_speed_and_duplex(hw, &adapter->link_speed, + &adapter->link_duplex); + if (bootverbose) + device_printf(dev, "Link is up %d Mbps %s\n", + adapter->link_speed, + ((adapter->link_duplex == FULL_DUPLEX) ? + "Full Duplex" : "Half Duplex")); + adapter->link_active = 1; + adapter->smartspeed = 0; + ifp->if_baudrate = adapter->link_speed * 1000000; + if_link_state_change(ifp, LINK_STATE_UP); + } else if (!link_check && (adapter->link_active == 1)) { + ifp->if_baudrate = adapter->link_speed = 0; + adapter->link_duplex = 0; + if (bootverbose) + device_printf(dev, "Link is Down\n"); + adapter->link_active = 0; + /* Link down, disable watchdog */ + adapter->watchdog_check = FALSE; + if_link_state_change(ifp, LINK_STATE_DOWN); + } +} + +/********************************************************************* + * + * This routine disables all traffic on the adapter by issuing a + * global reset on the MAC and deallocates TX/RX buffers. + * + * This routine should always be called with BOTH the CORE + * and TX locks. + **********************************************************************/ + +static void +lem_stop(void *arg) +{ + struct adapter *adapter = arg; + struct ifnet *ifp = adapter->ifp; + + EM_CORE_LOCK_ASSERT(adapter); + EM_TX_LOCK_ASSERT(adapter); + + INIT_DEBUGOUT("lem_stop: begin"); + + lem_disable_intr(adapter); + callout_stop(&adapter->timer); + callout_stop(&adapter->tx_fifo_timer); + + /* Tell the stack that the interface is no longer active */ + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + + e1000_reset_hw(&adapter->hw); + if (adapter->hw.mac.type >= e1000_82544) + E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); +} + + +/********************************************************************* + * + * Determine hardware revision. + * + **********************************************************************/ +static void +lem_identify_hardware(struct adapter *adapter) +{ + device_t dev = adapter->dev; + + /* Make sure our PCI config space has the necessary stuff set */ + adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); + if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) && + (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) { + device_printf(dev, "Memory Access and/or Bus Master bits " + "were not set!\n"); + adapter->hw.bus.pci_cmd_word |= + (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN); + pci_write_config(dev, PCIR_COMMAND, + adapter->hw.bus.pci_cmd_word, 2); + } + + /* Save off the information about this board */ + adapter->hw.vendor_id = pci_get_vendor(dev); + adapter->hw.device_id = pci_get_device(dev); + adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); + adapter->hw.subsystem_vendor_id = + pci_read_config(dev, PCIR_SUBVEND_0, 2); + adapter->hw.subsystem_device_id = + pci_read_config(dev, PCIR_SUBDEV_0, 2); + + /* Do Shared Code Init and Setup */ + if (e1000_set_mac_type(&adapter->hw)) { + device_printf(dev, "Setup init failure\n"); + return; + } +} + +static int +lem_allocate_pci_resources(struct adapter *adapter) +{ + device_t dev = adapter->dev; + int val, rid, error = E1000_SUCCESS; + + rid = PCIR_BAR(0); + adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &rid, RF_ACTIVE); + if (adapter->memory == NULL) { + device_printf(dev, "Unable to allocate bus resource: memory\n"); + return (ENXIO); + } + adapter->osdep.mem_bus_space_tag = + rman_get_bustag(adapter->memory); + adapter->osdep.mem_bus_space_handle = + rman_get_bushandle(adapter->memory); + adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; + + /* Only older adapters use IO mapping */ + if (adapter->hw.mac.type > e1000_82543) { + /* Figure our where our IO BAR is ? */ + for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { + val = pci_read_config(dev, rid, 4); + if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { + adapter->io_rid = rid; + break; + } + rid += 4; + /* check for 64bit BAR */ + if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) + rid += 4; + } + if (rid >= PCIR_CIS) { + device_printf(dev, "Unable to locate IO BAR\n"); + return (ENXIO); + } + adapter->ioport = bus_alloc_resource_any(dev, + SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE); + if (adapter->ioport == NULL) { + device_printf(dev, "Unable to allocate bus resource: " + "ioport\n"); + return (ENXIO); + } + adapter->hw.io_base = 0; + adapter->osdep.io_bus_space_tag = + rman_get_bustag(adapter->ioport); + adapter->osdep.io_bus_space_handle = + rman_get_bushandle(adapter->ioport); + } + + adapter->hw.back = &adapter->osdep; + + return (error); +} + +/********************************************************************* + * + * Setup the Legacy or MSI Interrupt handler + * + **********************************************************************/ +int +lem_allocate_irq(struct adapter *adapter) +{ + device_t dev = adapter->dev; + int error, rid = 0; + + /* Manually turn off all interrupts */ + E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); + + /* We allocate a single interrupt resource */ + adapter->res[0] = bus_alloc_resource_any(dev, + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); + if (adapter->res[0] == NULL) { + device_printf(dev, "Unable to allocate bus resource: " + "interrupt\n"); + return (ENXIO); + } + +#ifdef EM_LEGACY_IRQ + /* We do Legacy setup */ + if ((error = bus_setup_intr(dev, adapter->res[0], +#if __FreeBSD_version > 700000 + INTR_TYPE_NET | INTR_MPSAFE, NULL, lem_intr, adapter, +#else /* 6.X */ + INTR_TYPE_NET | INTR_MPSAFE, lem_intr, adapter, +#endif + &adapter->tag[0])) != 0) { + device_printf(dev, "Failed to register interrupt handler"); + return (error); + } + +#else /* FAST_IRQ */ + /* + * Try allocating a fast interrupt and the associated deferred + * processing contexts. + */ + TASK_INIT(&adapter->rxtx_task, 0, lem_handle_rxtx, adapter); + TASK_INIT(&adapter->link_task, 0, lem_handle_link, adapter); + adapter->tq = taskqueue_create_fast("lem_taskq", M_NOWAIT, + taskqueue_thread_enqueue, &adapter->tq); + taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq", + device_get_nameunit(adapter->dev)); +#if __FreeBSD_version < 700000 + if ((error = bus_setup_intr(dev, adapter->res[0], + INTR_TYPE_NET | INTR_FAST, lem_irq_fast, adapter, +#else + if ((error = bus_setup_intr(dev, adapter->res[0], + INTR_TYPE_NET, lem_irq_fast, NULL, adapter, +#endif + &adapter->tag[0])) != 0) { + device_printf(dev, "Failed to register fast interrupt " + "handler: %d\n", error); + taskqueue_free(adapter->tq); + adapter->tq = NULL; + return (error); + } +#endif /* EM_LEGACY_IRQ */ + + return (0); +} + + +static void +lem_free_pci_resources(struct adapter *adapter) +{ + device_t dev = adapter->dev; + + + if (adapter->tag[0] != NULL) { + bus_teardown_intr(dev, adapter->res[0], + adapter->tag[0]); + adapter->tag[0] = NULL; + } + + if (adapter->res[0] != NULL) { + bus_release_resource(dev, SYS_RES_IRQ, + 0, adapter->res[0]); + } + + if (adapter->memory != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, + PCIR_BAR(0), adapter->memory); + + if (adapter->ioport != NULL) + bus_release_resource(dev, SYS_RES_IOPORT, + adapter->io_rid, adapter->ioport); +} + + +/********************************************************************* + * + * Initialize the hardware to a configuration + * as specified by the adapter structure. + * + **********************************************************************/ +static int +lem_hardware_init(struct adapter *adapter) +{ + device_t dev = adapter->dev; + u16 rx_buffer_size; + + INIT_DEBUGOUT("lem_hardware_init: begin"); + + /* Issue a global reset */ + e1000_reset_hw(&adapter->hw); + + /* When hardware is reset, fifo_head is also reset */ + adapter->tx_fifo_head = 0; + + /* + * These parameters control the automatic generation (Tx) and + * response (Rx) to Ethernet PAUSE frames. + * - High water mark should allow for at least two frames to be + * received after sending an XOFF. + * - Low water mark works best when it is very near the high water mark. + * This allows the receiver to restart by sending XON when it has + * drained a bit. Here we use an arbitary value of 1500 which will + * restart after one full frame is pulled from the buffer. There + * could be several smaller frames in the buffer and if so they will + * not trigger the XON until their total number reduces the buffer + * by 1500. + * - The pause time is fairly large at 1000 x 512ns = 512 usec. + */ + rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) & + 0xffff) << 10 ); + + adapter->hw.fc.high_water = rx_buffer_size - + roundup2(adapter->max_frame_size, 1024); + adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500; + + adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME; + adapter->hw.fc.send_xon = TRUE; + + /* Set Flow control, use the tunable location if sane */ + if ((lem_fc_setting >= 0) || (lem_fc_setting < 4)) + adapter->hw.fc.requested_mode = lem_fc_setting; + else + adapter->hw.fc.requested_mode = e1000_fc_none; + + if (e1000_init_hw(&adapter->hw) < 0) { + device_printf(dev, "Hardware Initialization Failed\n"); + return (EIO); + } + + e1000_check_for_link(&adapter->hw); + + return (0); +} + +/********************************************************************* + * + * Setup networking device structure and register an interface. + * + **********************************************************************/ +static void +lem_setup_interface(device_t dev, struct adapter *adapter) +{ + struct ifnet *ifp; + + INIT_DEBUGOUT("lem_setup_interface: begin"); + + ifp = adapter->ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) + panic("%s: can not if_alloc()", device_get_nameunit(dev)); + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + ifp->if_mtu = ETHERMTU; + ifp->if_init = lem_init; + ifp->if_softc = adapter; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_ioctl = lem_ioctl; + ifp->if_start = lem_start; + IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); + ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; + IFQ_SET_READY(&ifp->if_snd); + + ether_ifattach(ifp, adapter->hw.mac.addr); + + ifp->if_capabilities = ifp->if_capenable = 0; + +#if __FreeBSD_version >= 800000 + /* Multiqueue tx functions */ + ifp->if_transmit = lem_mq_start; + ifp->if_qflush = lem_qflush; + adapter->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &adapter->tx_mtx); +#endif + if (adapter->hw.mac.type >= e1000_82543) { + int version_cap; +#if __FreeBSD_version < 700000 + version_cap = IFCAP_HWCSUM; +#else + version_cap = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; +#endif + ifp->if_capabilities |= version_cap; + ifp->if_capenable |= version_cap; + } + + /* + * Tell the upper layer(s) we support long frames. + */ + ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; + ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; + +#ifdef DEVICE_POLLING + ifp->if_capabilities |= IFCAP_POLLING; +#endif + + /* Enable All WOL methods by default */ + if (adapter->wol) { + ifp->if_capabilities |= IFCAP_WOL; + ifp->if_capenable |= IFCAP_WOL; + } + + /* + * Specify the media types supported by this adapter and register + * callbacks to update media and link information + */ + ifmedia_init(&adapter->media, IFM_IMASK, + lem_media_change, lem_media_status); + if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || + (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { + u_char fiber_type = IFM_1000_SX; /* default type */ + + if (adapter->hw.mac.type == e1000_82545) + fiber_type = IFM_1000_LX; + ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, + 0, NULL); + ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); + } else { + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, + 0, NULL); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, + 0, NULL); + ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, + 0, NULL); + if (adapter->hw.phy.type != e1000_phy_ife) { + ifmedia_add(&adapter->media, + IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(&adapter->media, + IFM_ETHER | IFM_1000_T, 0, NULL); + } + } + ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); +} + + +/********************************************************************* + * + * Workaround for SmartSpeed on 82541 and 82547 controllers + * + **********************************************************************/ +static void +lem_smartspeed(struct adapter *adapter) +{ + u16 phy_tmp; + + if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || + adapter->hw.mac.autoneg == 0 || + (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) + return; + + if (adapter->smartspeed == 0) { + /* If Master/Slave config fault is asserted twice, + * we assume back-to-back */ + e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); + if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) + return; + e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); + if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { + e1000_read_phy_reg(&adapter->hw, + PHY_1000T_CTRL, &phy_tmp); + if(phy_tmp & CR_1000T_MS_ENABLE) { + phy_tmp &= ~CR_1000T_MS_ENABLE; + e1000_write_phy_reg(&adapter->hw, + PHY_1000T_CTRL, phy_tmp); + adapter->smartspeed++; + if(adapter->hw.mac.autoneg && + !e1000_copper_link_autoneg(&adapter->hw) && + !e1000_read_phy_reg(&adapter->hw, + PHY_CONTROL, &phy_tmp)) { + phy_tmp |= (MII_CR_AUTO_NEG_EN | + MII_CR_RESTART_AUTO_NEG); + e1000_write_phy_reg(&adapter->hw, + PHY_CONTROL, phy_tmp); + } + } + } + return; + } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { + /* If still no link, perhaps using 2/3 pair cable */ + e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); + phy_tmp |= CR_1000T_MS_ENABLE; + e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); + if(adapter->hw.mac.autoneg && + !e1000_copper_link_autoneg(&adapter->hw) && + !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { + phy_tmp |= (MII_CR_AUTO_NEG_EN | + MII_CR_RESTART_AUTO_NEG); + e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); + } + } + /* Restart process after EM_SMARTSPEED_MAX iterations */ + if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) + adapter->smartspeed = 0; +} + + +/* + * Manage DMA'able memory. + */ +static void +lem_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) +{ + if (error) + return; + *(bus_addr_t *) arg = segs[0].ds_addr; +} + +static int +lem_dma_malloc(struct adapter *adapter, bus_size_t size, + struct em_dma_alloc *dma, int mapflags) +{ + int error; + +#if __FreeBSD_version >= 700000 + error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ +#else + error = bus_dma_tag_create(NULL, /* parent */ +#endif + EM_DBA_ALIGN, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + size, /* maxsize */ + 1, /* nsegments */ + size, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockarg */ + &dma->dma_tag); + if (error) { + device_printf(adapter->dev, + "%s: bus_dma_tag_create failed: %d\n", + __func__, error); + goto fail_0; + } + + error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, + BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); + if (error) { + device_printf(adapter->dev, + "%s: bus_dmamem_alloc(%ju) failed: %d\n", + __func__, (uintmax_t)size, error); + goto fail_2; + } + + dma->dma_paddr = 0; + error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, + size, lem_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); + if (error || dma->dma_paddr == 0) { + device_printf(adapter->dev, + "%s: bus_dmamap_load failed: %d\n", + __func__, error); + goto fail_3; + } + + return (0); + +fail_3: + bus_dmamap_unload(dma->dma_tag, dma->dma_map); +fail_2: + bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); + bus_dma_tag_destroy(dma->dma_tag); +fail_0: + dma->dma_map = NULL; + dma->dma_tag = NULL; + + return (error); +} + +static void +lem_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) +{ + if (dma->dma_tag == NULL) + return; + if (dma->dma_map != NULL) { + bus_dmamap_sync(dma->dma_tag, dma->dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(dma->dma_tag, dma->dma_map); + bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); + dma->dma_map = NULL; + } + bus_dma_tag_destroy(dma->dma_tag); + dma->dma_tag = NULL; +} + + +/********************************************************************* + * + * Allocate memory for tx_buffer structures. The tx_buffer stores all + * the information needed to transmit a packet on the wire. + * + **********************************************************************/ +static int +lem_allocate_transmit_structures(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct em_buffer *tx_buffer; + int error; + + /* + * Create DMA tags for tx descriptors + */ +#if __FreeBSD_version >= 700000 + if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ +#else + if ((error = bus_dma_tag_create(NULL, /* parent */ +#endif + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + EM_TSO_SIZE, /* maxsize */ + EM_MAX_SCATTER, /* nsegments */ + EM_TSO_SEG_SIZE, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockarg */ + &adapter->txtag)) != 0) { + device_printf(dev, "Unable to allocate TX DMA tag\n"); + goto fail; + } + + adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) * + adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); + if (adapter->tx_buffer_area == NULL) { + device_printf(dev, "Unable to allocate tx_buffer memory\n"); + error = ENOMEM; + goto fail; + } + + /* Create the descriptor buffer dma maps */ + for (int i = 0; i < adapter->num_tx_desc; i++) { + tx_buffer = &adapter->tx_buffer_area[i]; + error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map); + if (error != 0) { + device_printf(dev, "Unable to create TX DMA map\n"); + goto fail; + } + tx_buffer->next_eop = -1; + } + + return (0); +fail: + lem_free_transmit_structures(adapter); + return (error); +} + +/********************************************************************* + * + * (Re)Initialize transmit structures. + * + **********************************************************************/ +static void +lem_setup_transmit_structures(struct adapter *adapter) +{ + struct em_buffer *tx_buffer; + + /* Clear the old ring contents */ + bzero(adapter->tx_desc_base, + (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); + + /* Free any existing TX buffers */ + for (int i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { + tx_buffer = &adapter->tx_buffer_area[i]; + bus_dmamap_sync(adapter->txtag, tx_buffer->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(adapter->txtag, tx_buffer->map); + m_freem(tx_buffer->m_head); + tx_buffer->m_head = NULL; + tx_buffer->next_eop = -1; + } + + /* Reset state */ + adapter->next_avail_tx_desc = 0; + adapter->next_tx_to_clean = 0; + adapter->num_tx_desc_avail = adapter->num_tx_desc; + + bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + return; +} + +/********************************************************************* + * + * Enable transmit unit. + * + **********************************************************************/ +static void +lem_initialize_transmit_unit(struct adapter *adapter) +{ + u32 tctl, tipg = 0; + u64 bus_addr; + + INIT_DEBUGOUT("lem_initialize_transmit_unit: begin"); + /* Setup the Base and Length of the Tx Descriptor Ring */ + bus_addr = adapter->txdma.dma_paddr; + E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0), + adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); + E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0), + (u32)(bus_addr >> 32)); + E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0), + (u32)bus_addr); + /* Setup the HW Tx Head and Tail descriptor pointers */ + E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0); + E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0); + + HW_DEBUGOUT2("Base = %x, Length = %x\n", + E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)), + E1000_READ_REG(&adapter->hw, E1000_TDLEN(0))); + + /* Set the default values for the Tx Inter Packet Gap timer */ + switch (adapter->hw.mac.type) { + case e1000_82542: + tipg = DEFAULT_82542_TIPG_IPGT; + tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; + tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; + break; + default: + if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || + (adapter->hw.phy.media_type == + e1000_media_type_internal_serdes)) + tipg = DEFAULT_82543_TIPG_IPGT_FIBER; + else + tipg = DEFAULT_82543_TIPG_IPGT_COPPER; + tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; + tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; + } + + E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); + E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); + if(adapter->hw.mac.type >= e1000_82540) + E1000_WRITE_REG(&adapter->hw, E1000_TADV, + adapter->tx_abs_int_delay.value); + + /* Program the Transmit Control Register */ + tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); + tctl &= ~E1000_TCTL_CT; + tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | + (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); + + /* This write will effectively turn on the transmit unit. */ + E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); + + /* Setup Transmit Descriptor Base Settings */ + adapter->txd_cmd = E1000_TXD_CMD_IFCS; + + if (adapter->tx_int_delay.value > 0) + adapter->txd_cmd |= E1000_TXD_CMD_IDE; +} + +/********************************************************************* + * + * Free all transmit related data structures. + * + **********************************************************************/ +static void +lem_free_transmit_structures(struct adapter *adapter) +{ + struct em_buffer *tx_buffer; + + INIT_DEBUGOUT("free_transmit_structures: begin"); + + if (adapter->tx_buffer_area != NULL) { + for (int i = 0; i < adapter->num_tx_desc; i++) { + tx_buffer = &adapter->tx_buffer_area[i]; + if (tx_buffer->m_head != NULL) { + bus_dmamap_sync(adapter->txtag, tx_buffer->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(adapter->txtag, + tx_buffer->map); + m_freem(tx_buffer->m_head); + tx_buffer->m_head = NULL; + } else if (tx_buffer->map != NULL) + bus_dmamap_unload(adapter->txtag, + tx_buffer->map); + if (tx_buffer->map != NULL) { + bus_dmamap_destroy(adapter->txtag, + tx_buffer->map); + tx_buffer->map = NULL; + } + } + } + if (adapter->tx_buffer_area != NULL) { + free(adapter->tx_buffer_area, M_DEVBUF); + adapter->tx_buffer_area = NULL; + } + if (adapter->txtag != NULL) { + bus_dma_tag_destroy(adapter->txtag); + adapter->txtag = NULL; + } +#if __FreeBSD_version >= 800000 + if (adapter->br != NULL) + buf_ring_free(adapter->br, M_DEVBUF); +#endif +} + +/********************************************************************* + * + * The offload context needs to be set when we transfer the first + * packet of a particular protocol (TCP/UDP). This routine has been + * enhanced to deal with inserted VLAN headers, and IPV6 (not complete) + * + * Added back the old method of keeping the current context type + * and not setting if unnecessary, as this is reported to be a + * big performance win. -jfv + **********************************************************************/ +static void +lem_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp, + u32 *txd_upper, u32 *txd_lower) +{ + struct e1000_context_desc *TXD = NULL; + struct em_buffer *tx_buffer; + struct ether_vlan_header *eh; + struct ip *ip = NULL; + struct ip6_hdr *ip6; + int curr_txd, ehdrlen; + u32 cmd, hdr_len, ip_hlen; + u16 etype; + u8 ipproto; + + + cmd = hdr_len = ipproto = 0; + curr_txd = adapter->next_avail_tx_desc; + + /* + * Determine where frame payload starts. + * Jump over vlan headers if already present, + * helpful for QinQ too. + */ + eh = mtod(mp, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + etype = ntohs(eh->evl_proto); + ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + etype = ntohs(eh->evl_encap_proto); + ehdrlen = ETHER_HDR_LEN; + } + + /* + * We only support TCP/UDP for IPv4 and IPv6 for the moment. + * TODO: Support SCTP too when it hits the tree. + */ + switch (etype) { + case ETHERTYPE_IP: + ip = (struct ip *)(mp->m_data + ehdrlen); + ip_hlen = ip->ip_hl << 2; + + /* Setup of IP header checksum. */ + if (mp->m_pkthdr.csum_flags & CSUM_IP) { + /* + * Start offset for header checksum calculation. + * End offset for header checksum calculation. + * Offset of place to put the checksum. + */ + TXD = (struct e1000_context_desc *) + &adapter->tx_desc_base[curr_txd]; + TXD->lower_setup.ip_fields.ipcss = ehdrlen; + TXD->lower_setup.ip_fields.ipcse = + htole16(ehdrlen + ip_hlen); + TXD->lower_setup.ip_fields.ipcso = + ehdrlen + offsetof(struct ip, ip_sum); + cmd |= E1000_TXD_CMD_IP; + *txd_upper |= E1000_TXD_POPTS_IXSM << 8; + } + + if (mp->m_len < ehdrlen + ip_hlen) + return; /* failure */ + + hdr_len = ehdrlen + ip_hlen; + ipproto = ip->ip_p; + + break; + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); + ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */ + + if (mp->m_len < ehdrlen + ip_hlen) + return; /* failure */ + + /* IPv6 doesn't have a header checksum. */ + + hdr_len = ehdrlen + ip_hlen; + ipproto = ip6->ip6_nxt; + + break; + default: + *txd_upper = 0; + *txd_lower = 0; + return; + } + + switch (ipproto) { + case IPPROTO_TCP: + if (mp->m_pkthdr.csum_flags & CSUM_TCP) { + *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; + *txd_upper |= E1000_TXD_POPTS_TXSM << 8; + /* no need for context if already set */ + if (adapter->last_hw_offload == CSUM_TCP) + return; + adapter->last_hw_offload = CSUM_TCP; + /* + * Start offset for payload checksum calculation. + * End offset for payload checksum calculation. + * Offset of place to put the checksum. + */ + TXD = (struct e1000_context_desc *) + &adapter->tx_desc_base[curr_txd]; + TXD->upper_setup.tcp_fields.tucss = hdr_len; + TXD->upper_setup.tcp_fields.tucse = htole16(0); + TXD->upper_setup.tcp_fields.tucso = + hdr_len + offsetof(struct tcphdr, th_sum); + cmd |= E1000_TXD_CMD_TCP; + } + break; + case IPPROTO_UDP: + { + if (mp->m_pkthdr.csum_flags & CSUM_UDP) { + *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; + *txd_upper |= E1000_TXD_POPTS_TXSM << 8; + /* no need for context if already set */ + if (adapter->last_hw_offload == CSUM_UDP) + return; + adapter->last_hw_offload = CSUM_UDP; + /* + * Start offset for header checksum calculation. + * End offset for header checksum calculation. + * Offset of place to put the checksum. + */ + TXD = (struct e1000_context_desc *) + &adapter->tx_desc_base[curr_txd]; + TXD->upper_setup.tcp_fields.tucss = hdr_len; + TXD->upper_setup.tcp_fields.tucse = htole16(0); + TXD->upper_setup.tcp_fields.tucso = + hdr_len + offsetof(struct udphdr, uh_sum); + } + /* Fall Thru */ + } + default: + break; + } + + TXD->tcp_seg_setup.data = htole32(0); + TXD->cmd_and_length = + htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); + tx_buffer = &adapter->tx_buffer_area[curr_txd]; + tx_buffer->m_head = NULL; + tx_buffer->next_eop = -1; + + if (++curr_txd == adapter->num_tx_desc) + curr_txd = 0; + + adapter->num_tx_desc_avail--; + adapter->next_avail_tx_desc = curr_txd; +} + + +/********************************************************************** + * + * Examine each tx_buffer in the used queue. If the hardware is done + * processing the packet then free associated resources. The + * tx_buffer is put back on the free queue. + * + **********************************************************************/ +static void +lem_txeof(struct adapter *adapter) +{ + int first, last, done, num_avail; + struct em_buffer *tx_buffer; + struct e1000_tx_desc *tx_desc, *eop_desc; + struct ifnet *ifp = adapter->ifp; + + EM_TX_LOCK_ASSERT(adapter); + + if (adapter->num_tx_desc_avail == adapter->num_tx_desc) + return; + + num_avail = adapter->num_tx_desc_avail; + first = adapter->next_tx_to_clean; + tx_desc = &adapter->tx_desc_base[first]; + tx_buffer = &adapter->tx_buffer_area[first]; + last = tx_buffer->next_eop; + eop_desc = &adapter->tx_desc_base[last]; + + /* + * What this does is get the index of the + * first descriptor AFTER the EOP of the + * first packet, that way we can do the + * simple comparison on the inner while loop. + */ + if (++last == adapter->num_tx_desc) + last = 0; + done = last; + + bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, + BUS_DMASYNC_POSTREAD); + + while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { + /* We clean the range of the packet */ + while (first != done) { + tx_desc->upper.data = 0; + tx_desc->lower.data = 0; + tx_desc->buffer_addr = 0; + ++num_avail; + + if (tx_buffer->m_head) { + ifp->if_opackets++; + bus_dmamap_sync(adapter->txtag, + tx_buffer->map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(adapter->txtag, + tx_buffer->map); + + m_freem(tx_buffer->m_head); + tx_buffer->m_head = NULL; + } + tx_buffer->next_eop = -1; + adapter->watchdog_time = ticks; + + if (++first == adapter->num_tx_desc) + first = 0; + + tx_buffer = &adapter->tx_buffer_area[first]; + tx_desc = &adapter->tx_desc_base[first]; + } + /* See if we can continue to the next packet */ + last = tx_buffer->next_eop; + if (last != -1) { + eop_desc = &adapter->tx_desc_base[last]; + /* Get new done point */ + if (++last == adapter->num_tx_desc) last = 0; + done = last; + } else + break; + } + bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + adapter->next_tx_to_clean = first; + + /* + * If we have enough room, clear IFF_DRV_OACTIVE to + * tell the stack that it is OK to send packets. + * If there are no pending descriptors, clear the watchdog. + */ + if (num_avail > EM_TX_CLEANUP_THRESHOLD) { + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + if (num_avail == adapter->num_tx_desc) { + adapter->watchdog_check = FALSE; + adapter->num_tx_desc_avail = num_avail; + return; + } + } + + adapter->num_tx_desc_avail = num_avail; + return; +} + +/********************************************************************* + * + * When Link is lost sometimes there is work still in the TX ring + * which may result in a watchdog, rather than allow that we do an + * attempted cleanup and then reinit here. Note that this has been + * seens mostly with fiber adapters. + * + **********************************************************************/ +static void +lem_tx_purge(struct adapter *adapter) +{ + if ((!adapter->link_active) && (adapter->watchdog_check)) { + EM_TX_LOCK(adapter); + lem_txeof(adapter); + EM_TX_UNLOCK(adapter); + if (adapter->watchdog_check) /* Still outstanding? */ + lem_init_locked(adapter); + } +} + +/********************************************************************* + * + * Get a buffer from system mbuf buffer pool. + * + **********************************************************************/ +static int +lem_get_buf(struct adapter *adapter, int i) +{ + struct mbuf *m; + bus_dma_segment_t segs[1]; + bus_dmamap_t map; + struct em_buffer *rx_buffer; + int error, nsegs; + + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + if (m == NULL) { + adapter->mbuf_cluster_failed++; + return (ENOBUFS); + } + m->m_len = m->m_pkthdr.len = MCLBYTES; + + if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) + m_adj(m, ETHER_ALIGN); + + /* + * Using memory from the mbuf cluster pool, invoke the + * bus_dma machinery to arrange the memory mapping. + */ + error = bus_dmamap_load_mbuf_sg(adapter->rxtag, + adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + m_free(m); + return (error); + } + + /* If nsegs is wrong then the stack is corrupt. */ + KASSERT(nsegs == 1, ("Too many segments returned!")); + + rx_buffer = &adapter->rx_buffer_area[i]; + if (rx_buffer->m_head != NULL) + bus_dmamap_unload(adapter->rxtag, rx_buffer->map); + + map = rx_buffer->map; + rx_buffer->map = adapter->rx_sparemap; + adapter->rx_sparemap = map; + bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD); + rx_buffer->m_head = m; + + adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr); + return (0); +} + +/********************************************************************* + * + * Allocate memory for rx_buffer structures. Since we use one + * rx_buffer per received packet, the maximum number of rx_buffer's + * that we'll need is equal to the number of receive descriptors + * that we've allocated. + * + **********************************************************************/ +static int +lem_allocate_receive_structures(struct adapter *adapter) +{ + device_t dev = adapter->dev; + struct em_buffer *rx_buffer; + int i, error; + + adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) * + adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); + if (adapter->rx_buffer_area == NULL) { + device_printf(dev, "Unable to allocate rx_buffer memory\n"); + return (ENOMEM); + } + +#if __FreeBSD_version >= 700000 + error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ +#else + error = bus_dma_tag_create(NULL, /* parent */ +#endif + 1, 0, /* alignment, bounds */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MCLBYTES, /* maxsize */ + 1, /* nsegments */ + MCLBYTES, /* maxsegsize */ + 0, /* flags */ + NULL, /* lockfunc */ + NULL, /* lockarg */ + &adapter->rxtag); + if (error) { + device_printf(dev, "%s: bus_dma_tag_create failed %d\n", + __func__, error); + goto fail; + } + + /* Create the spare map (used by getbuf) */ + error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT, + &adapter->rx_sparemap); + if (error) { + device_printf(dev, "%s: bus_dmamap_create failed: %d\n", + __func__, error); + goto fail; + } + + rx_buffer = adapter->rx_buffer_area; + for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { + error = bus_dmamap_create(adapter->rxtag, BUS_DMA_NOWAIT, + &rx_buffer->map); + if (error) { + device_printf(dev, "%s: bus_dmamap_create failed: %d\n", + __func__, error); + goto fail; + } + } + + return (0); + +fail: + lem_free_receive_structures(adapter); + return (error); +} + +/********************************************************************* + * + * (Re)initialize receive structures. + * + **********************************************************************/ +static int +lem_setup_receive_structures(struct adapter *adapter) +{ + struct em_buffer *rx_buffer; + int i, error; + + /* Reset descriptor ring */ + bzero(adapter->rx_desc_base, + (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc); + + /* Free current RX buffers. */ + rx_buffer = adapter->rx_buffer_area; + for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { + if (rx_buffer->m_head != NULL) { + bus_dmamap_sync(adapter->rxtag, rx_buffer->map, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(adapter->rxtag, rx_buffer->map); + m_freem(rx_buffer->m_head); + rx_buffer->m_head = NULL; + } + } + + /* Allocate new ones. */ + for (i = 0; i < adapter->num_rx_desc; i++) { + error = lem_get_buf(adapter, i); + if (error) + return (error); + } + + /* Setup our descriptor pointers */ + adapter->next_rx_desc_to_check = 0; + bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + return (0); +} + +/********************************************************************* + * + * Enable receive unit. + * + **********************************************************************/ +#define MAX_INTS_PER_SEC 8000 +#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) + +static void +lem_initialize_receive_unit(struct adapter *adapter) +{ + struct ifnet *ifp = adapter->ifp; + u64 bus_addr; + u32 rctl, rxcsum; + + INIT_DEBUGOUT("lem_initialize_receive_unit: begin"); + + /* + * Make sure receives are disabled while setting + * up the descriptor ring + */ + rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); + + if (adapter->hw.mac.type >= e1000_82540) { + E1000_WRITE_REG(&adapter->hw, E1000_RADV, + adapter->rx_abs_int_delay.value); + /* + * Set the interrupt throttling rate. Value is calculated + * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) + */ + E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR); + } + + /* + ** When using MSIX interrupts we need to throttle + ** using the EITR register (82574 only) + */ + if (adapter->msix) + for (int i = 0; i < 4; i++) + E1000_WRITE_REG(&adapter->hw, + E1000_EITR_82574(i), DEFAULT_ITR); + + /* Disable accelerated ackknowledge */ + if (adapter->hw.mac.type == e1000_82574) + E1000_WRITE_REG(&adapter->hw, + E1000_RFCTL, E1000_RFCTL_ACK_DIS); + + /* Setup the Base and Length of the Rx Descriptor Ring */ + bus_addr = adapter->rxdma.dma_paddr; + E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0), + adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); + E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0), + (u32)(bus_addr >> 32)); + E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0), + (u32)bus_addr); + + /* Setup the Receive Control Register */ + rctl &= ~(3 << E1000_RCTL_MO_SHIFT); + rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | + E1000_RCTL_RDMTS_HALF | + (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + + /* Make sure VLAN Filters are off */ + rctl &= ~E1000_RCTL_VFE; + + if (e1000_tbi_sbp_enabled_82543(&adapter->hw)) + rctl |= E1000_RCTL_SBP; + else + rctl &= ~E1000_RCTL_SBP; + + switch (adapter->rx_buffer_len) { + default: + case 2048: + rctl |= E1000_RCTL_SZ_2048; + break; + case 4096: + rctl |= E1000_RCTL_SZ_4096 | + E1000_RCTL_BSEX | E1000_RCTL_LPE; + break; + case 8192: + rctl |= E1000_RCTL_SZ_8192 | + E1000_RCTL_BSEX | E1000_RCTL_LPE; + break; + case 16384: + rctl |= E1000_RCTL_SZ_16384 | + E1000_RCTL_BSEX | E1000_RCTL_LPE; + break; + } + + if (ifp->if_mtu > ETHERMTU) + rctl |= E1000_RCTL_LPE; + else + rctl &= ~E1000_RCTL_LPE; + + /* Enable 82543 Receive Checksum Offload for TCP and UDP */ + if ((adapter->hw.mac.type >= e1000_82543) && + (ifp->if_capenable & IFCAP_RXCSUM)) { + rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM); + rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); + E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum); + } + + /* Enable Receives */ + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); + + /* + * Setup the HW Rx Head and + * Tail Descriptor Pointers + */ + E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); + E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), adapter->num_rx_desc - 1); + + return; +} + +/********************************************************************* + * + * Free receive related data structures. + * + **********************************************************************/ +static void +lem_free_receive_structures(struct adapter *adapter) +{ + struct em_buffer *rx_buffer; + int i; + + INIT_DEBUGOUT("free_receive_structures: begin"); + + if (adapter->rx_sparemap) { + bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap); + adapter->rx_sparemap = NULL; + } + + /* Cleanup any existing buffers */ + if (adapter->rx_buffer_area != NULL) { + rx_buffer = adapter->rx_buffer_area; + for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) { + if (rx_buffer->m_head != NULL) { + bus_dmamap_sync(adapter->rxtag, rx_buffer->map, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(adapter->rxtag, + rx_buffer->map); + m_freem(rx_buffer->m_head); + rx_buffer->m_head = NULL; + } else if (rx_buffer->map != NULL) + bus_dmamap_unload(adapter->rxtag, + rx_buffer->map); + if (rx_buffer->map != NULL) { + bus_dmamap_destroy(adapter->rxtag, + rx_buffer->map); + rx_buffer->map = NULL; + } + } + } + + if (adapter->rx_buffer_area != NULL) { + free(adapter->rx_buffer_area, M_DEVBUF); + adapter->rx_buffer_area = NULL; + } + + if (adapter->rxtag != NULL) { + bus_dma_tag_destroy(adapter->rxtag); + adapter->rxtag = NULL; + } +} + +/********************************************************************* + * + * This routine executes in interrupt context. It replenishes + * the mbufs in the descriptor and sends data which has been + * dma'ed into host memory to upper layer. + * + * We loop at most count times if count is > 0, or until done if + * count < 0. + * + * For polling we also now return the number of cleaned packets + *********************************************************************/ +static int +lem_rxeof(struct adapter *adapter, int count) +{ + struct ifnet *ifp = adapter->ifp;; + struct mbuf *mp; + u8 status, accept_frame = 0, eop = 0; + u16 len, desc_len, prev_len_adj; + int i, rx_sent = 0; + struct e1000_rx_desc *current_desc; + + EM_RX_LOCK(adapter); + i = adapter->next_rx_desc_to_check; + current_desc = &adapter->rx_desc_base[i]; + bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, + BUS_DMASYNC_POSTREAD); + + if (!((current_desc->status) & E1000_RXD_STAT_DD)) { + EM_RX_UNLOCK(adapter); + return (rx_sent); + } + + while ((current_desc->status & E1000_RXD_STAT_DD) && + (count != 0) && + (ifp->if_drv_flags & IFF_DRV_RUNNING)) { + struct mbuf *m = NULL; + + mp = adapter->rx_buffer_area[i].m_head; + /* + * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT + * needs to access the last received byte in the mbuf. + */ + bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map, + BUS_DMASYNC_POSTREAD); + + accept_frame = 1; + prev_len_adj = 0; + desc_len = le16toh(current_desc->length); + status = current_desc->status; + if (status & E1000_RXD_STAT_EOP) { + count--; + eop = 1; + if (desc_len < ETHER_CRC_LEN) { + len = 0; + prev_len_adj = ETHER_CRC_LEN - desc_len; + } else + len = desc_len - ETHER_CRC_LEN; + } else { + eop = 0; + len = desc_len; + } + + if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) { + u8 last_byte; + u32 pkt_len = desc_len; + + if (adapter->fmp != NULL) + pkt_len += adapter->fmp->m_pkthdr.len; + + last_byte = *(mtod(mp, caddr_t) + desc_len - 1); + if (TBI_ACCEPT(&adapter->hw, status, + current_desc->errors, pkt_len, last_byte, + adapter->min_frame_size, adapter->max_frame_size)) { + e1000_tbi_adjust_stats_82543(&adapter->hw, + &adapter->stats, pkt_len, + adapter->hw.mac.addr, + adapter->max_frame_size); + if (len > 0) + len--; + } else + accept_frame = 0; + } + + if (accept_frame) { + if (lem_get_buf(adapter, i) != 0) { + ifp->if_iqdrops++; + goto discard; + } + + /* Assign correct length to the current fragment */ + mp->m_len = len; + + if (adapter->fmp == NULL) { + mp->m_pkthdr.len = len; + adapter->fmp = mp; /* Store the first mbuf */ + adapter->lmp = mp; + } else { + /* Chain mbuf's together */ + mp->m_flags &= ~M_PKTHDR; + /* + * Adjust length of previous mbuf in chain if + * we received less than 4 bytes in the last + * descriptor. + */ + if (prev_len_adj > 0) { + adapter->lmp->m_len -= prev_len_adj; + adapter->fmp->m_pkthdr.len -= + prev_len_adj; + } + adapter->lmp->m_next = mp; + adapter->lmp = adapter->lmp->m_next; + adapter->fmp->m_pkthdr.len += len; + } + + if (eop) { + adapter->fmp->m_pkthdr.rcvif = ifp; + ifp->if_ipackets++; + lem_receive_checksum(adapter, current_desc, + adapter->fmp); +#ifndef __NO_STRICT_ALIGNMENT + if (adapter->max_frame_size > + (MCLBYTES - ETHER_ALIGN) && + lem_fixup_rx(adapter) != 0) + goto skip; +#endif + if (status & E1000_RXD_STAT_VP) { +#if __FreeBSD_version < 700000 + VLAN_INPUT_TAG_NEW(ifp, adapter->fmp, + (le16toh(current_desc->special) & + E1000_RXD_SPC_VLAN_MASK)); +#else + adapter->fmp->m_pkthdr.ether_vtag = + (le16toh(current_desc->special) & + E1000_RXD_SPC_VLAN_MASK); + adapter->fmp->m_flags |= M_VLANTAG; +#endif + } +#ifndef __NO_STRICT_ALIGNMENT +skip: +#endif + m = adapter->fmp; + adapter->fmp = NULL; + adapter->lmp = NULL; + } + } else { + ifp->if_ierrors++; +discard: + /* Reuse loaded DMA map and just update mbuf chain */ + mp = adapter->rx_buffer_area[i].m_head; + mp->m_len = mp->m_pkthdr.len = MCLBYTES; + mp->m_data = mp->m_ext.ext_buf; + mp->m_next = NULL; + if (adapter->max_frame_size <= + (MCLBYTES - ETHER_ALIGN)) + m_adj(mp, ETHER_ALIGN); + if (adapter->fmp != NULL) { + m_freem(adapter->fmp); + adapter->fmp = NULL; + adapter->lmp = NULL; + } + m = NULL; + } + + /* Zero out the receive descriptors status. */ + current_desc->status = 0; + bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + + /* Advance our pointers to the next descriptor. */ + if (++i == adapter->num_rx_desc) + i = 0; + /* Call into the stack */ + if (m != NULL) { + adapter->next_rx_desc_to_check = i; + EM_RX_UNLOCK(adapter); + (*ifp->if_input)(ifp, m); + EM_RX_LOCK(adapter); + rx_sent++; + i = adapter->next_rx_desc_to_check; + } + current_desc = &adapter->rx_desc_base[i]; + } + adapter->next_rx_desc_to_check = i; + + /* Advance the E1000's Receive Queue #0 "Tail Pointer". */ + if (--i < 0) + i = adapter->num_rx_desc - 1; + E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i); + EM_RX_UNLOCK(adapter); + return (rx_sent); +} + +#ifndef __NO_STRICT_ALIGNMENT +/* + * When jumbo frames are enabled we should realign entire payload on + * architecures with strict alignment. This is serious design mistake of 8254x + * as it nullifies DMA operations. 8254x just allows RX buffer size to be + * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its + * payload. On architecures without strict alignment restrictions 8254x still + * performs unaligned memory access which would reduce the performance too. + * To avoid copying over an entire frame to align, we allocate a new mbuf and + * copy ethernet header to the new mbuf. The new mbuf is prepended into the + * existing mbuf chain. + * + * Be aware, best performance of the 8254x is achived only when jumbo frame is + * not used at all on architectures with strict alignment. + */ +static int +lem_fixup_rx(struct adapter *adapter) +{ + struct mbuf *m, *n; + int error; + + error = 0; + m = adapter->fmp; + if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { + bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); + m->m_data += ETHER_HDR_LEN; + } else { + MGETHDR(n, M_DONTWAIT, MT_DATA); + if (n != NULL) { + bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); + m->m_data += ETHER_HDR_LEN; + m->m_len -= ETHER_HDR_LEN; + n->m_len = ETHER_HDR_LEN; + M_MOVE_PKTHDR(n, m); + n->m_next = m; + adapter->fmp = n; + } else { + adapter->dropped_pkts++; + m_freem(adapter->fmp); + adapter->fmp = NULL; + error = ENOMEM; + } + } + + return (error); +} +#endif + +/********************************************************************* + * + * Verify that the hardware indicated that the checksum is valid. + * Inform the stack about the status of checksum so that stack + * doesn't spend time verifying the checksum. + * + *********************************************************************/ +static void +lem_receive_checksum(struct adapter *adapter, + struct e1000_rx_desc *rx_desc, struct mbuf *mp) +{ + /* 82543 or newer only */ + if ((adapter->hw.mac.type < e1000_82543) || + /* Ignore Checksum bit is set */ + (rx_desc->status & E1000_RXD_STAT_IXSM)) { + mp->m_pkthdr.csum_flags = 0; + return; + } + + if (rx_desc->status & E1000_RXD_STAT_IPCS) { + /* Did it pass? */ + if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) { + /* IP Checksum Good */ + mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; + mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; + + } else { + mp->m_pkthdr.csum_flags = 0; + } + } + + if (rx_desc->status & E1000_RXD_STAT_TCPCS) { + /* Did it pass? */ + if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) { + mp->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + mp->m_pkthdr.csum_data = htons(0xffff); + } + } +} + +#if __FreeBSD_version >= 700029 +/* + * This routine is run via an vlan + * config EVENT + */ +static void +lem_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) +{ + struct adapter *adapter = ifp->if_softc; + u32 index, bit; + + if (ifp->if_softc != arg) /* Not our event */ + return; + + if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ + return; + + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + lem_shadow_vfta[index] |= (1 << bit); + ++adapter->num_vlans; + /* Re-init to load the changes */ + lem_init(adapter); +} + +/* + * This routine is run via an vlan + * unconfig EVENT + */ +static void +lem_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) +{ + struct adapter *adapter = ifp->if_softc; + u32 index, bit; + + if (ifp->if_softc != arg) + return; + + if ((vtag == 0) || (vtag > 4095)) /* Invalid */ + return; + + index = (vtag >> 5) & 0x7F; + bit = vtag & 0x1F; + lem_shadow_vfta[index] &= ~(1 << bit); + --adapter->num_vlans; + /* Re-init to load the changes */ + lem_init(adapter); +} + +static void +lem_setup_vlan_hw_support(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + /* + ** We get here thru init_locked, meaning + ** a soft reset, this has already cleared + ** the VFTA and other state, so if there + ** have been no vlan's registered do nothing. + */ + if (adapter->num_vlans == 0) + return; + + /* + ** A soft reset zero's out the VFTA, so + ** we need to repopulate it now. + */ + for (int i = 0; i < EM_VFTA_SIZE; i++) + if (lem_shadow_vfta[i] != 0) + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, + i, lem_shadow_vfta[i]); + + reg = E1000_READ_REG(hw, E1000_CTRL); + reg |= E1000_CTRL_VME; + E1000_WRITE_REG(hw, E1000_CTRL, reg); + + /* Enable the Filter Table */ + reg = E1000_READ_REG(hw, E1000_RCTL); + reg &= ~E1000_RCTL_CFIEN; + reg |= E1000_RCTL_VFE; + E1000_WRITE_REG(hw, E1000_RCTL, reg); + + /* Update the frame size */ + E1000_WRITE_REG(&adapter->hw, E1000_RLPML, + adapter->max_frame_size + VLAN_TAG_SIZE); +} +#endif + +static void +lem_enable_intr(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ims_mask = IMS_ENABLE_MASK; + + if (adapter->msix) { + E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); + ims_mask |= EM_MSIX_MASK; + } + E1000_WRITE_REG(hw, E1000_IMS, ims_mask); +} + +static void +lem_disable_intr(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + if (adapter->msix) + E1000_WRITE_REG(hw, EM_EIAC, 0); + E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); +} + +/* + * Bit of a misnomer, what this really means is + * to enable OS management of the system... aka + * to disable special hardware management features + */ +static void +lem_init_manageability(struct adapter *adapter) +{ + /* A shared code workaround */ + if (adapter->has_manage) { + int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); + /* disable hardware interception of ARP */ + manc &= ~(E1000_MANC_ARP_EN); + E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); + } +} + +/* + * Give control back to hardware management + * controller if there is one. + */ +static void +lem_release_manageability(struct adapter *adapter) +{ + if (adapter->has_manage) { + int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); + + /* re-enable hardware interception of ARP */ + manc |= E1000_MANC_ARP_EN; + E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); + } +} + +/* + * lem_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means + * that the driver is loaded. For AMT version type f/w + * this means that the network i/f is open. + */ +static void +lem_get_hw_control(struct adapter *adapter) +{ + u32 ctrl_ext; + + ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, + ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); + return; +} + +/* + * lem_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is no longer loaded. For AMT versions of the + * f/w this means that the network i/f is closed. + */ +static void +lem_release_hw_control(struct adapter *adapter) +{ + u32 ctrl_ext; + + if (!adapter->has_manage) + return; + + ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, + ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); + return; +} + +static int +lem_is_valid_ether_addr(u8 *addr) +{ + char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; + + if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { + return (FALSE); + } + + return (TRUE); +} + +/* +** Parse the interface capabilities with regard +** to both system management and wake-on-lan for +** later use. +*/ +static void +lem_get_wakeup(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + u16 eeprom_data = 0, device_id, apme_mask; + + adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); + apme_mask = EM_EEPROM_APME; + + switch (adapter->hw.mac.type) { + case e1000_82542: + case e1000_82543: + break; + case e1000_82544: + e1000_read_nvm(&adapter->hw, + NVM_INIT_CONTROL2_REG, 1, &eeprom_data); + apme_mask = EM_82544_APME; + break; + case e1000_82546: + case e1000_82546_rev_3: + if (adapter->hw.bus.func == 1) { + e1000_read_nvm(&adapter->hw, + NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); + break; + } else + e1000_read_nvm(&adapter->hw, + NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); + break; + default: + e1000_read_nvm(&adapter->hw, + NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); + break; + } + if (eeprom_data & apme_mask) + adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); + /* + * We have the eeprom settings, now apply the special cases + * where the eeprom may be wrong or the board won't support + * wake on lan on a particular port + */ + device_id = pci_get_device(dev); + switch (device_id) { + case E1000_DEV_ID_82546GB_PCIE: + adapter->wol = 0; + break; + case E1000_DEV_ID_82546EB_FIBER: + case E1000_DEV_ID_82546GB_FIBER: + /* Wake events only supported on port A for dual fiber + * regardless of eeprom setting */ + if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & + E1000_STATUS_FUNC_1) + adapter->wol = 0; + break; + case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: + /* if quad port adapter, disable WoL on all but port A */ + if (global_quad_port_a != 0) + adapter->wol = 0; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; + } + return; +} + + +/* + * Enable PCI Wake On Lan capability + */ +void +lem_enable_wakeup(device_t dev) +{ + struct adapter *adapter = device_get_softc(dev); + struct ifnet *ifp = adapter->ifp; + u32 pmc, ctrl, ctrl_ext, rctl; + u16 status; + + if ((pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)) + return; + + /* Advertise the wakeup capability */ + ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); + ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); + E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); + E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); + + /* Keep the laser running on Fiber adapters */ + if (adapter->hw.phy.media_type == e1000_media_type_fiber || + adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { + ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); + } + + /* + ** Determine type of Wakeup: note that wol + ** is set with all bits on by default. + */ + if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0) + adapter->wol &= ~E1000_WUFC_MAG; + + if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0) + adapter->wol &= ~E1000_WUFC_MC; + else { + rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); + rctl |= E1000_RCTL_MPE; + E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); + } + + if (adapter->hw.mac.type == e1000_pchlan) { + if (lem_enable_phy_wakeup(adapter)) + return; + } else { + E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); + E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); + } + + + /* Request PME */ + status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); + status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); + if (ifp->if_capenable & IFCAP_WOL) + status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; + pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); + + return; +} + +/* +** WOL in the newer chipset interfaces (pchlan) +** require thing to be copied into the phy +*/ +static int +lem_enable_phy_wakeup(struct adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 mreg, ret = 0; + u16 preg; + + /* copy MAC RARs to PHY RARs */ + for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) { + mreg = E1000_READ_REG(hw, E1000_RAL(i)); + e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF)); + e1000_write_phy_reg(hw, BM_RAR_M(i), + (u16)((mreg >> 16) & 0xFFFF)); + mreg = E1000_READ_REG(hw, E1000_RAH(i)); + e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF)); + e1000_write_phy_reg(hw, BM_RAR_CTRL(i), + (u16)((mreg >> 16) & 0xFFFF)); + } + + /* copy MAC MTA to PHY MTA */ + for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { + mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); + e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); + e1000_write_phy_reg(hw, BM_MTA(i) + 1, + (u16)((mreg >> 16) & 0xFFFF)); + } + + /* configure PHY Rx Control register */ + e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); + mreg = E1000_READ_REG(hw, E1000_RCTL); + if (mreg & E1000_RCTL_UPE) + preg |= BM_RCTL_UPE; + if (mreg & E1000_RCTL_MPE) + preg |= BM_RCTL_MPE; + preg &= ~(BM_RCTL_MO_MASK); + if (mreg & E1000_RCTL_MO_3) + preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) + << BM_RCTL_MO_SHIFT); + if (mreg & E1000_RCTL_BAM) + preg |= BM_RCTL_BAM; + if (mreg & E1000_RCTL_PMCF) + preg |= BM_RCTL_PMCF; + mreg = E1000_READ_REG(hw, E1000_CTRL); + if (mreg & E1000_CTRL_RFCE) + preg |= BM_RCTL_RFCE; + e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); + + /* enable PHY wakeup in MAC register */ + E1000_WRITE_REG(hw, E1000_WUC, + E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); + E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); + + /* configure and enable PHY wakeup in PHY registers */ + e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); + e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); + + /* activate PHY wakeup */ + ret = hw->phy.ops.acquire(hw); + if (ret) { + printf("Could not acquire PHY\n"); + return ret; + } + e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, + (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); + ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); + if (ret) { + printf("Could not read PHY page 769\n"); + goto out; + } + preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; + ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); + if (ret) + printf("Could not set PHY Host Wakeup bit\n"); +out: + hw->phy.ops.release(hw); + + return ret; +} + + +/********************************************************************* +* 82544 Coexistence issue workaround. +* There are 2 issues. +* 1. Transmit Hang issue. +* To detect this issue, following equation can be used... +* SIZE[3:0] + ADDR[2:0] = SUM[3:0]. +* If SUM[3:0] is in between 1 to 4, we will have this issue. +* +* 2. DAC issue. +* To detect this issue, following equation can be used... +* SIZE[3:0] + ADDR[2:0] = SUM[3:0]. +* If SUM[3:0] is in between 9 to c, we will have this issue. +* +* +* WORKAROUND: +* Make sure we do not have ending address +* as 1,2,3,4(Hang) or 9,a,b,c (DAC) +* +*************************************************************************/ +static u32 +lem_fill_descriptors (bus_addr_t address, u32 length, + PDESC_ARRAY desc_array) +{ + u32 safe_terminator; + + /* Since issue is sensitive to length and address.*/ + /* Let us first check the address...*/ + if (length <= 4) { + desc_array->descriptor[0].address = address; + desc_array->descriptor[0].length = length; + desc_array->elements = 1; + return (desc_array->elements); + } + safe_terminator = (u32)((((u32)address & 0x7) + + (length & 0xF)) & 0xF); + /* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */ + if (safe_terminator == 0 || + (safe_terminator > 4 && + safe_terminator < 9) || + (safe_terminator > 0xC && + safe_terminator <= 0xF)) { + desc_array->descriptor[0].address = address; + desc_array->descriptor[0].length = length; + desc_array->elements = 1; + return (desc_array->elements); + } + + desc_array->descriptor[0].address = address; + desc_array->descriptor[0].length = length - 4; + desc_array->descriptor[1].address = address + (length - 4); + desc_array->descriptor[1].length = 4; + desc_array->elements = 2; + return (desc_array->elements); +} + +/********************************************************************** + * + * Update the board statistics counters. + * + **********************************************************************/ +static void +lem_update_stats_counters(struct adapter *adapter) +{ + struct ifnet *ifp; + + if(adapter->hw.phy.media_type == e1000_media_type_copper || + (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { + adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); + adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); + } + adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); + adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); + adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); + adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); + + adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); + adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); + adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); + adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); + adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); + adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); + adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); + adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); + adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); + adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); + adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); + adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); + adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); + adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); + adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); + adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); + adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); + adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); + adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); + adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); + + /* For the 64-bit byte counters the low dword must be read first. */ + /* Both registers clear on the read of the high dword */ + + adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH); + adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH); + + adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); + adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); + adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); + adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); + adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); + + adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); + adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); + + adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); + adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); + adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); + adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); + adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); + adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); + adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); + adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); + adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); + adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); + + if (adapter->hw.mac.type >= e1000_82543) { + adapter->stats.algnerrc += + E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); + adapter->stats.rxerrc += + E1000_READ_REG(&adapter->hw, E1000_RXERRC); + adapter->stats.tncrs += + E1000_READ_REG(&adapter->hw, E1000_TNCRS); + adapter->stats.cexterr += + E1000_READ_REG(&adapter->hw, E1000_CEXTERR); + adapter->stats.tsctc += + E1000_READ_REG(&adapter->hw, E1000_TSCTC); + adapter->stats.tsctfc += + E1000_READ_REG(&adapter->hw, E1000_TSCTFC); + } + ifp = adapter->ifp; + + ifp->if_collisions = adapter->stats.colc; + + /* Rx Errors */ + ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc + + adapter->stats.crcerrs + adapter->stats.algnerrc + + adapter->stats.ruc + adapter->stats.roc + + adapter->stats.mpc + adapter->stats.cexterr; + + /* Tx Errors */ + ifp->if_oerrors = adapter->stats.ecol + + adapter->stats.latecol + adapter->watchdog_events; +} + + +/********************************************************************** + * + * This routine is called only when lem_display_debug_stats is enabled. + * This routine provides a way to take a look at important statistics + * maintained by the driver and hardware. + * + **********************************************************************/ +static void +lem_print_debug_info(struct adapter *adapter) +{ + device_t dev = adapter->dev; + u8 *hw_addr = adapter->hw.hw_addr; + + device_printf(dev, "Adapter hardware address = %p \n", hw_addr); + device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n", + E1000_READ_REG(&adapter->hw, E1000_CTRL), + E1000_READ_REG(&adapter->hw, E1000_RCTL)); + device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n", + ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\ + (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) ); + device_printf(dev, "Flow control watermarks high = %d low = %d\n", + adapter->hw.fc.high_water, + adapter->hw.fc.low_water); + device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n", + E1000_READ_REG(&adapter->hw, E1000_TIDV), + E1000_READ_REG(&adapter->hw, E1000_TADV)); + device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n", + E1000_READ_REG(&adapter->hw, E1000_RDTR), + E1000_READ_REG(&adapter->hw, E1000_RADV)); + device_printf(dev, "fifo workaround = %lld, fifo_reset_count = %lld\n", + (long long)adapter->tx_fifo_wrk_cnt, + (long long)adapter->tx_fifo_reset_cnt); + device_printf(dev, "hw tdh = %d, hw tdt = %d\n", + E1000_READ_REG(&adapter->hw, E1000_TDH(0)), + E1000_READ_REG(&adapter->hw, E1000_TDT(0))); + device_printf(dev, "hw rdh = %d, hw rdt = %d\n", + E1000_READ_REG(&adapter->hw, E1000_RDH(0)), + E1000_READ_REG(&adapter->hw, E1000_RDT(0))); + device_printf(dev, "Num Tx descriptors avail = %d\n", + adapter->num_tx_desc_avail); + device_printf(dev, "Tx Descriptors not avail1 = %ld\n", + adapter->no_tx_desc_avail1); + device_printf(dev, "Tx Descriptors not avail2 = %ld\n", + adapter->no_tx_desc_avail2); + device_printf(dev, "Std mbuf failed = %ld\n", + adapter->mbuf_alloc_failed); + device_printf(dev, "Std mbuf cluster failed = %ld\n", + adapter->mbuf_cluster_failed); + device_printf(dev, "Driver dropped packets = %ld\n", + adapter->dropped_pkts); + device_printf(dev, "Driver tx dma failure in encap = %ld\n", + adapter->no_tx_dma_setup); +} + +static void +lem_print_hw_stats(struct adapter *adapter) +{ + device_t dev = adapter->dev; + + device_printf(dev, "Excessive collisions = %lld\n", + (long long)adapter->stats.ecol); +#if (DEBUG_HW > 0) /* Dont output these errors normally */ + device_printf(dev, "Symbol errors = %lld\n", + (long long)adapter->stats.symerrs); +#endif + device_printf(dev, "Sequence errors = %lld\n", + (long long)adapter->stats.sec); + device_printf(dev, "Defer count = %lld\n", + (long long)adapter->stats.dc); + device_printf(dev, "Missed Packets = %lld\n", + (long long)adapter->stats.mpc); + device_printf(dev, "Receive No Buffers = %lld\n", + (long long)adapter->stats.rnbc); + /* RLEC is inaccurate on some hardware, calculate our own. */ + device_printf(dev, "Receive Length Errors = %lld\n", + ((long long)adapter->stats.roc + (long long)adapter->stats.ruc)); + device_printf(dev, "Receive errors = %lld\n", + (long long)adapter->stats.rxerrc); + device_printf(dev, "Crc errors = %lld\n", + (long long)adapter->stats.crcerrs); + device_printf(dev, "Alignment errors = %lld\n", + (long long)adapter->stats.algnerrc); + device_printf(dev, "Collision/Carrier extension errors = %lld\n", + (long long)adapter->stats.cexterr); + device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns); + device_printf(dev, "watchdog timeouts = %ld\n", + adapter->watchdog_events); + device_printf(dev, "RX MSIX IRQ = %ld TX MSIX IRQ = %ld" + " LINK MSIX IRQ = %ld\n", adapter->rx_irq, + adapter->tx_irq , adapter->link_irq); + device_printf(dev, "XON Rcvd = %lld\n", + (long long)adapter->stats.xonrxc); + device_printf(dev, "XON Xmtd = %lld\n", + (long long)adapter->stats.xontxc); + device_printf(dev, "XOFF Rcvd = %lld\n", + (long long)adapter->stats.xoffrxc); + device_printf(dev, "XOFF Xmtd = %lld\n", + (long long)adapter->stats.xofftxc); + device_printf(dev, "Good Packets Rcvd = %lld\n", + (long long)adapter->stats.gprc); + device_printf(dev, "Good Packets Xmtd = %lld\n", + (long long)adapter->stats.gptc); + device_printf(dev, "TSO Contexts Xmtd = %lld\n", + (long long)adapter->stats.tsctc); + device_printf(dev, "TSO Contexts Failed = %lld\n", + (long long)adapter->stats.tsctfc); +} + +/********************************************************************** + * + * This routine provides a way to dump out the adapter eeprom, + * often a useful debug/service tool. This only dumps the first + * 32 words, stuff that matters is in that extent. + * + **********************************************************************/ +static void +lem_print_nvm_info(struct adapter *adapter) +{ + u16 eeprom_data; + int i, j, row = 0; + + /* Its a bit crude, but it gets the job done */ + printf("\nInterface EEPROM Dump:\n"); + printf("Offset\n0x0000 "); + for (i = 0, j = 0; i < 32; i++, j++) { + if (j == 8) { /* Make the offset block */ + j = 0; ++row; + printf("\n0x00%x0 ",row); + } + e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); + printf("%04x ", eeprom_data); + } + printf("\n"); +} + +static int +lem_sysctl_debug_info(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter; + int error; + int result; + + result = -1; + error = sysctl_handle_int(oidp, &result, 0, req); + + if (error || !req->newptr) + return (error); + + if (result == 1) { + adapter = (struct adapter *)arg1; + lem_print_debug_info(adapter); + } + /* + * This value will cause a hex dump of the + * first 32 16-bit words of the EEPROM to + * the screen. + */ + if (result == 2) { + adapter = (struct adapter *)arg1; + lem_print_nvm_info(adapter); + } + + return (error); +} + + +static int +lem_sysctl_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *adapter; + int error; + int result; + + result = -1; + error = sysctl_handle_int(oidp, &result, 0, req); + + if (error || !req->newptr) + return (error); + + if (result == 1) { + adapter = (struct adapter *)arg1; + lem_print_hw_stats(adapter); + } + + return (error); +} + +static int +lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS) +{ + struct em_int_delay_info *info; + struct adapter *adapter; + u32 regval; + int error; + int usecs; + int ticks; + + info = (struct em_int_delay_info *)arg1; + usecs = info->value; + error = sysctl_handle_int(oidp, &usecs, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) + return (EINVAL); + info->value = usecs; + ticks = EM_USECS_TO_TICKS(usecs); + + adapter = info->adapter; + + EM_CORE_LOCK(adapter); + regval = E1000_READ_OFFSET(&adapter->hw, info->offset); + regval = (regval & ~0xffff) | (ticks & 0xffff); + /* Handle a few special cases. */ + switch (info->offset) { + case E1000_RDTR: + break; + case E1000_TIDV: + if (ticks == 0) { + adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; + /* Don't write 0 into the TIDV register. */ + regval++; + } else + adapter->txd_cmd |= E1000_TXD_CMD_IDE; + break; + } + E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); + EM_CORE_UNLOCK(adapter); + return (0); +} + +static void +lem_add_int_delay_sysctl(struct adapter *adapter, const char *name, + const char *description, struct em_int_delay_info *info, + int offset, int value) +{ + info->adapter = adapter; + info->offset = offset; + info->value = value; + SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), + OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, + info, 0, lem_sysctl_int_delay, "I", description); +} + +#ifndef EM_LEGACY_IRQ +static void +lem_add_rx_process_limit(struct adapter *adapter, const char *name, + const char *description, int *limit, int value) +{ + *limit = value; + SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), + OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); +} +#endif + + diff --git a/sys/dev/e1000/if_lem.h b/sys/dev/e1000/if_lem.h new file mode 100644 index 00000000000..446c0aacccc --- /dev/null +++ b/sys/dev/e1000/if_lem.h @@ -0,0 +1,480 @@ +/****************************************************************************** + + Copyright (c) 2001-2010, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ +/*$FreeBSD$*/ + + +#ifndef _LEM_H_DEFINED_ +#define _LEM_H_DEFINED_ + + +/* Tunables */ + +/* + * EM_TXD: Maximum number of Transmit Descriptors + * Valid Range: 80-256 for 82542 and 82543-based adapters + * 80-4096 for others + * Default Value: 256 + * This value is the number of transmit descriptors allocated by the driver. + * Increasing this value allows the driver to queue more transmits. Each + * descriptor is 16 bytes. + * Since TDLEN should be multiple of 128bytes, the number of transmit + * desscriptors should meet the following condition. + * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 + */ +#define EM_MIN_TXD 80 +#define EM_MAX_TXD_82543 256 +#define EM_MAX_TXD 4096 +#define EM_DEFAULT_TXD EM_MAX_TXD_82543 + +/* + * EM_RXD - Maximum number of receive Descriptors + * Valid Range: 80-256 for 82542 and 82543-based adapters + * 80-4096 for others + * Default Value: 256 + * This value is the number of receive descriptors allocated by the driver. + * Increasing this value allows the driver to buffer more incoming packets. + * Each descriptor is 16 bytes. A receive buffer is also allocated for each + * descriptor. The maximum MTU size is 16110. + * Since TDLEN should be multiple of 128bytes, the number of transmit + * desscriptors should meet the following condition. + * (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0 + */ +#define EM_MIN_RXD 80 +#define EM_MAX_RXD_82543 256 +#define EM_MAX_RXD 4096 +#define EM_DEFAULT_RXD EM_MAX_RXD_82543 + +/* + * EM_TIDV - Transmit Interrupt Delay Value + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value delays the generation of transmit interrupts in units of + * 1.024 microseconds. Transmit interrupt reduction can improve CPU + * efficiency if properly tuned for specific network traffic. If the + * system is reporting dropped transmits, this value may be set too high + * causing the driver to run out of available transmit descriptors. + */ +#define EM_TIDV 64 + +/* + * EM_TADV - Transmit Absolute Interrupt Delay Value + * (Not valid for 82542/82543/82544) + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value, in units of 1.024 microseconds, limits the delay in which a + * transmit interrupt is generated. Useful only if EM_TIDV is non-zero, + * this value ensures that an interrupt is generated after the initial + * packet is sent on the wire within the set amount of time. Proper tuning, + * along with EM_TIDV, may improve traffic throughput in specific + * network conditions. + */ +#define EM_TADV 64 + +/* + * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer) + * Valid Range: 0-65535 (0=off) + * Default Value: 0 + * This value delays the generation of receive interrupts in units of 1.024 + * microseconds. Receive interrupt reduction can improve CPU efficiency if + * properly tuned for specific network traffic. Increasing this value adds + * extra latency to frame reception and can end up decreasing the throughput + * of TCP traffic. If the system is reporting dropped receives, this value + * may be set too high, causing the driver to run out of available receive + * descriptors. + * + * CAUTION: When setting EM_RDTR to a value other than 0, adapters + * may hang (stop transmitting) under certain network conditions. + * If this occurs a WATCHDOG message is logged in the system + * event log. In addition, the controller is automatically reset, + * restoring the network connection. To eliminate the potential + * for the hang ensure that EM_RDTR is set to 0. + */ +#define EM_RDTR 0 + +/* + * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544) + * Valid Range: 0-65535 (0=off) + * Default Value: 64 + * This value, in units of 1.024 microseconds, limits the delay in which a + * receive interrupt is generated. Useful only if EM_RDTR is non-zero, + * this value ensures that an interrupt is generated after the initial + * packet is received within the set amount of time. Proper tuning, + * along with EM_RDTR, may improve traffic throughput in specific network + * conditions. + */ +#define EM_RADV 64 + +/* + * This parameter controls the max duration of transmit watchdog. + */ +#define EM_WATCHDOG (10 * hz) + +/* + * This parameter controls when the driver calls the routine to reclaim + * transmit descriptors. + */ +#define EM_TX_CLEANUP_THRESHOLD (adapter->num_tx_desc / 8) +#define EM_TX_OP_THRESHOLD (adapter->num_tx_desc / 32) + +/* + * This parameter controls whether or not autonegotation is enabled. + * 0 - Disable autonegotiation + * 1 - Enable autonegotiation + */ +#define DO_AUTO_NEG 1 + +/* + * This parameter control whether or not the driver will wait for + * autonegotiation to complete. + * 1 - Wait for autonegotiation to complete + * 0 - Don't wait for autonegotiation to complete + */ +#define WAIT_FOR_AUTO_NEG_DEFAULT 0 + +/* Tunables -- End */ + +#define AUTONEG_ADV_DEFAULT (ADVERTISE_10_HALF | ADVERTISE_10_FULL | \ + ADVERTISE_100_HALF | ADVERTISE_100_FULL | \ + ADVERTISE_1000_FULL) + +#define AUTO_ALL_MODES 0 + +/* PHY master/slave setting */ +#define EM_MASTER_SLAVE e1000_ms_hw_default + +/* + * Micellaneous constants + */ +#define EM_VENDOR_ID 0x8086 +#define EM_FLASH 0x0014 + +#define EM_JUMBO_PBA 0x00000028 +#define EM_DEFAULT_PBA 0x00000030 +#define EM_SMARTSPEED_DOWNSHIFT 3 +#define EM_SMARTSPEED_MAX 15 +#define EM_MAX_LOOP 10 + +#define MAX_NUM_MULTICAST_ADDRESSES 128 +#define PCI_ANY_ID (~0U) +#define ETHER_ALIGN 2 +#define EM_FC_PAUSE_TIME 0x0680 +#define EM_EEPROM_APME 0x400; +#define EM_82544_APME 0x0004; + +/* Code compatilbility between 6 and 7 */ +#ifndef ETHER_BPF_MTAP +#define ETHER_BPF_MTAP BPF_MTAP +#endif + +/* + * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be + * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will + * also optimize cache line size effect. H/W supports up to cache line size 128. + */ +#define EM_DBA_ALIGN 128 + +#define SPEED_MODE_BIT (1<<21) /* On PCI-E MACs only */ + +/* PCI Config defines */ +#define EM_BAR_TYPE(v) ((v) & EM_BAR_TYPE_MASK) +#define EM_BAR_TYPE_MASK 0x00000001 +#define EM_BAR_TYPE_MMEM 0x00000000 +#define EM_BAR_TYPE_IO 0x00000001 +#define EM_BAR_TYPE_FLASH 0x0014 +#define EM_BAR_MEM_TYPE(v) ((v) & EM_BAR_MEM_TYPE_MASK) +#define EM_BAR_MEM_TYPE_MASK 0x00000006 +#define EM_BAR_MEM_TYPE_32BIT 0x00000000 +#define EM_BAR_MEM_TYPE_64BIT 0x00000004 +#define EM_MSIX_BAR 3 /* On 82575 */ + +/* Defines for printing debug information */ +#define DEBUG_INIT 0 +#define DEBUG_IOCTL 0 +#define DEBUG_HW 0 + +#define INIT_DEBUGOUT(S) if (DEBUG_INIT) printf(S "\n") +#define INIT_DEBUGOUT1(S, A) if (DEBUG_INIT) printf(S "\n", A) +#define INIT_DEBUGOUT2(S, A, B) if (DEBUG_INIT) printf(S "\n", A, B) +#define IOCTL_DEBUGOUT(S) if (DEBUG_IOCTL) printf(S "\n") +#define IOCTL_DEBUGOUT1(S, A) if (DEBUG_IOCTL) printf(S "\n", A) +#define IOCTL_DEBUGOUT2(S, A, B) if (DEBUG_IOCTL) printf(S "\n", A, B) +#define HW_DEBUGOUT(S) if (DEBUG_HW) printf(S "\n") +#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) +#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) + +#define EM_MAX_SCATTER 64 +#define EM_VFTA_SIZE 128 +#define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) +#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ +#define EM_MSIX_MASK 0x01F00000 /* For 82574 use */ +#define ETH_ZLEN 60 +#define ETH_ADDR_LEN 6 +#define CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ + +/* + * 82574 has a nonstandard address for EIAC + * and since its only used in MSIX, and in + * the em driver only 82574 uses MSIX we can + * solve it just using this define. + */ +#define EM_EIAC 0x000DC + +/* Used in for 82547 10Mb Half workaround */ +#define EM_PBA_BYTES_SHIFT 0xA +#define EM_TX_HEAD_ADDR_SHIFT 7 +#define EM_PBA_TX_MASK 0xFFFF0000 +#define EM_FIFO_HDR 0x10 +#define EM_82547_PKT_THRESH 0x3e0 + +/* Precision Time Sync (IEEE 1588) defines */ +#define ETHERTYPE_IEEE1588 0x88F7 +#define PICOSECS_PER_TICK 20833 +#define TSYNC_PORT 319 /* UDP port for the protocol */ + +/* + * Bus dma allocation structure used by + * e1000_dma_malloc and e1000_dma_free. + */ +struct em_dma_alloc { + bus_addr_t dma_paddr; + caddr_t dma_vaddr; + bus_dma_tag_t dma_tag; + bus_dmamap_t dma_map; + bus_dma_segment_t dma_seg; + int dma_nseg; +}; + +struct adapter; + +struct em_int_delay_info { + struct adapter *adapter; /* Back-pointer to the adapter struct */ + int offset; /* Register offset to read/write */ + int value; /* Current value in usecs */ +}; + +/* Our adapter structure */ +struct adapter { + struct ifnet *ifp; +#if __FreeBSD_version >= 800000 + struct buf_ring *br; +#endif + struct e1000_hw hw; + + /* FreeBSD operating-system-specific structures. */ + struct e1000_osdep osdep; + struct device *dev; + + struct resource *memory; + struct resource *flash; + struct resource *msix; + + struct resource *ioport; + int io_rid; + + /* 82574 may use 3 int vectors */ + struct resource *res[3]; + void *tag[3]; + int rid[3]; + + struct ifmedia media; + struct callout timer; + struct callout tx_fifo_timer; + bool watchdog_check; + int watchdog_time; + int msi; + int if_flags; + int max_frame_size; + int min_frame_size; + struct mtx core_mtx; + struct mtx tx_mtx; + struct mtx rx_mtx; + int em_insert_vlan_header; + + /* Task for FAST handling */ + struct task link_task; + struct task rxtx_task; + struct task rx_task; + struct task tx_task; + struct taskqueue *tq; /* private task queue */ + +#if __FreeBSD_version >= 700029 + eventhandler_tag vlan_attach; + eventhandler_tag vlan_detach; + u32 num_vlans; +#endif + + /* Management and WOL features */ + u32 wol; + bool has_manage; + bool has_amt; + + /* Info about the board itself */ + uint8_t link_active; + uint16_t link_speed; + uint16_t link_duplex; + uint32_t smartspeed; + struct em_int_delay_info tx_int_delay; + struct em_int_delay_info tx_abs_int_delay; + struct em_int_delay_info rx_int_delay; + struct em_int_delay_info rx_abs_int_delay; + + /* + * Transmit definitions + * + * We have an array of num_tx_desc descriptors (handled + * by the controller) paired with an array of tx_buffers + * (at tx_buffer_area). + * The index of the next available descriptor is next_avail_tx_desc. + * The number of remaining tx_desc is num_tx_desc_avail. + */ + struct em_dma_alloc txdma; /* bus_dma glue for tx desc */ + struct e1000_tx_desc *tx_desc_base; + uint32_t next_avail_tx_desc; + uint32_t next_tx_to_clean; + volatile uint16_t num_tx_desc_avail; + uint16_t num_tx_desc; + uint16_t last_hw_offload; + uint32_t txd_cmd; + struct em_buffer *tx_buffer_area; + bus_dma_tag_t txtag; /* dma tag for tx */ + uint32_t tx_tso; /* last tx was tso */ + + /* + * Receive definitions + * + * we have an array of num_rx_desc rx_desc (handled by the + * controller), and paired with an array of rx_buffers + * (at rx_buffer_area). + * The next pair to check on receive is at offset next_rx_desc_to_check + */ + struct em_dma_alloc rxdma; /* bus_dma glue for rx desc */ + struct e1000_rx_desc *rx_desc_base; + uint32_t next_rx_desc_to_check; + uint32_t rx_buffer_len; + uint16_t num_rx_desc; + int rx_process_limit; + struct em_buffer *rx_buffer_area; + bus_dma_tag_t rxtag; + bus_dmamap_t rx_sparemap; + + /* + * First/last mbuf pointers, for + * collecting multisegment RX packets. + */ + struct mbuf *fmp; + struct mbuf *lmp; + + /* Misc stats maintained by the driver */ + unsigned long dropped_pkts; + unsigned long mbuf_alloc_failed; + unsigned long mbuf_cluster_failed; + unsigned long no_tx_desc_avail1; + unsigned long no_tx_desc_avail2; + unsigned long no_tx_map_avail; + unsigned long no_tx_dma_setup; + unsigned long watchdog_events; + unsigned long rx_overruns; + unsigned long rx_irq; + unsigned long tx_irq; + unsigned long link_irq; + + /* 82547 workaround */ + uint32_t tx_fifo_size; + uint32_t tx_fifo_head; + uint32_t tx_fifo_head_addr; + uint64_t tx_fifo_reset_cnt; + uint64_t tx_fifo_wrk_cnt; + uint32_t tx_head_addr; + + /* For 82544 PCIX Workaround */ + boolean_t pcix_82544; + boolean_t in_detach; + + + struct e1000_hw_stats stats; +}; + +/* ****************************************************************************** + * vendor_info_array + * + * This array contains the list of Subvendor/Subdevice IDs on which the driver + * should load. + * + * ******************************************************************************/ +typedef struct _em_vendor_info_t { + unsigned int vendor_id; + unsigned int device_id; + unsigned int subvendor_id; + unsigned int subdevice_id; + unsigned int index; +} em_vendor_info_t; + +struct em_buffer { + int next_eop; /* Index of the desc to watch */ + struct mbuf *m_head; + bus_dmamap_t map; /* bus_dma map for packet */ +}; + +/* For 82544 PCIX Workaround */ +typedef struct _ADDRESS_LENGTH_PAIR +{ + uint64_t address; + uint32_t length; +} ADDRESS_LENGTH_PAIR, *PADDRESS_LENGTH_PAIR; + +typedef struct _DESCRIPTOR_PAIR +{ + ADDRESS_LENGTH_PAIR descriptor[4]; + uint32_t elements; +} DESC_ARRAY, *PDESC_ARRAY; + +#define EM_CORE_LOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF) +#define EM_TX_LOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->tx_mtx, _name, "EM TX Lock", MTX_DEF) +#define EM_RX_LOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->rx_mtx, _name, "EM RX Lock", MTX_DEF) +#define EM_CORE_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->core_mtx) +#define EM_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->tx_mtx) +#define EM_RX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_mtx) +#define EM_CORE_LOCK(_sc) mtx_lock(&(_sc)->core_mtx) +#define EM_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_mtx) +#define EM_TX_TRYLOCK(_sc) mtx_trylock(&(_sc)->tx_mtx) +#define EM_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_mtx) +#define EM_CORE_UNLOCK(_sc) mtx_unlock(&(_sc)->core_mtx) +#define EM_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_mtx) +#define EM_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_mtx) +#define EM_CORE_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->core_mtx, MA_OWNED) +#define EM_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_mtx, MA_OWNED) + +#endif /* _LEM_H_DEFINED_ */ diff --git a/sys/modules/em/Makefile b/sys/modules/em/Makefile index b5e2c3b6681..3e91c14fbbf 100644 --- a/sys/modules/em/Makefile +++ b/sys/modules/em/Makefile @@ -2,15 +2,19 @@ .PATH: ${.CURDIR}/../../dev/e1000 KMOD = if_em SRCS = device_if.h bus_if.h pci_if.h opt_inet.h -SRCS += if_em.c $(SHARED_SRCS) -SHARED_SRCS = e1000_api.c e1000_phy.c e1000_nvm.c e1000_mac.c e1000_manage.c -SHARED_SRCS += e1000_80003es2lan.c e1000_82542.c e1000_82541.c e1000_82543.c -SHARED_SRCS += e1000_82540.c e1000_ich8lan.c e1000_82571.c e1000_osdep.c -SHARED_SRCS += e1000_82575.c +SRCS += $(CORE_SRC) $(LEGACY_SRC) +SRCS += $(COMMON_SHARED) $(LEGACY_SHARED) $(PCIE_SHARED) +CORE_SRC = if_em.c e1000_osdep.c +# This is the Legacy, pre-PCIE source, it can be +# undefined when using modular driver if not needed +LEGACY_SRC += if_lem.c +COMMON_SHARED = e1000_api.c e1000_phy.c e1000_nvm.c e1000_mac.c e1000_manage.c +PCIE_SHARED = e1000_80003es2lan.c e1000_ich8lan.c e1000_82571.c e1000_82575.c +LEGACY_SHARED = e1000_82540.c e1000_82542.c e1000_82541.c e1000_82543.c -CFLAGS+= -I${.CURDIR}/../../dev/e1000 +CFLAGS += -I${.CURDIR}/../../dev/e1000 -# DEVICE_POLLING gives you Legacy interrupt handling +# DEVICE_POLLING for a non-interrupt-driven method #CFLAGS += -DDEVICE_POLLING clean: