diff -ruNbB ixgbe-1.3.56.5/src/ixgbe_82598.c ixgbe-1.3.56.5patched/src/ixgbe_82598.c --- ixgbe-1.3.56.5/src/ixgbe_82598.c 2008-12-12 18:02:40.000000000 -0500 +++ ixgbe-1.3.56.5patched/src/ixgbe_82598.c 2009-07-27 10:00:55.000000000 -0400 @@ -827,6 +827,7 @@ rar_high &= ~IXGBE_RAH_VIND_MASK; rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK); IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high); + //printk("set VMDQ=%d of rar=%d\n", vmdq, rar); return 0; } diff -ruNbB ixgbe-1.3.56.5/src/ixgbe_common.c ixgbe-1.3.56.5patched/src/ixgbe_common.c --- ixgbe-1.3.56.5/src/ixgbe_common.c 2008-12-12 18:02:40.000000000 -0500 +++ ixgbe-1.3.56.5patched/src/ixgbe_common.c 2009-07-27 13:08:16.000000000 -0400 @@ -24,7 +24,7 @@ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 *******************************************************************************/ - +#include "ixgbe.h" #include "ixgbe_common.h" #include "ixgbe_api.h" @@ -1393,9 +1393,12 @@ { u32 rar_entries = hw->mac.num_rar_entries; u32 rar; + struct ixgbe_adapter *adapter = hw->back; hw_dbg(hw, " UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + //printk(" UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n", + // addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); /* * Place this address in the RAR if there is room, @@ -1406,6 +1409,8 @@ hw->addr_ctrl.mc_addr_in_rar_count; hw->mac.ops.set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); hw_dbg(hw, "Added a secondary address to RAR[%d]\n", rar); + //printk("Added a secondary address to RAR[%d]\n", rar); + DPRINTK(LINK, DEBUG, "Setting UC Addr = %.2X:%.2X:%.2X:%.2X:%.2X:%.2X with VMDQ=%d of rar=[%d]\n",addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], vmdq, rar); hw->addr_ctrl.rar_used_count++; } else { hw->addr_ctrl.overflow_promisc++; @@ -1458,6 +1463,8 @@ for (i = 0; i < addr_count; i++) { hw_dbg(hw, " Adding the secondary addresses:\n"); addr = next(hw, &addr_list, &vmdq); + /* adding the vmd queue 
mapping to address at this point it is 0*/ + vmdq = i+1; ixgbe_add_uc_addr(hw, addr, vmdq); } @@ -1557,6 +1564,100 @@ IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg); }
+/**
+ *  __ixgbe_dev_addr_add - add an address to a list or take a reference
+ *  @list: head pointer of the list; updated when a node is prepended
+ *  @count: entry counter of the list; bumped when a node is prepended
+ *  @addr: address bytes
+ *  @alen: length of @addr in bytes
+ *  @glbl: non-zero to take the single global reference
+ *
+ *  A matching entry is reused: a global request sets da_gusers and bumps
+ *  da_users only if no global reference was held, any other request
+ *  always bumps da_users.  Without a match a zeroed node is allocated
+ *  (GFP_ATOMIC) and pushed on the front of the list.
+ *
+ *  Returns 0 on success, -EINVAL if @alen does not fit da_addr, -ENOMEM
+ *  if the allocation fails.
+ **/
+int __ixgbe_dev_addr_add(struct dev_addr_list **list, int *count,
+		void *addr, int alen, int glbl)
+{
+	struct dev_addr_list *da;
+
+	/* a bad length would overrun da_addr in the memcpy() below */
+	if (alen < 0 || alen > (int)sizeof(da->da_addr))
+		return -EINVAL;
+
+	for (da = *list; da != NULL; da = da->next) {
+		/* length first, so memcmp() never reads past @addr */
+		if (da->da_addrlen == alen &&
+		    memcmp(da->da_addr, addr, alen) == 0) {
+			if (glbl) {
+				int old_glbl = da->da_gusers;
+				da->da_gusers = 1;
+				if (old_glbl)
+					return 0;
+			}
+			da->da_users++;
+			return 0;
+		}
+	}
+
+	da = kzalloc(sizeof(*da), GFP_ATOMIC);
+	if (da == NULL)
+		return -ENOMEM;
+	memcpy(da->da_addr, addr, alen);
+	da->da_addrlen = alen;
+	da->da_users = 1;
+	da->da_gusers = glbl ? 1 : 0;
+	da->next = *list;
+	*list = da;
+	(*count)++;
+	return 0;
+}
+
+/**
+ *  __ixgbe_dev_addr_delete - drop a reference on a list entry
+ *  @list: head pointer of the list; updated when a node is unlinked
+ *  @count: entry counter of the list; decremented when a node is freed
+ *  @addr: address bytes
+ *  @alen: length of @addr in bytes
+ *  @glbl: non-zero to release the single global reference
+ *
+ *  Returns 0 when a reference was dropped (the node is freed with the last
+ *  one), -ENOENT when nothing matches or when a global release is asked
+ *  for an entry that holds no global reference.
+ **/
+int __ixgbe_dev_addr_delete(struct dev_addr_list **list, int *count,
+		void *addr, int alen, int glbl)
+{
+	struct dev_addr_list *da;
+
+	for (; (da = *list) != NULL; list = &da->next) {
+		/* length first, so memcmp() never reads past @addr */
+		if (alen == da->da_addrlen &&
+		    memcmp(da->da_addr, addr, alen) == 0) {
+			if (glbl) {
+				int old_glbl = da->da_gusers;
+				da->da_gusers = 0;
+				/* no global reference held: report -ENOENT */
+				if (old_glbl == 0)
+					break;
+			}
+			if (--da->da_users)
+				return 0;
+
+			/* last reference gone: unlink and free the node */
+			*list = da->next;
+			kfree(da);
+			(*count)--;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
 /** * ixgbe_add_mc_addr - Adds a multicast address. 
* @hw: pointer to hardware structure @@ -1609,6 +1674,26 @@ u32 i; u32 rar_entries = hw->mac.num_rar_entries; u32 vmdq; + struct ixgbe_adapter *adapter = hw->back; + struct net_device *netdev = adapter->netdev; + u8 *uc_addr, *tmp_mc_addr_list = mc_addr_list; + + /* + * check to see if this is a unicast address and vmdq is on + */ + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) + for (i = 0; i < mc_addr_count; i++) { + uc_addr = next(hw, &tmp_mc_addr_list, &vmdq); + if (!(uc_addr[0] & 0x01)) { + __ixgbe_dev_addr_add(&netdev->uc_list, &netdev->uc_count, uc_addr, netdev->addr_len, 0); + /* dev_unicast_add(netdev, mc_addr, netdev->addr_len); */ + //printk(" Adding the unicast addresses from update_mc_addr_list\n"); + /*printk(" UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n", + uc_addr[0], uc_addr[1], uc_addr[2], uc_addr[3], uc_addr[4], uc_addr[5]); */ + __ixgbe_dev_addr_delete(&netdev->mc_list, &netdev->mc_count, uc_addr, netdev->addr_len, 1); + return 0; //nothing to be done here, it is a unicast address being added + } + } /* * Set the new number of MC addresses that we are being requested to diff -ruNbB ixgbe-1.3.56.5/src/ixgbe.h ixgbe-1.3.56.5patched/src/ixgbe.h --- ixgbe-1.3.56.5/src/ixgbe.h 2008-12-12 18:02:40.000000000 -0500 +++ ixgbe-1.3.56.5patched/src/ixgbe.h 2009-01-29 13:55:49.000000000 -0500 @@ -224,6 +224,10 @@ #endif u16 work_limit; /* max work per interrupt */ u16 rx_buf_len; + + /* click added stats */ + unsigned int interim_count_bytes; + unsigned int interim_count_packets; }; #define RING_F_DCB 0 @@ -285,6 +289,12 @@ #define IXGBE_MAX_JUMBO_FRAME_SIZE 16128 +/* for Click lockup detection */ +#define IXGBE_RX_STATE_NORMAL 0 +#define IXGBE_RX_STATE_QUIET 1 +#define IXGBE_RX_STATE_LOCKUP 2 +#define IXGBE_RX_STATE_EMPTY_POLL 3 + #ifdef IXGBE_TCP_TIMER #define TCP_TIMER_VECTOR 1 #else @@ -427,6 +437,16 @@ struct work_struct watchdog_task; struct work_struct sfp_task; struct timer_list sfp_timer; + + int do_poll_watchdog; /* Click polling */ + /* Receive 
Lockup detection and recovery */ + int rx_state; /* can be either: NORMAL, QUIET, LOCKUP */ + int rx_lockup_recoveries; /* # of times the recovery seq is invoked */ + unsigned long rx_normal_jiffies; /* jiffies timeout for the NORMAL state */ + unsigned long rx_quiet_jiffies; /* jiffies timeout for the QUIET state */ + int prev_rdfh; /* prev value of Rcv Data Fifo Head register */ + int prev_rdft; /* prev value of Rcv Data Fifo Tail register */ + }; enum ixbge_state_t { diff -ruNbB ixgbe-1.3.56.5/src/ixgbe_main.c ixgbe-1.3.56.5patched/src/ixgbe_main.c --- ixgbe-1.3.56.5/src/ixgbe_main.c 2008-12-12 18:02:40.000000000 -0500 +++ ixgbe-1.3.56.5patched/src/ixgbe_main.c 2009-08-10 13:55:28.000000000 -0400 @@ -60,13 +60,14 @@ "Intel(R) 10 Gigabit PCI Express Network Driver"; #define DRV_HW_PERF +#define MULTIQUEUE "-click-MQ-2.2" #ifndef CONFIG_IXGBE_NAPI #define DRIVERNAPI #else #define DRIVERNAPI "-NAPI" #endif -#define DRV_VERSION "1.3.56.5" DRIVERNAPI DRV_HW_PERF +#define DRV_VERSION "1.3.56.5" DRIVERNAPI DRV_HW_PERF MULTIQUEUE const char ixgbe_driver_version[] = DRV_VERSION; static char ixgbe_copyright[] = "Copyright (c) 1999-2008 Intel Corporation."; /* ixgbe_pci_tbl - PCI Device ID Table @@ -110,6 +111,26 @@ #define DEFAULT_DEBUG_LEVEL_SHIFT 3 +/* Click polling */ +static int ixgbe_mq_tx_pqueue(struct net_device *dev, unsigned int queue_num, struct sk_buff *skb); +static int ixgbe_mq_tx_start(struct net_device *dev, unsigned int queue_num); +static int ixgbe_mq_rx_refill(struct net_device *dev, unsigned int queue_num, struct sk_buff **); +static int ixgbe_mq_tx_eob(struct net_device *dev, unsigned int queue_num); +static struct sk_buff *ixgbe_mq_tx_clean(struct net_device *dev, unsigned int queue_num); +static struct sk_buff *ixgbe_mq_rx_poll(struct net_device *dev, unsigned int queue_num, int *want); +static int ixgbe_poll_on(struct net_device *dev); +static int ixgbe_poll_off(struct net_device *dev); +static unsigned int ixgbe_mq_get_tx_bytes(struct net_device 
*dev, unsigned int queue_num); +//static void ixgbe_watchdog_1(struct ixgbe_adapter *adapter); + +static int ixgbe_tx_pqueue(struct net_device *dev, struct sk_buff *skb); +static int ixgbe_tx_start(struct net_device *dev); +static int ixgbe_rx_refill(struct net_device *dev, struct sk_buff **); +static int ixgbe_tx_eob(struct net_device *dev); +static struct sk_buff *ixgbe_tx_clean(struct net_device *dev); +static struct sk_buff *ixgbe_rx_poll(struct net_device *dev, int *want); + + static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter) { u32 ctrl_ext; @@ -2021,9 +2042,13 @@ **/ static void ixgbe_configure_tx(struct ixgbe_adapter *adapter) { - u64 tdba, tdwba; + u64 tdba; struct ixgbe_hw *hw = &adapter->hw; - u32 i, j, tdlen, txctrl; + u32 i, j, tdlen; + /* + u64 tdwba; + u32 txctrl; + */ /* Setup the HW Tx Head and Tail descriptor pointers */ for (i = 0; i < adapter->num_tx_queues; i++) { @@ -2034,11 +2059,14 @@ IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), (tdba & DMA_32BIT_MASK)); IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); + /* Click needs these to be disabled for now */ + /* tdwba = ring->dma + (ring->count * sizeof(union ixgbe_adv_tx_desc)); tdwba |= IXGBE_TDWBAL_HEAD_WB_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(j), tdwba & DMA_32BIT_MASK); IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(j), (tdwba >> 32)); + */ IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), tdlen); IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); @@ -2047,9 +2075,12 @@ /* Disable Tx Head Writeback RO bit, since this hoses * bookkeeping if things aren't delivered in order. 
*/ + /* click needs for these to be disabled */ + /* txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j)); txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN; IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl); + */ } } @@ -2738,8 +2769,9 @@ for (i = 0; i < adapter->num_tx_queues; i++) { j = adapter->tx_ring[i].reg_idx; txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(j)); - /* enable WTHRESH=8 descriptors, to encourage burst writeback */ - txdctl |= (8 << 16); + //txdctl |= (8 << 16); + txdctl |= 0x0010; + txdctl |= (16 << 16); txdctl |= IXGBE_TXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(j), txdctl); } @@ -2747,10 +2779,8 @@ for (i = 0; i < adapter->num_rx_queues; i++) { j = adapter->rx_ring[i].reg_idx; rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)); - /* enable PTHRESH=32 descriptors (half the internal cache) - * and HTHRESH=0 descriptors (to minimize latency on fetch), - * this also removes a pesky rx_no_buffer_count increment */ - rxdctl |= 0x0020; + rxdctl |= 0x0010; + rxdctl |= (16<<16); rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), rxdctl); } @@ -3140,6 +3170,9 @@ #else ntq = 1; #endif +#ifdef CLICK_ENABLED + ntq = rss_i; +#endif break; case 0: default: @@ -3172,6 +3205,11 @@ } adapter->num_rx_queues = nrq; + + /* set the number tx queues only if the users didn't enter then as + * module load options + */ + if (adapter->num_tx_queues == 0) adapter->num_tx_queues = ntq; } @@ -4266,6 +4304,28 @@ adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); /* Fill out the OS statistics structure */ +#ifdef CLICK_ENABLED +#if 0 + for (i = 0; i < adapter->num_tx_queues; i++) { + adapter->net_stats.tx_bytes +=adapter->tx_ring[i].interim_count_bytes; + adapter->net_stats.tx_packets +=adapter->tx_ring[i].interim_count_packets; + /*reset the counters */ + adapter->tx_ring[i].interim_count_bytes = 0; + adapter->tx_ring[i].interim_count_packets = 0; + } + for (i = 0; i < adapter->num_rx_queues; i++) { + adapter->net_stats.rx_bytes +=adapter->rx_ring[i].interim_count_bytes; + 
adapter->net_stats.rx_packets +=adapter->rx_ring[i].interim_count_packets; + /*reset the counters */ + adapter->rx_ring[i].interim_count_bytes = 0; + adapter->rx_ring[i].interim_count_packets = 0; + } +#endif + adapter->net_stats.rx_packets = adapter->stats.gprc; + adapter->net_stats.tx_packets = adapter->stats.gptc; + adapter->net_stats.rx_bytes = adapter->stats.gorc; + adapter->net_stats.tx_bytes = adapter->stats.gotc; +#endif adapter->net_stats.multicast = adapter->stats.mprc; /* Rx Errors */ @@ -4286,6 +4346,9 @@ struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data; struct ixgbe_hw *hw = &adapter->hw; + if (adapter->netdev->polling) + adapter->do_poll_watchdog = 1; + else { /* Do the watchdog outside of interrupt context due to the lovely * delays that some of the newer hardware requires */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) { @@ -4307,6 +4370,7 @@ } schedule_work(&adapter->watchdog_task); + } } /** @@ -4377,6 +4442,13 @@ ixgbe_update_stats(adapter); adapter->flags &= ~IXGBE_FLAG_IN_WATCHDOG_TASK; + + if (adapter->netdev->polling) { + if (!test_bit(__IXGBE_DOWN, &adapter->state)) + /* Reset the timer */ + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + 2 * HZ)); + } } static int ixgbe_tso(struct ixgbe_adapter *adapter, struct ixgbe_ring *tx_ring, @@ -5048,6 +5120,26 @@ goto err_sw_init; } +/* maz Click - polling extensions */ + netdev->polling = 0; + netdev->rx_poll = ixgbe_rx_poll; + netdev->rx_refill = ixgbe_rx_refill; + netdev->tx_queue = ixgbe_tx_pqueue; + netdev->tx_eob = ixgbe_tx_eob; + netdev->tx_start = ixgbe_tx_start; + netdev->tx_clean = ixgbe_tx_clean; + netdev->poll_off = ixgbe_poll_off; + netdev->poll_on = ixgbe_poll_on; + + netdev->mq_rx_poll = ixgbe_mq_rx_poll; + netdev->mq_rx_refill = ixgbe_mq_rx_refill; + netdev->mq_tx_queue = ixgbe_mq_tx_pqueue; + netdev->mq_tx_eob = ixgbe_mq_tx_eob; + netdev->mq_tx_start = ixgbe_mq_tx_start; + netdev->mq_tx_clean = ixgbe_mq_tx_clean; + netdev->mq_get_tx_bytes = 
ixgbe_mq_get_tx_bytes; +/* Click - polling */ + /* check_options must be called before setup_link_speed to set up * hw->fc completely */ @@ -5459,6 +5551,1002 @@ return ret_val ? NOTIFY_BAD : NOTIFY_DONE; } #endif /* CONFIG_DCA or CONFIG_DCA_MODULE */ + + +/*******************************************/ +/***** click polling extension *************/ +/*******************************************/ +static unsigned int ixgbe_mq_get_tx_bytes (struct net_device *dev, unsigned int queue_num) +{ + + struct ixgbe_adapter *adapter = dev->priv; + + //adapter = (ixgbe_adapter*) netdev_priv(dev); + return adapter->tx_ring[queue_num].stats.bytes; +} + +static int ixgbe_poll_on (struct net_device *dev) { + + struct ixgbe_adapter *adapter = dev->priv; + unsigned long flags; + unsigned int i; + u32 txdctl, rxdctl, mhadd; + struct ixgbe_hw *hw = &adapter->hw; + int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN; + u32 frctl, rmcs; + + if (!dev->polling) { + printk("ixgbe_poll_on\n"); + DPRINTK(LINK, INFO, "rx_ring->head=%d, tail=%d\n", adapter->rx_ring->head, adapter->rx_ring->tail); + DPRINTK(LINK, INFO, "rx_ring->rx_buf_len=%d, tx_ring->count=%d\n", adapter->rx_ring->rx_buf_len, adapter->tx_ring->count); + local_irq_save(flags); + local_irq_disable(); + ixgbe_irq_disable(adapter); + + /* reset the card - start in a clean state */ + /* taken from ixgbe_down() */ + ixgbe_reset(adapter); + ixgbe_irq_disable(adapter); + ixgbe_clean_all_tx_rings(adapter); + ixgbe_clean_all_rx_rings(adapter); + DPRINTK(LINK, INFO, "rx_ring->next_to_clean=%d, next_to_use=%d\n", adapter->rx_ring->next_to_clean, adapter->rx_ring->next_to_use); + //DPRINTK(LINK, INFO, "adapter->rx_state=%d\n", adapter->rx_state); + DPRINTK(LINK, INFO, "rx_ring->rx_buf_len=%d, tx_ring->count=%d\n", adapter->rx_ring->rx_buf_len, adapter->tx_ring->count); + + /* taken from ixgbe_up() */ + //ixgbe_set_multi(dev); + ixgbe_set_rx_mode(dev); + ixgbe_configure_tx(adapter); + //click_ixgbe_configure_rx(adapter); + 
ixgbe_configure_rx(adapter); + if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) { + //adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; + printk(KERN_INFO "Disabling PS mode and flag in ixgbe adapter\n"); + } + //this will set all rx queues, might need to only set rx->ring[0] + for (i = 0; i < adapter->num_rx_queues; i++) + ixgbe_alloc_rx_buffers(adapter, &adapter->rx_ring[i], + (adapter->rx_ring[i].count - 1)); + + + //high mac address + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + + if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) { + mhadd &= ~IXGBE_MHADD_MFS_MASK; + mhadd |= max_frame << IXGBE_MHADD_MFS_SHIFT; + + IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); + } + + for (i = 0; i < adapter->num_tx_queues; i++) { + txdctl = IXGBE_READ_REG(&adapter->hw, IXGBE_TXDCTL(i)); + txdctl |= 0x0010; + txdctl |= (16 << 16); + //txdctl |= (8 << 16); + txdctl |= IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_TXDCTL(i), txdctl); + /*reset the counters */ + adapter->tx_ring[i].interim_count_bytes = 0; + adapter->tx_ring[i].interim_count_packets = 0; + } + + for (i = 0; i < adapter->num_rx_queues; i++) { + rxdctl = IXGBE_READ_REG(&adapter->hw, IXGBE_RXDCTL(i)); + rxdctl |= 0x0010; + rxdctl |= (16<<16); + //rxdctl |= (4<<16); + rxdctl |= IXGBE_RXDCTL_ENABLE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_RXDCTL(i), rxdctl); + /*reset the counters */ + adapter->rx_ring[i].interim_count_bytes = 0; + adapter->rx_ring[i].interim_count_packets = 0; + } + + /* enable all receives */ + rxdctl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + rxdctl |= (IXGBE_RXCTRL_DMBYPS | IXGBE_RXCTRL_RXEN); + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxdctl); + + clear_bit(__IXGBE_DOWN, &adapter->state); + IXGBE_WRITE_FLUSH(&adapter->hw); + + dev->polling = 2; + /* setting the initial state to empty polling */ + adapter->rx_state = IXGBE_RX_STATE_EMPTY_POLL; + + frctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); + rmcs = IXGBE_READ_REG(&adapter->hw, IXGBE_RMCS); +#define FLOW_RXB (frctl & IXGBE_FCTRL_RFCE) +#define FLOW_TX 
(rmcs & IXGBE_RMCS_TFCE_802_3X) + DPRINTK(LINK, INFO, "NIC Link is Up %s, " + "Flow Control: %s\n", + (10 == IXGBE_LINK_SPEED_10GB_FULL ? + "10 Gbps" : + (0 == IXGBE_LINK_SPEED_1GB_FULL ? + "1 Gpbs" : "unknown speed")), + ((FLOW_RX && FLOW_TX) ? "RX/TX" : + (FLOW_RX ? "RX" : + (FLOW_TX ? "TX" : "None" )))); + + + local_irq_restore(flags); + + DPRINTK(LINK, INFO, "adapter->num_rx_queues=%d, adapter->num_tx_queues=%d, rx_ring->count=%d, tx_ring->count=%d\n", + adapter->num_rx_queues, adapter->num_tx_queues, + adapter->rx_ring->count, adapter->tx_ring->count); + + }//end if + + return 0; +} + +static int ixgbe_poll_off(struct net_device *dev) +{ + struct ixgbe_adapter *adapter = dev->priv; + + if(dev->polling > 0){ + dev->polling = 0; + //ixgbe_down(adapter); + //ixgbe_up(adapter); + ixgbe_irq_enable(adapter); + printk("ixgbe_poll_off\n"); + } + + return 0; +} + +#if 0 +static void click_ixgbe_setup_rctl(struct ixgbe_adapter *adapter) { + + struct net_device *dev = adapter->netdev; + u32 txdctl, rxdctl, mhadd, fctrl; + struct ixgbe_hw *hw = &adapter->hw; + unsigned int i; + int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN; + + + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); + + if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) { + mhadd &= ~IXGBE_MHADD_MFS_MASK; + mhadd |= max_frame << IXGBE_MHADD_MFS_SHIFT; + + IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); + } + + /*setup promisc mode for now */ + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); + + + for (i = 0; i < adapter->num_tx_queues; i++) { + txdctl = IXGBE_READ_REG(&adapter->hw, IXGBE_TXDCTL(i)); + txdctl |= IXGBE_TXDCTL_ENABLE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_TXDCTL(i), txdctl); + } + + for (i = 0; i < adapter->num_rx_queues; i++) { + rxdctl = IXGBE_READ_REG(&adapter->hw, IXGBE_RXDCTL(i)); + rxdctl |= IXGBE_RXDCTL_ENABLE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_RXDCTL(i), rxdctl); + } + + /* enable all receives */ + rxdctl = 
IXGBE_READ_REG(hw, IXGBE_RXCTRL); + rxdctl |= (IXGBE_RXCTRL_DMBYPS | IXGBE_RXCTRL_RXEN); + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxdctl); + + clear_bit(__IXGBE_DOWN, &adapter->state); + IXGBE_WRITE_FLUSH(&adapter->hw); +} +#endif + + +static struct sk_buff * ixgbe_mq_rx_poll(struct net_device *dev, unsigned int queue_num, int *want) +{ + struct ixgbe_adapter *adapter = dev->priv; + struct pci_dev *pdev = adapter->pdev; + union ixgbe_adv_rx_desc *rx_desc; //adv desc used instead of the leagacy_rx_desc + struct ixgbe_ring *rx_ring = &adapter->rx_ring[queue_num]; + struct sk_buff *skb_head = NULL, **skb; + int got, next; + //static int count_empty_polls = 0; + u32 len, staterr; + + skb = &skb_head; + + for( got = 0, next = (rx_ring->next_to_clean + 1) % rx_ring->count; + got < *want && next != rx_ring->next_to_use; + got++, rx_ring->next_to_clean = next, + next = (rx_ring->next_to_clean + 1) % rx_ring->count) { + + int i = rx_ring->next_to_clean; + rx_desc = IXGBE_RX_DESC_ADV(*rx_ring, i); + staterr = le32_to_cpu(rx_desc->wb.upper.status_error); //adv rx_desc is used + if(!(staterr & IXGBE_RXD_STAT_DD)) { + //printk(KERN_INFO "rx_poll got nothing\n"); + break; + } + //printk(KERN_INFO "rx_poll got dma=%p, i=%d\n", (void *)rx_ring->rx_buffer_info[i].dma, i); + + //prefetch(rx_ring->rx_buffer_info[i].skb->data - NET_IP_ALIGN); +#if 0 + pci_unmap_single(pdev, rx_ring->rx_buffer_info[i].dma, + rx_ring->rx_buf_len + NET_IP_ALIGN, + PCI_DMA_FROMDEVICE); +#else + pci_unmap_page(pdev, rx_ring->rx_buffer_info[i].dma, + PAGE_SIZE, PCI_DMA_FROMDEVICE); +#endif + + //printk(KERN_INFO "rx_poll got skb=%p, skb->data=%p\n", rx_ring->rx_buffer_info[i].skb, rx_ring->rx_buffer_info[i].skb->data); + *skb = rx_ring->rx_buffer_info[i].skb; + rx_ring->rx_buffer_info[i].skb = NULL; + + if(!(staterr & IXGBE_RXD_STAT_EOP) || + (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)) { + printk(KERN_INFO "IXGBE_RXD_STAT_EOP or IXGBE_RXDADV_ERR_FRAME_ERR_MASK\n"); + rx_desc->wb.upper.status_error = 0; + 
dev_kfree_skb(*skb); + *skb = NULL; + got--; + continue; + } + + len = le16_to_cpu(rx_desc->wb.upper.length); + rx_desc->wb.upper.status_error = 0; + + //printk(KERN_INFO "length of polled pkt=%d\n", len); + //skb_put(*skb, len - ETH_FCS_LEN); + + skb_put(*skb, len); + //ixgbe_rx_checksum(adapter, staterr, *skb); + skb_pull(*skb, dev->hard_header_len); + + /* update rx ring stats for each queue */ + rx_ring->stats.packets++; + rx_ring->stats.bytes += len; + rx_ring->total_packets++; + rx_ring->total_bytes += len; + rx_ring->interim_count_bytes += len; + rx_ring->interim_count_packets++; + + skb = &((*skb)->next); + *skb = NULL; + } //end for + + *want = got; +#if 0 + if (got == 0) { + //mdelay(1); //busy wait for 1 milsec + count_empty_polls++; + if (count_empty_polls % 1000000 == 0) { + struct timeval tt; + do_gettimeofday(&tt); + printk(KERN_INFO "count_empty_polls = %d %u.%06u\n", count_empty_polls, tt.tv_sec, tt. \ + tv_usec); + } + } + + /* + * Receive Lockup detection and recovery for ixgbe + */ + if (got) { + adapter->rx_state = IXGBE_RX_STATE_NORMAL; + adapter->rx_normal_jiffies = jiffies + HZ; + } else { + int rdfh; + int rdft; + switch (adapter->rx_state) { + case IXGBE_RX_STATE_NORMAL: + if (time_before(jiffies, adapter->rx_normal_jiffies)) + break; + adapter->rx_state = IXGBE_RX_STATE_QUIET; + adapter->rx_quiet_jiffies = jiffies + HZ; + /* getting rdh and rdt of the ring 0 for now */ + adapter->prev_rdfh = IXGBE_READ_REG(&adapter->hw, IXGBE_RDH(0)); + adapter->prev_rdft = IXGBE_READ_REG(&adapter->hw, IXGBE_RDT(0)); + break; + case IXGBE_RX_STATE_QUIET: + rdfh = IXGBE_READ_REG(&adapter->hw, IXGBE_RDH(0)); + rdft = IXGBE_READ_REG(&adapter->hw, IXGBE_RDT(0)); + if (adapter->prev_rdfh != rdfh || + adapter->prev_rdft != rdft || + adapter->prev_rdfh == adapter->prev_rdft) { + adapter->prev_rdfh = rdfh; + adapter->prev_rdft = rdft; + adapter->rx_quiet_jiffies = jiffies + HZ; + break; + } + if (time_before(jiffies, adapter->rx_quiet_jiffies)) + break; + /* Fall 
into the lockup case */ + case IXGBE_RX_STATE_LOCKUP: + /* Receive lockup detected: perform a recovery */ + adapter->rx_lockup_recoveries++; + /* taken from ixgbe_down() */ + ixgbe_reset(adapter); + ixgbe_irq_disable(adapter); + ixgbe_clean_all_tx_rings(adapter); + ixgbe_clean_all_rx_rings(adapter); + /* taken from ixgbe_up() */ + ixgbe_set_multi(dev); + ixgbe_configure_tx(adapter); + ixgbe_configure_rx(adapter); + for (i = 0; i < adapter->num_rx_queues; i++) + ixgbe_alloc_rx_buffers(adapter, &adapter->rx_ring[i], + (adapter->rx_ring[i].count - 1)); + click_ixgbe_setup_rctl(adapter); + + /* reset the lockup detection */ + adapter->rx_state = IXGBE_RX_STATE_EMPTY_POLL; + adapter->rx_normal_jiffies = jiffies + HZ; + break; + } + } +#endif + + return skb_head; +} + +static int ixgbe_mq_rx_refill(struct net_device *dev, unsigned int queue_num, struct sk_buff **skbs) +{ + struct ixgbe_adapter *adapter = dev->priv; + struct ixgbe_ring *rx_ring = &adapter->rx_ring[queue_num]; //get ring number + struct pci_dev *pdev = adapter->pdev; + union ixgbe_adv_rx_desc *rx_desc; + struct sk_buff *skb; + int next; + + /* + * Update statistics counters, check link. + * do_poll_watchdog is set by the timer interrupt ixgbe_watchdog(), + * but we don't want to do the work in an interrupt (since it may + * happen while polling code is active), so defer it to here. 
+ */ + + if (queue_num == 0) + if(adapter->do_poll_watchdog) { + adapter->do_poll_watchdog = 0; + ixgbe_watchdog_task(&adapter->watchdog_task); + } +#if 0 + if (queue_num == 0) + if (!(jiffies % HZ)) { + ixgbe_watchdog_task(&adapter->watchdog_task); + } +#endif + + if (!netif_carrier_ok(dev)) + return 0; + + if(skbs == 0) + return IXGBE_DESC_UNUSED(rx_ring); + + for( next = (rx_ring->next_to_use + 1) % rx_ring->count; + next != rx_ring->next_to_clean; + rx_ring->next_to_use = next, + next = (rx_ring->next_to_use + 1) % rx_ring->count ) { + int i = rx_ring->next_to_use; + if(rx_ring->rx_buffer_info[i].skb != NULL) + break; + + if(!(skb = *skbs)) + break; + *skbs = skb->next; + skb->next = NULL; + skb->dev = dev; + + rx_ring->rx_buffer_info[i].skb = skb; + //no length in IXGBE buffer_info + //rx_ring->rx_buffer_info[i].length = adapter->rx_buffer_len; + //we may have to get rid of NET_IP_ALIGN, and in poll too, in e1000_click + //this is not set for pci_map_single + rx_ring->rx_buffer_info[i].dma = + pci_map_single(pdev, + skb->data, + rx_ring->rx_buf_len + NET_IP_ALIGN, + //rx_ring->rx_buf_len, + PCI_DMA_FROMDEVICE); + + rx_desc = IXGBE_RX_DESC_ADV(*rx_ring, i); + rx_desc->read.pkt_addr = cpu_to_le64(rx_ring->rx_buffer_info[i].dma); + //rx_desc->wb.upper.status_error = 0; + //printk(KERN_INFO "rx_refill set dma=%p, i=%d\n", (void *)rx_ring->rx_buffer_info[i].dma, i); + + //wmb(); + //next and rx_ring->next_to_use will be the same here looking at one beyond what's availabe to hw + //writel(next, adapter->hw.hw_addr + rx_ring->tail); + + } + writel(rx_ring->next_to_use, adapter->hw.hw_addr + rx_ring->tail); + IXGBE_WRITE_FLUSH(&adapter->hw); + return IXGBE_DESC_UNUSED(rx_ring); +} + +static int +ixgbe_mq_tx_eob(struct net_device *dev, unsigned int queue_num) +{ + struct ixgbe_adapter *adapter = dev->priv; + struct ixgbe_ring *tx_ring = &adapter->tx_ring[queue_num]; + + writel(tx_ring->next_to_use, adapter->hw.hw_addr + tx_ring->tail); + 
IXGBE_WRITE_FLUSH(&adapter->hw); + return 0; +} + +static int +ixgbe_mq_tx_start(struct net_device *dev, unsigned int queue_num) +{ + /* printk("ixgbe_tx_start called\n"); */ + ixgbe_mq_tx_eob(dev, queue_num); + return 0; +} + +static int +ixgbe_mq_tx_pqueue(struct net_device *netdev, unsigned int queue_num, struct sk_buff *skb) +{ + /* + * This function is a streamlined version of + * return ixgbe_xmit_frame(skb, netdev); + */ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_ring *tx_ring = &adapter->tx_ring[queue_num]; + unsigned int len = skb->len; + //unsigned int txd_needed; + unsigned int tx_flags = 0; + //unsigned long flags = 0; + + union ixgbe_adv_tx_desc *tx_desc = NULL; + struct ixgbe_tx_buffer *tx_buffer_info; + u32 olinfo_status = 0, cmd_type_len = 0, txd_cmd, paylen; + u8 hdr_len; + unsigned int i, offset; + + if (skb->len <= 0) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + if(!netif_carrier_ok(netdev)) { + netif_stop_queue(netdev); + return -1; + } + + //txd_needed = TXD_USE_COUNT(skb->len); + + + //spin_lock_irqsave(&tx_ring->tx_lock, flags); + //might +1 would be enough + //if (IXGBE_DESC_UNUSED(tx_ring) < (txd_needed + 1)) { + if (unlikely(IXGBE_DESC_UNUSED(tx_ring) < 1)) { + adapter->tx_busy++; + adapter->net_stats.tx_dropped++; + //netif_stop_queue(netdev); + //spin_unlock_irqrestore(&tx_ring->tx_lock, flags); + return -1; + } + //spin_unlock_irqrestore(&tx_ring->tx_lock, flags); + + + /*this part is from tx_queue */ + txd_cmd = IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS | IXGBE_TXD_CMD_IFCS; + + cmd_type_len |= IXGBE_ADVTXD_DTYP_DATA; + + cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT; + + if (skb->protocol == ntohs(ETH_P_IP)) + tx_flags |= IXGBE_TX_FLAGS_IPV4; + + if (ixgbe_tx_csum(adapter, tx_ring, skb, tx_flags)) { + olinfo_status |= IXGBE_TXD_POPTS_TXSM << IXGBE_ADVTXD_POPTS_SHIFT; + } + hdr_len = 0; + paylen = skb->len; + olinfo_status |= ((paylen - hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT); + + i = 
tx_ring->next_to_use; + tx_buffer_info = &tx_ring->tx_buffer_info[i]; + tx_desc = IXGBE_TX_DESC_ADV(*tx_ring, i); + tx_buffer_info->length = len; + offset = 0; + tx_buffer_info->dma = + pci_map_page(adapter->pdev, virt_to_page(skb->data + offset), + (unsigned long) (skb->data + offset) & ~PAGE_MASK, len, + PCI_DMA_TODEVICE); + + tx_buffer_info->time_stamp = jiffies; + + tx_desc->read.buffer_addr = cpu_to_le64(tx_buffer_info->dma); + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type_len | tx_buffer_info->length); + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); + tx_desc->read.cmd_type_len |= cpu_to_le32(txd_cmd); + tx_ring->tx_buffer_info[i].skb = skb; + + i++; + if (i >= tx_ring->count) + i = 0; + + /* Move the HW Tx Tail Pointer */ + tx_ring->next_to_use = i; + + netdev->trans_start = jiffies; + + return 0; +} //end tx_pqueue + +#define IXGBE_TX_QUEUE_WAKE 16 + +static struct sk_buff * +ixgbe_mq_tx_clean(struct net_device *netdev, unsigned int queue_num) +{ + /* + * This function is a streamlined version of + * return ixgbe_clean_tx_irq(adapter, 1); + */ + + struct ixgbe_adapter *adapter = netdev->priv; + struct pci_dev *pdev = adapter->pdev; + //struct ixgbe_tx_buffer *tx_buffer_info; + unsigned int i; + union ixgbe_adv_tx_desc *tx_desc; + struct sk_buff *skb_head, *skb_last; + struct ixgbe_ring *tx_ring = &adapter->tx_ring[queue_num]; + + skb_head = skb_last = 0; + + i = tx_ring->next_to_clean; + tx_desc = IXGBE_TX_DESC_ADV(*tx_ring, i); + while (tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)) { + if(tx_ring->tx_buffer_info[i].dma != 0) { + pci_unmap_page(pdev, tx_ring->tx_buffer_info[i].dma, + tx_ring->tx_buffer_info[i].length, + PCI_DMA_TODEVICE); + tx_ring->tx_buffer_info[i].dma = 0; + } + + if(tx_ring->tx_buffer_info[i].skb != NULL) { + struct sk_buff *skb = tx_ring->tx_buffer_info[i].skb; + if (skb_head == 0) { + skb_head = skb; + skb_last = skb; + skb_last->next = NULL; + } else { + skb_last->next = skb; + skb->next = NULL; + skb_last = 
skb;
+            }
+            tx_ring->tx_buffer_info[i].skb = NULL;
+        }
+
+        /* update stats for this tx ring */
+        tx_ring->stats.bytes += tx_ring->tx_buffer_info[i].length;
+        tx_ring->stats.packets++;
+        tx_ring->total_bytes += tx_ring->tx_buffer_info[i].length;
+        tx_ring->total_packets++;
+        tx_ring->interim_count_bytes += tx_ring->tx_buffer_info[i].length;
+        tx_ring->interim_count_packets++;
+
+        i = (i + 1) % tx_ring->count;
+
+        tx_desc->wb.status = 0;
+        tx_desc = IXGBE_TX_DESC_ADV(*tx_ring, i);
+
+    }//end while
+
+    tx_ring->next_to_clean = i;
+
+    //#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+
+    //#define IXGBE_TX_QUEUE_WAKE 16
+    if (skb_head && netif_carrier_ok(netdev) &&
+        (IXGBE_DESC_UNUSED(tx_ring) >= IXGBE_TX_QUEUE_WAKE)) {
+        //(IXGBE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) {
+        /* Make sure that anybody stopping the queue after this
+         * sees the new next_to_clean.
+         */
+        //smp_mb();
+        /* Multi queue stuff should go here but we don't support it yet,look at ixgbe_clean_tx_irq */
+        if (netif_queue_stopped(netdev) &&
+            !test_bit(__IXGBE_DOWN, &adapter->state)) {
+            //netif_wake_queue(netdev);
+            netif_start_queue(netdev);
+            adapter->restart_queue++;
+        }
+    }
+
+    return skb_head;
+}//tx_clean
+/* Polling RX hook for ring 0: returns up to *want completed frames as a ->next-linked skb list and writes the number returned back to *want. */
+static struct sk_buff * ixgbe_rx_poll(struct net_device *dev, int *want)
+{
+    struct ixgbe_adapter *adapter = netdev_priv(dev);
+    struct pci_dev *pdev = adapter->pdev;
+    union ixgbe_adv_rx_desc *rx_desc; //adv desc used instead of the legacy rx_desc
+    struct ixgbe_ring *rx_ring = adapter->rx_ring;
+    struct sk_buff *skb_head = NULL, **skb;
+
+    int got, next;
+    //static int count_empty_polls = 0;
+    u32 len, staterr;
+
+    skb = &skb_head;
+
+    for( got = 0, next = (rx_ring->next_to_clean + 1) % rx_ring->count;
+         got < *want && next != rx_ring->next_to_use;
+         got++, rx_ring->next_to_clean = next,
+         next = (rx_ring->next_to_clean + 1) % rx_ring->count) {
+        /* stop at the first descriptor whose DD (descriptor done) bit is still clear */
+        int i = rx_ring->next_to_clean;
+        rx_desc = IXGBE_RX_DESC_ADV(*rx_ring, i);
+        staterr = le32_to_cpu(rx_desc->wb.upper.status_error); //adv rx_desc is used
+        if(!(staterr & IXGBE_RXD_STAT_DD)) {
+            //printk(KERN_INFO "rx_poll got nothing\n");
+            break;
+        }
+        //printk(KERN_INFO "rx_poll got dma=%p, i=%d\n", (void *)rx_ring->rx_buffer_info[i].dma, i);
+
+        /* maz: there is no rx_buffer_info.length therefore must use alternative */
+        //prefetch(rx_ring->rx_buffer_info[i].skb->data - NET_IP_ALIGN);
+        pci_unmap_single(pdev, rx_ring->rx_buffer_info[i].dma,
+                         rx_ring->rx_buf_len + NET_IP_ALIGN,
+                         //rx_ring->rx_buf_len,
+                         PCI_DMA_FROMDEVICE);
+
+        //printk(KERN_INFO "rx_poll got skb=%p, skb->data=%p\n", rx_ring->rx_buffer_info[i].skb, rx_ring->rx_buffer_info[i].skb->data);
+        *skb = rx_ring->rx_buffer_info[i].skb;
+        rx_ring->rx_buffer_info[i].skb = NULL;
+
+        if(!(staterr & IXGBE_RXD_STAT_EOP) ||
+           (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)) {
+            printk(KERN_INFO "IXGBE_RXD_STAT_EOP or IXGBE_RXDADV_ERR_FRAME_ERR_MASK\n");
+            rx_desc->wb.upper.status_error = 0;
+            dev_kfree_skb(*skb);
+            *skb = NULL;
+            got--; /* cancelled by the loop's got++: a dropped frame is not counted */
+            continue;
+        }
+
+        len = le16_to_cpu(rx_desc->wb.upper.length);
+        rx_desc->wb.upper.status_error = 0;
+
+        //printk(KERN_INFO "length of polled pkt=%d\n", len); // debug only: one log line per received packet
+        //skb_put(*skb, len - ETH_FCS_LEN);
+
+        skb_put(*skb, len);
+        ixgbe_rx_checksum(adapter, staterr, *skb);
+        skb_pull(*skb, dev->hard_header_len);
+
+        skb = &((*skb)->next);
+        *skb = NULL;
+    } //end for
+
+    *want = got;
+
+#if 0
+    if (got == 0) {
+        //mdelay(1); //busy wait for 1 milsec
+        count_empty_polls++;
+        if (count_empty_polls % 1000000 == 0) {
+            struct timeval tt;
+            do_gettimeofday(&tt);
+            printk(KERN_INFO "count_empty_polls = %d %u.%06u\n", count_empty_polls, tt.tv_sec, tt. \
+                   tv_usec);
+        }
+    }
+    /*
+     * Receive Lockup detection and recovery for ixgbe
+     */
+
+    if (got) {
+        adapter->rx_state = IXGBE_RX_STATE_NORMAL;
+        adapter->rx_normal_jiffies = jiffies + HZ;
+    } else {
+        int rdfh;
+        int rdft;
+        switch (adapter->rx_state) {
+        case IXGBE_RX_STATE_NORMAL:
+            if (time_before(jiffies, adapter->rx_normal_jiffies))
+                break;
+            adapter->rx_state = IXGBE_RX_STATE_QUIET;
+            adapter->rx_quiet_jiffies = jiffies + HZ;
+            /* getting rdh and rdt of the ring 0 for now */
+            adapter->prev_rdfh = IXGBE_READ_REG(&adapter->hw, IXGBE_RDH(0));
+            adapter->prev_rdft = IXGBE_READ_REG(&adapter->hw, IXGBE_RDT(0));
+            break;
+        case IXGBE_RX_STATE_QUIET:
+            rdfh = IXGBE_READ_REG(&adapter->hw, IXGBE_RDH(0));
+            rdft = IXGBE_READ_REG(&adapter->hw, IXGBE_RDT(0));
+            if (adapter->prev_rdfh != rdfh ||
+                adapter->prev_rdft != rdft ||
+                adapter->prev_rdfh == adapter->prev_rdft) {
+                adapter->prev_rdfh = rdfh;
+                adapter->prev_rdft = rdft;
+                adapter->rx_quiet_jiffies = jiffies + HZ;
+                break;
+            }
+            if (time_before(jiffies, adapter->rx_quiet_jiffies))
+                break;
+            /* Fall into the lockup case */
+        case IXGBE_RX_STATE_LOCKUP:
+            /* Receive lockup detected: perform a recovery */
+            adapter->rx_lockup_recoveries++;
+            /* taken from ixgbe_down() */
+            ixgbe_reset(adapter);
+            ixgbe_irq_disable(adapter);
+            ixgbe_clean_all_tx_rings(adapter);
+            ixgbe_clean_all_rx_rings(adapter);
+            /* taken from ixgbe_up() */
+            ixgbe_set_multi(dev);
+            ixgbe_configure_tx(adapter);
+            ixgbe_configure_rx(adapter);
+            for (i = 0; i < adapter->num_rx_queues; i++)
+                ixgbe_alloc_rx_buffers(adapter, &adapter->rx_ring[i],
+                                       (adapter->rx_ring[i].count - 1));
+            click_ixgbe_setup_rctl(adapter);
+
+            /* reset the lockup detection */
+            adapter->rx_state = IXGBE_RX_STATE_EMPTY_POLL;
+            adapter->rx_normal_jiffies = jiffies + HZ;
+            break;
+        }
+    }
+#endif
+
+    return skb_head;
+}
+/* Polling RX refill hook for ring 0: maps skbs taken from the *skbs list into free RX descriptors, writes the tail once, and returns IXGBE_DESC_UNUSED(rx_ring). With skbs == NULL it only reports that count; it returns 0 while the carrier is down. */
+static int ixgbe_rx_refill(struct net_device *dev, struct sk_buff **skbs)
+{
+    struct ixgbe_adapter *adapter = netdev_priv(dev);
+    struct ixgbe_ring *rx_ring = adapter->rx_ring; //get ring 0
+    struct pci_dev *pdev = adapter->pdev;
+    union ixgbe_adv_rx_desc *rx_desc;
+    struct sk_buff *skb;
+    int next;
+
+    /*
+     * Update statistics counters, check link.
+     * do_poll_watchdog is set by the timer interrupt ixgbe_watchdog(),
+     * but we don't want to do the work in an interrupt (since it may
+     * happen while polling code is active), so defer it to here.
+     */
+#if 0
+    if(adapter->do_poll_watchdog){
+        adapter->do_poll_watchdog = 0;
+        ixgbe_watchdog_1(adapter);
+    }
+#endif
+    if (!(jiffies % HZ)) {
+        ixgbe_watchdog_task(&adapter->watchdog_task);
+    }
+    /* NOTE(review): the watchdog runs only when a refill lands on a jiffy that is a multiple of HZ, and on every call made during that jiffy */
+
+    if (!netif_carrier_ok(dev))
+        return 0;
+
+    if (skbs == NULL)
+        return IXGBE_DESC_UNUSED(rx_ring);
+
+    for( next = (rx_ring->next_to_use + 1) % rx_ring->count;
+         next != rx_ring->next_to_clean;
+         rx_ring->next_to_use = next,
+         next = (rx_ring->next_to_use + 1) % rx_ring->count ) {
+        int i = rx_ring->next_to_use;
+        if(rx_ring->rx_buffer_info[i].skb != NULL)
+            break;
+
+        if(!(skb = *skbs))
+            break;
+        *skbs = skb->next;
+        skb->next = NULL;
+        skb->dev = dev;
+
+        rx_ring->rx_buffer_info[i].skb = skb;
+        rx_ring->rx_buffer_info[i].dma =
+            pci_map_single(pdev,
+                           skb->data,
+                           rx_ring->rx_buf_len + NET_IP_ALIGN,
+                           PCI_DMA_FROMDEVICE);
+
+        rx_desc = IXGBE_RX_DESC_ADV(*rx_ring, i);
+        rx_desc->read.pkt_addr = cpu_to_le64(rx_ring->rx_buffer_info[i].dma);
+        //rx_desc->wb.upper.status_error = 0;
+        //printk(KERN_INFO "rx_refill set dma=%p, i=%d\n", (void *)rx_ring->rx_buffer_info[i].dma, i); // debug only: one log line per buffer
+    }
+
+    /* Intel documentation says: "Software adds receive descriptors by
+     * writing the tail pointer with the index of the entry beyond the
+     * last valid descriptor." It's still so for ixgbe, according to
+     * ixgbe_rx_alloc_buffers and doc p3-195 for ixgbe. The tail is written
+     * once, after the whole batch has been refilled; the wmb() orders the
+     * descriptor writes above before that tail (doorbell) write. */
+    wmb();
+    writel(rx_ring->next_to_use, adapter->hw.hw_addr + rx_ring->tail);
+
+    return IXGBE_DESC_UNUSED(rx_ring);
+}
+/* TX flush hook (tx_eob): hands the descriptors queued by ixgbe_tx_pqueue() to the hardware by writing next_to_use to the TX tail register of ring 0. Always returns 0. */
+static int ixgbe_tx_eob(struct net_device *dev)
+{
+    struct ixgbe_adapter *adapter = netdev_priv(dev);
+    wmb(); /* descriptor writes must be visible before the tail (doorbell) write */
+    writel(adapter->tx_ring->next_to_use, adapter->hw.hw_addr + adapter->tx_ring->tail);
+    return 0;
+}
+/* tx_start hook: only flushes the TX tail through ixgbe_tx_eob(). Always returns 0. */
+static int
+ixgbe_tx_start(struct net_device *dev)
+{
+    /* printk("ixgbe_tx_start called\n"); */
+    ixgbe_tx_eob(dev);
+    return 0;
+}
+/* tx_pqueue hook: queues one skb (skb->data/skb->len, so presumably linear) on TX ring 0 with a single data descriptor. The tail register is not written here, see ixgbe_tx_eob(). Returns 0 when queued, NETDEV_TX_OK after freeing an empty skb, -1 when the link is down or the ring is full. */
+static int
+ixgbe_tx_pqueue(struct net_device *netdev, struct sk_buff *skb)
+{
+    /*
+     * This function is a streamlined version of
+     * return ixgbe_xmit_frame(skb, netdev);
+     */
+    struct ixgbe_adapter *adapter = netdev_priv(netdev);
+    struct ixgbe_ring *tx_ring = adapter->tx_ring;
+    unsigned int txd_needed, len = skb->len;
+    unsigned int tx_flags = 0;
+    //unsigned long flags = 0;
+    u8 hdr_len;
+    u32 paylen, txd_cmd;
+
+    union ixgbe_adv_tx_desc *tx_desc = NULL;
+    struct ixgbe_tx_buffer *tx_buffer_info;
+    u32 olinfo_status = 0, cmd_type_len = 0;
+    unsigned int i, offset;
+
+    if (skb->len <= 0) {
+        dev_kfree_skb(skb);
+        return NETDEV_TX_OK;
+    }
+    if(!netif_carrier_ok(netdev)) {
+        netif_stop_queue(netdev);
+        return -1;
+    }
+
+    txd_needed = TXD_USE_COUNT(skb->len);
+
+    //spin_lock_irqsave(&tx_ring->tx_lock, flags);
+    /* +1: ixgbe_tx_csum() below presumably may consume a context descriptor first - NOTE(review): confirm */
+    if (IXGBE_DESC_UNUSED(tx_ring) < (txd_needed + 1)) {
+        adapter->tx_busy++;
+        adapter->net_stats.tx_dropped++;
+        netif_stop_queue(netdev);
+        //spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
+        return -1;
+    }
+    //spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
+
+    /*this part is from tx_queue */
+    txd_cmd = IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS | IXGBE_TXD_CMD_IFCS;
+
+    cmd_type_len |= IXGBE_ADVTXD_DTYP_DATA;
+
+    cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
+
+    if (skb->protocol == htons(ETH_P_IP))
+        tx_flags |= IXGBE_TX_FLAGS_IPV4;
+
+    if (ixgbe_tx_csum(adapter, tx_ring, skb, tx_flags)) {
+        olinfo_status |= IXGBE_TXD_POPTS_TXSM << IXGBE_ADVTXD_POPTS_SHIFT;
+    }
+    hdr_len = 0;
+    paylen = skb->len;
+    olinfo_status |= ((paylen - hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+    i = tx_ring->next_to_use;
+    tx_buffer_info = &tx_ring->tx_buffer_info[i];
+    tx_desc = IXGBE_TX_DESC_ADV(*tx_ring, i);
+    tx_buffer_info->length = len;
+    offset = 0;
+    tx_buffer_info->dma =
+        pci_map_page(adapter->pdev, virt_to_page(skb->data + offset),
+                     (unsigned long) (skb->data + offset) & ~PAGE_MASK, len,
+                     PCI_DMA_TODEVICE);
+
+    tx_buffer_info->time_stamp = jiffies;
+
+    tx_desc->read.buffer_addr = cpu_to_le64(tx_buffer_info->dma);
+    tx_desc->read.cmd_type_len =
+        cpu_to_le32(cmd_type_len | tx_buffer_info->length);
+    tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+    tx_desc->read.cmd_type_len |= cpu_to_le32(txd_cmd);
+    tx_ring->tx_buffer_info[i].skb = skb;
+
+    i++;
+    if (i >= tx_ring->count)
+        i = 0;
+
+    /* Advance the software index only; the HW Tx tail is written by ixgbe_tx_eob() */
+    tx_ring->next_to_use = i;
+
+    netdev->trans_start = jiffies;
+
+    return 0;
+} //end tx_pqueue
+/* tx_clean hook: reclaims the TX descriptors of ring 0 whose DD bit is set, unmaps their buffers and returns the completed skbs as a ->next-linked list (NULL if none); restarts a stopped queue once IXGBE_TX_QUEUE_WAKE descriptors are free. */
+static struct sk_buff *
+ixgbe_tx_clean(struct net_device *netdev)
+{
+    /*
+     * This function is a streamlined version of
+     * return ixgbe_clean_tx_irq(adapter, 1);
+     */
+
+    struct ixgbe_adapter *adapter = netdev_priv(netdev);
+    struct pci_dev *pdev = adapter->pdev;
+    //struct ixgbe_tx_buffer *tx_buffer_info;
+    unsigned int i;
+    union ixgbe_adv_tx_desc *tx_desc;
+    struct sk_buff *skb_head, *skb_last;
+    struct ixgbe_ring *tx_ring = adapter->tx_ring;
+
+    skb_head = skb_last = NULL;
+
+    i = tx_ring->next_to_clean;
+    tx_desc = IXGBE_TX_DESC_ADV(*tx_ring, i);
+    while (tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)) {
+        if(tx_ring->tx_buffer_info[i].dma != 0) {
+            pci_unmap_page(pdev, tx_ring->tx_buffer_info[i].dma,
+                           tx_ring->tx_buffer_info[i].length,
+                           PCI_DMA_TODEVICE);
+            tx_ring->tx_buffer_info[i].dma = 0;
+        }
+
+        if(tx_ring->tx_buffer_info[i].skb != NULL) {
+            struct sk_buff *skb = tx_ring->tx_buffer_info[i].skb;
+            if (skb_head == NULL) {
+                skb_head = skb;
+                skb_last = skb;
+                skb_last->next = NULL;
+            } else {
+                skb_last->next = skb;
+                skb->next = NULL;
+                skb_last = skb;
+            }
+            tx_ring->tx_buffer_info[i].skb = NULL;
+        }
+
+        i = (i + 1) % tx_ring->count;
+
+        tx_desc->wb.status = 0;
+        tx_desc = IXGBE_TX_DESC_ADV(*tx_ring, i);
+
+    }//end while
+
+    tx_ring->next_to_clean = i;
+
+    //#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+    //#define IXGBE_TX_QUEUE_WAKE 16
+    if (skb_head && netif_carrier_ok(netdev) &&
+        (IXGBE_DESC_UNUSED(tx_ring) >= IXGBE_TX_QUEUE_WAKE)) {
+        //(IXGBE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) {
+        /* Make sure that anybody stopping the queue after this
+         * sees the new next_to_clean.
+         */
+        smp_mb();
+        /* Multi queue stuff should go here but we don't support it yet,look at ixgbe_clean_tx_irq */
+        if (netif_queue_stopped(netdev) &&
+            !test_bit(__IXGBE_DOWN, &adapter->state)) {
+            netif_start_queue(netdev);
+            adapter->restart_queue++;
+        }
+    }
+
+    return skb_head;
+}//tx_clean
+
 module_exit(ixgbe_exit_module);
 /* ixgbe_main.c */
diff -ruNbB ixgbe-1.3.56.5/src/ixgbe_param.c ixgbe-1.3.56.5patched/src/ixgbe_param.c
--- ixgbe-1.3.56.5/src/ixgbe_param.c 2008-12-12 18:02:40.000000000 -0500
+++ ixgbe-1.3.56.5patched/src/ixgbe_param.c 2009-07-16 08:57:55.000000000 -0400
@@ -34,7 +34,7 @@
  * maximum number of ports that the driver can manage.
*/ -#define IXGBE_MAX_NIC 8 +#define IXGBE_MAX_NIC 16 #define OPTION_UNSET -1 #define OPTION_DISABLED 0 @@ -131,6 +131,14 @@ IXGBE_PARAM(VMDQ, "Number of Virtual Machine Device Queues: 0/1 = disable (default), 2-16 enable"); +/* Tx Queues count + * + * Valid Range: 1-16 + * + * Default Value: 1 (NOTE(review): the "Transmit Queues" option table uses OPTION_DISABLED as both minimum and default; confirm) + */ +IXGBE_PARAM(TxQueues, "Number of TX queues"); + /* Interrupt Throttle Rate (interrupts/sec) * * Valid Range: 100-500000 (0=off) @@ -423,8 +431,12 @@ static struct ixgbe_option opt = { .type = range_option, .name = "Receive-Side Scaling (RSS)", + /* Disabling RSS for CLICK for initial module load .err = "using default.", .def = OPTION_ENABLED, + */ + .err = "defaulting to Disabled", + .def = OPTION_DISABLED, .arg = { .r = { .min = OPTION_DISABLED, .max = IXGBE_MAX_RSS_INDICES}} }; @@ -482,6 +494,34 @@ } } } + + /* Added for Click TX multi-queue support */ + { /* Transmit Queues Count */ + struct ixgbe_option opt = { + .type = range_option, + .name = "Transmit Queues", + .err = "defaulting to Disabled", + .def = OPTION_DISABLED, + .arg = { .r = { .min = OPTION_DISABLED, + .max = IXGBE_MAX_RSS_INDICES}} + }; + +#ifdef module_param_array + if (num_TxQueues > bd) { +#endif + adapter->num_tx_queues = TxQueues[bd]; + ixgbe_validate_option(&adapter->num_tx_queues, &opt); + +#ifdef module_param_array + } else { + /* Set num_tx_queues to 0 to indicate that the user did not set the option; + * the default is then chosen by the set_num_queues function in ixgbe_main. + */ + adapter->num_tx_queues = opt.def; + } +#endif + } + { /* Virtual Machine Device Queues (VMDQ) */ static struct ixgbe_option opt = { .type = range_option,