author     Kip Macy <kmacy@FreeBSD.org>  2008-07-28 23:37:33 +0000
committer  Kip Macy <kmacy@FreeBSD.org>  2008-07-28 23:37:33 +0000
commit     6971fe8ddf2f0e170067a422e5f827724410bef9
tree       8fd6cc6e7404202400d3d5f758a8f3b65766b0f4
parent     3ccd11b631cb9868dc43b7d5c815100a17bd8d9e
-rw-r--r--  sys/conf/files  1
-rw-r--r--  sys/dev/cxgb/common/cxgb_ael1002.c  453
-rw-r--r--  sys/dev/cxgb/common/cxgb_common.h  68
-rw-r--r--  sys/dev/cxgb/common/cxgb_mc5.c  18
-rw-r--r--  sys/dev/cxgb/common/cxgb_mv88e1xxx.c  4
-rw-r--r--  sys/dev/cxgb/common/cxgb_t3_cpl.h  32
-rw-r--r--  sys/dev/cxgb/common/cxgb_t3_hw.c  104
-rw-r--r--  sys/dev/cxgb/common/cxgb_tn1010.c  225
-rw-r--r--  sys/dev/cxgb/common/cxgb_vsc8211.c  7
-rw-r--r--  sys/dev/cxgb/common/cxgb_xgmac.c  55
-rw-r--r--  sys/dev/cxgb/cxgb_adapter.h  2
-rw-r--r--  sys/dev/cxgb/cxgb_config.h  1
-rw-r--r--  sys/dev/cxgb/cxgb_main.c  74
-rw-r--r--  sys/dev/cxgb/cxgb_offload.c  5
-rw-r--r--  sys/dev/cxgb/cxgb_offload.h  6
-rw-r--r--  sys/dev/cxgb/cxgb_osdep.h  21
-rw-r--r--  sys/dev/cxgb/cxgb_sge.c  64
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c  294
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h  168
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c  1779
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h  249
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c  276
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c  255
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c  265
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c  1418
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h  330
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c  219
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c  1295
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h  362
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c  1052
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c  382
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h  59
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h  68
-rw-r--r--  sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h  684
-rw-r--r--  sys/dev/cxgb/ulp/toecore/cxgb_toedev.h  5
-rw-r--r--  sys/dev/cxgb/ulp/toecore/toedev.c  424
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c  4456
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c  1030
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_ddp.c  738
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_defs.h  90
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_l2t.c  542
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_l2t.h  161
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_listen.c  338
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h  181
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tcp.h  47
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c  95
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h  155
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_toepcb.h  119
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tom.c  1510
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tom.h  159
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c  119
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_vm.c  180
-rw-r--r--  sys/dev/cxgb/ulp/tom/cxgb_vm.h  40
-rw-r--r--  sys/modules/cxgb/Makefile  11
-rw-r--r--  sys/modules/cxgb/cxgb/Makefile  20
-rw-r--r--  sys/modules/cxgb/cxgb_t3fw/Makefile  1
-rw-r--r--  sys/modules/cxgb/iw_cxgb/Makefile  14
-rw-r--r--  sys/modules/cxgb/toecore/Makefile  8
-rw-r--r--  sys/modules/cxgb/tom/Makefile  14
59 files changed, 20553 insertions(+), 199 deletions(-)
diff --git a/sys/conf/files b/sys/conf/files
index f71067411abd9..9672a92f10340 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -527,6 +527,7 @@ dev/cxgb/common/cxgb_ael1002.c optional cxgb pci
dev/cxgb/common/cxgb_mv88e1xxx.c optional cxgb pci
dev/cxgb/common/cxgb_xgmac.c optional cxgb pci
dev/cxgb/common/cxgb_t3_hw.c optional cxgb pci
+dev/cxgb/common/cxgb_tn1010.c optional cxgb pci
dev/cxgb/sys/uipc_mvec.c optional cxgb pci
dev/cxgb/sys/cxgb_support.c optional cxgb pci
dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw
diff --git a/sys/dev/cxgb/common/cxgb_ael1002.c b/sys/dev/cxgb/common/cxgb_ael1002.c
index b288d5d60535a..a9c7fb2d86770 100644
--- a/sys/dev/cxgb/common/cxgb_ael1002.c
+++ b/sys/dev/cxgb/common/cxgb_ael1002.c
@@ -46,11 +46,32 @@ enum {
AEL1002_PWR_DOWN_LO = 0xc012,
AEL1002_XFI_EQL = 0xc015,
AEL1002_LB_EN = 0xc017,
+ AEL_OPT_SETTINGS = 0xc017,
+};
- LASI_CTRL = 0x9002,
- LASI_STAT = 0x9005
+struct reg_val {
+ unsigned short mmd_addr;
+ unsigned short reg_addr;
+ unsigned short clear_bits;
+ unsigned short set_bits;
};
+static int set_phy_regs(struct cphy *phy, const struct reg_val *rv)
+{
+ int err;
+
+ for (err = 0; rv->mmd_addr && !err; rv++) {
+ if (rv->clear_bits == 0xffff)
+ err = mdio_write(phy, rv->mmd_addr, rv->reg_addr,
+ rv->set_bits);
+ else
+ err = t3_mdio_change_bits(phy, rv->mmd_addr,
+ rv->reg_addr, rv->clear_bits,
+ rv->set_bits);
+ }
+ return err;
+}
+
static void ael100x_txon(struct cphy *phy)
{
int tx_on_gpio = phy->addr == 0 ? F_GPIO7_OUT_VAL : F_GPIO2_OUT_VAL;
@@ -158,33 +179,6 @@ static int ael1006_reset(struct cphy *phy, int wait)
return t3_phy_reset(phy, MDIO_DEV_PMA_PMD, wait);
}
-static int ael1006_intr_enable(struct cphy *phy)
-{
- return mdio_write(phy, MDIO_DEV_PMA_PMD, LASI_CTRL, 1);
-}
-
-static int ael1006_intr_disable(struct cphy *phy)
-{
- return mdio_write(phy, MDIO_DEV_PMA_PMD, LASI_CTRL, 0);
-}
-
-static int ael1006_intr_clear(struct cphy *phy)
-{
- u32 val;
-
- return mdio_read(phy, MDIO_DEV_PMA_PMD, LASI_STAT, &val);
-}
-
-static int ael1006_intr_handler(struct cphy *phy)
-{
- unsigned int status;
- int err = mdio_read(phy, MDIO_DEV_PMA_PMD, LASI_STAT, &status);
-
- if (err)
- return err;
- return (status & 1) ? cphy_cause_link_change : 0;
-}
-
static int ael1006_power_down(struct cphy *phy, int enable)
{
return t3_mdio_change_bits(phy, MDIO_DEV_PMA_PMD, MII_BMCR,
@@ -194,10 +188,10 @@ static int ael1006_power_down(struct cphy *phy, int enable)
#ifdef C99_NOT_SUPPORTED
static struct cphy_ops ael1006_ops = {
ael1006_reset,
- ael1006_intr_enable,
- ael1006_intr_disable,
- ael1006_intr_clear,
- ael1006_intr_handler,
+ t3_phy_lasi_intr_enable,
+ t3_phy_lasi_intr_disable,
+ t3_phy_lasi_intr_clear,
+ t3_phy_lasi_intr_handler,
NULL,
NULL,
NULL,
@@ -209,10 +203,10 @@ static struct cphy_ops ael1006_ops = {
#else
static struct cphy_ops ael1006_ops = {
.reset = ael1006_reset,
- .intr_enable = ael1006_intr_enable,
- .intr_disable = ael1006_intr_disable,
- .intr_clear = ael1006_intr_clear,
- .intr_handler = ael1006_intr_handler,
+ .intr_enable = t3_phy_lasi_intr_enable,
+ .intr_disable = t3_phy_lasi_intr_disable,
+ .intr_clear = t3_phy_lasi_intr_clear,
+ .intr_handler = t3_phy_lasi_intr_handler,
.get_link_status = ael100x_get_link_status,
.power_down = ael1006_power_down,
};
@@ -228,13 +222,382 @@ int t3_ael1006_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
return 0;
}
+static int ael2005_setup_sr_edc(struct cphy *phy)
+{
+ static u16 sr_edc[] = {
+ 0xcc00, 0x2ff4,
+ 0xcc01, 0x3cd4,
+ 0xcc02, 0x2015,
+ 0xcc03, 0x3105,
+ 0xcc04, 0x6524,
+ 0xcc05, 0x27ff,
+ 0xcc06, 0x300f,
+ 0xcc07, 0x2c8b,
+ 0xcc08, 0x300b,
+ 0xcc09, 0x4009,
+ 0xcc0a, 0x400e,
+ 0xcc0b, 0x2f72,
+ 0xcc0c, 0x3002,
+ 0xcc0d, 0x1002,
+ 0xcc0e, 0x2172,
+ 0xcc0f, 0x3012,
+ 0xcc10, 0x1002,
+ 0xcc11, 0x25d2,
+ 0xcc12, 0x3012,
+ 0xcc13, 0x1002,
+ 0xcc14, 0xd01e,
+ 0xcc15, 0x27d2,
+ 0xcc16, 0x3012,
+ 0xcc17, 0x1002,
+ 0xcc18, 0x2004,
+ 0xcc19, 0x3c84,
+ 0xcc1a, 0x6436,
+ 0xcc1b, 0x2007,
+ 0xcc1c, 0x3f87,
+ 0xcc1d, 0x8676,
+ 0xcc1e, 0x40b7,
+ 0xcc1f, 0xa746,
+ 0xcc20, 0x4047,
+ 0xcc21, 0x5673,
+ 0xcc22, 0x2982,
+ 0xcc23, 0x3002,
+ 0xcc24, 0x13d2,
+ 0xcc25, 0x8bbd,
+ 0xcc26, 0x2862,
+ 0xcc27, 0x3012,
+ 0xcc28, 0x1002,
+ 0xcc29, 0x2092,
+ 0xcc2a, 0x3012,
+ 0xcc2b, 0x1002,
+ 0xcc2c, 0x5cc3,
+ 0xcc2d, 0x314,
+ 0xcc2e, 0x2942,
+ 0xcc2f, 0x3002,
+ 0xcc30, 0x1002,
+ 0xcc31, 0xd019,
+ 0xcc32, 0x2032,
+ 0xcc33, 0x3012,
+ 0xcc34, 0x1002,
+ 0xcc35, 0x2a04,
+ 0xcc36, 0x3c74,
+ 0xcc37, 0x6435,
+ 0xcc38, 0x2fa4,
+ 0xcc39, 0x3cd4,
+ 0xcc3a, 0x6624,
+ 0xcc3b, 0x5563,
+ 0xcc3c, 0x2d42,
+ 0xcc3d, 0x3002,
+ 0xcc3e, 0x13d2,
+ 0xcc3f, 0x464d,
+ 0xcc40, 0x2862,
+ 0xcc41, 0x3012,
+ 0xcc42, 0x1002,
+ 0xcc43, 0x2032,
+ 0xcc44, 0x3012,
+ 0xcc45, 0x1002,
+ 0xcc46, 0x2fb4,
+ 0xcc47, 0x3cd4,
+ 0xcc48, 0x6624,
+ 0xcc49, 0x5563,
+ 0xcc4a, 0x2d42,
+ 0xcc4b, 0x3002,
+ 0xcc4c, 0x13d2,
+ 0xcc4d, 0x2ed2,
+ 0xcc4e, 0x3002,
+ 0xcc4f, 0x1002,
+ 0xcc50, 0x2fd2,
+ 0xcc51, 0x3002,
+ 0xcc52, 0x1002,
+ 0xcc53, 0x004,
+ 0xcc54, 0x2942,
+ 0xcc55, 0x3002,
+ 0xcc56, 0x1002,
+ 0xcc57, 0x2092,
+ 0xcc58, 0x3012,
+ 0xcc59, 0x1002,
+ 0xcc5a, 0x5cc3,
+ 0xcc5b, 0x317,
+ 0xcc5c, 0x2f72,
+ 0xcc5d, 0x3002,
+ 0xcc5e, 0x1002,
+ 0xcc5f, 0x2942,
+ 0xcc60, 0x3002,
+ 0xcc61, 0x1002,
+ 0xcc62, 0x22cd,
+ 0xcc63, 0x301d,
+ 0xcc64, 0x2862,
+ 0xcc65, 0x3012,
+ 0xcc66, 0x1002,
+ 0xcc67, 0x2ed2,
+ 0xcc68, 0x3002,
+ 0xcc69, 0x1002,
+ 0xcc6a, 0x2d72,
+ 0xcc6b, 0x3002,
+ 0xcc6c, 0x1002,
+ 0xcc6d, 0x628f,
+ 0xcc6e, 0x2112,
+ 0xcc6f, 0x3012,
+ 0xcc70, 0x1002,
+ 0xcc71, 0x5aa3,
+ 0xcc72, 0x2dc2,
+ 0xcc73, 0x3002,
+ 0xcc74, 0x1312,
+ 0xcc75, 0x6f72,
+ 0xcc76, 0x1002,
+ 0xcc77, 0x2807,
+ 0xcc78, 0x31a7,
+ 0xcc79, 0x20c4,
+ 0xcc7a, 0x3c24,
+ 0xcc7b, 0x6724,
+ 0xcc7c, 0x1002,
+ 0xcc7d, 0x2807,
+ 0xcc7e, 0x3187,
+ 0xcc7f, 0x20c4,
+ 0xcc80, 0x3c24,
+ 0xcc81, 0x6724,
+ 0xcc82, 0x1002,
+ 0xcc83, 0x2514,
+ 0xcc84, 0x3c64,
+ 0xcc85, 0x6436,
+ 0xcc86, 0xdff4,
+ 0xcc87, 0x6436,
+ 0xcc88, 0x1002,
+ 0xcc89, 0x40a4,
+ 0xcc8a, 0x643c,
+ 0xcc8b, 0x4016,
+ 0xcc8c, 0x8c6c,
+ 0xcc8d, 0x2b24,
+ 0xcc8e, 0x3c24,
+ 0xcc8f, 0x6435,
+ 0xcc90, 0x1002,
+ 0xcc91, 0x2b24,
+ 0xcc92, 0x3c24,
+ 0xcc93, 0x643a,
+ 0xcc94, 0x4025,
+ 0xcc95, 0x8a5a,
+ 0xcc96, 0x1002,
+ 0xcc97, 0x2731,
+ 0xcc98, 0x3011,
+ 0xcc99, 0x1001,
+ 0xcc9a, 0xc7a0,
+ 0xcc9b, 0x100,
+ 0xcc9c, 0xc502,
+ 0xcc9d, 0x53ac,
+ 0xcc9e, 0xc503,
+ 0xcc9f, 0xd5d5,
+ 0xcca0, 0xc600,
+ 0xcca1, 0x2a6d,
+ 0xcca2, 0xc601,
+ 0xcca3, 0x2a4c,
+ 0xcca4, 0xc602,
+ 0xcca5, 0x111,
+ 0xcca6, 0xc60c,
+ 0xcca7, 0x5900,
+ 0xcca8, 0xc710,
+ 0xcca9, 0x700,
+ 0xccaa, 0xc718,
+ 0xccab, 0x700,
+ 0xccac, 0xc720,
+ 0xccad, 0x4700,
+ 0xccae, 0xc801,
+ 0xccaf, 0x7f50,
+ 0xccb0, 0xc802,
+ 0xccb1, 0x7760,
+ 0xccb2, 0xc803,
+ 0xccb3, 0x7fce,
+ 0xccb4, 0xc804,
+ 0xccb5, 0x5700,
+ 0xccb6, 0xc805,
+ 0xccb7, 0x5f11,
+ 0xccb8, 0xc806,
+ 0xccb9, 0x4751,
+ 0xccba, 0xc807,
+ 0xccbb, 0x57e1,
+ 0xccbc, 0xc808,
+ 0xccbd, 0x2700,
+ 0xccbe, 0xc809,
+ 0xccbf, 0x000,
+ 0xccc0, 0xc821,
+ 0xccc1, 0x002,
+ 0xccc2, 0xc822,
+ 0xccc3, 0x014,
+ 0xccc4, 0xc832,
+ 0xccc5, 0x1186,
+ 0xccc6, 0xc847,
+ 0xccc7, 0x1e02,
+ 0xccc8, 0xc013,
+ 0xccc9, 0xf341,
+ 0xccca, 0xc01a,
+ 0xcccb, 0x446,
+ 0xcccc, 0xc024,
+ 0xcccd, 0x1000,
+ 0xccce, 0xc025,
+ 0xcccf, 0xa00,
+ 0xccd0, 0xc026,
+ 0xccd1, 0xc0c,
+ 0xccd2, 0xc027,
+ 0xccd3, 0xc0c,
+ 0xccd4, 0xc029,
+ 0xccd5, 0x0a0,
+ 0xccd6, 0xc030,
+ 0xccd7, 0xa00,
+ 0xccd8, 0xc03c,
+ 0xccd9, 0x01c,
+ 0xccda, 0xc005,
+ 0xccdb, 0x7a06,
+ 0xccdc, 0x000,
+ 0xccdd, 0x2731,
+ 0xccde, 0x3011,
+ 0xccdf, 0x1001,
+ 0xcce0, 0xc620,
+ 0xcce1, 0x000,
+ 0xcce2, 0xc621,
+ 0xcce3, 0x03f,
+ 0xcce4, 0xc622,
+ 0xcce5, 0x000,
+ 0xcce6, 0xc623,
+ 0xcce7, 0x000,
+ 0xcce8, 0xc624,
+ 0xcce9, 0x000,
+ 0xccea, 0xc625,
+ 0xcceb, 0x000,
+ 0xccec, 0xc627,
+ 0xcced, 0x000,
+ 0xccee, 0xc628,
+ 0xccef, 0x000,
+ 0xccf0, 0xc62c,
+ 0xccf1, 0x000,
+ 0xccf2, 0x000,
+ 0xccf3, 0x2806,
+ 0xccf4, 0x3cb6,
+ 0xccf5, 0xc161,
+ 0xccf6, 0x6134,
+ 0xccf7, 0x6135,
+ 0xccf8, 0x5443,
+ 0xccf9, 0x303,
+ 0xccfa, 0x6524,
+ 0xccfb, 0x00b,
+ 0xccfc, 0x1002,
+ 0xccfd, 0x2104,
+ 0xccfe, 0x3c24,
+ 0xccff, 0x2105,
+ 0xcd00, 0x3805,
+ 0xcd01, 0x6524,
+ 0xcd02, 0xdff4,
+ 0xcd03, 0x4005,
+ 0xcd04, 0x6524,
+ 0xcd05, 0x1002,
+ 0xcd06, 0x5dd3,
+ 0xcd07, 0x306,
+ 0xcd08, 0x2ff7,
+ 0xcd09, 0x38f7,
+ 0xcd0a, 0x60b7,
+ 0xcd0b, 0xdffd,
+ 0xcd0c, 0x00a,
+ 0xcd0d, 0x1002,
+ 0xcd0e, 0
+ };
+ int i, err;
+
+ for (err = i = 0; i < ARRAY_SIZE(sr_edc) && !err; i += 2)
+ err = mdio_write(phy, MDIO_DEV_PMA_PMD, sr_edc[i],
+ sr_edc[i + 1]);
+ return err;
+}
+
+static int ael2005_reset(struct cphy *phy, int wait)
+{
+ static struct reg_val regs0[] = {
+ { MDIO_DEV_PMA_PMD, 0xc001, 0, 1 << 5 },
+ { MDIO_DEV_PMA_PMD, 0xc017, 0, 1 << 5 },
+ { MDIO_DEV_PMA_PMD, 0xc013, 0xffff, 0xf341 },
+ { MDIO_DEV_PMA_PMD, 0xc210, 0xffff, 0x8000 },
+ { MDIO_DEV_PMA_PMD, 0xc210, 0xffff, 0x8100 },
+ { MDIO_DEV_PMA_PMD, 0xc210, 0xffff, 0x8000 },
+ { MDIO_DEV_PMA_PMD, 0xc210, 0xffff, 0 },
+ { 0, 0, 0, 0 }
+ };
+ static struct reg_val regs1[] = {
+ { MDIO_DEV_PMA_PMD, 0xc003, 0xffff, 0x181 },
+ { MDIO_DEV_PMA_PMD, 0xc010, 0xffff, 0x448a },
+ { MDIO_DEV_PMA_PMD, 0xc04a, 0xffff, 0x5200 },
+ { 0, 0, 0, 0 }
+ };
+ static struct reg_val regs2[] = {
+ { MDIO_DEV_PMA_PMD, 0xca00, 0xffff, 0x0080 },
+ { MDIO_DEV_PMA_PMD, 0xca12, 0xffff, 0 },
+ { 0, 0, 0, 0 }
+ };
+
+ int err;
+
+ err = t3_phy_reset(phy, MDIO_DEV_PMA_PMD, 0);
+ if (err)
+ return err;
+
+ msleep(125);
+ err = set_phy_regs(phy, regs0);
+ if (err)
+ return err;
+
+ msleep(50);
+ err = set_phy_regs(phy, regs1);
+ if (err)
+ return err;
+
+ msleep(50);
+ err = ael2005_setup_sr_edc(phy);
+ if (err)
+ return err;
+
+ return set_phy_regs(phy, regs2);
+}
+
+#ifdef C99_NOT_SUPPORTED
+static struct cphy_ops ael2005_ops = {
+ ael2005_reset,
+ t3_phy_lasi_intr_enable,
+ t3_phy_lasi_intr_disable,
+ t3_phy_lasi_intr_clear,
+ t3_phy_lasi_intr_handler,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ael100x_get_link_status,
+ ael1002_power_down,
+};
+#else
+static struct cphy_ops ael2005_ops = {
+ .reset = ael2005_reset,
+ .intr_enable = t3_phy_lasi_intr_enable,
+ .intr_disable = t3_phy_lasi_intr_disable,
+ .intr_clear = t3_phy_lasi_intr_clear,
+ .intr_handler = t3_phy_lasi_intr_handler,
+ .get_link_status = ael100x_get_link_status,
+ .power_down = ael1002_power_down,
+};
+#endif
+
+int t3_ael2005_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
+ const struct mdio_ops *mdio_ops)
+{
+ cphy_init(phy, adapter, phy_addr, &ael2005_ops, mdio_ops,
+ SUPPORTED_10000baseT_Full | SUPPORTED_AUI | SUPPORTED_FIBRE,
+ "10GBASE-R");
+ msleep(125);
+ return t3_mdio_change_bits(phy, MDIO_DEV_PMA_PMD, AEL_OPT_SETTINGS, 0,
+ 1 << 5);
+}
+
#ifdef C99_NOT_SUPPORTED
static struct cphy_ops qt2045_ops = {
ael1006_reset,
- ael1006_intr_enable,
- ael1006_intr_disable,
- ael1006_intr_clear,
- ael1006_intr_handler,
+ t3_phy_lasi_intr_enable,
+ t3_phy_lasi_intr_disable,
+ t3_phy_lasi_intr_clear,
+ t3_phy_lasi_intr_handler,
NULL,
NULL,
NULL,
@@ -246,10 +609,10 @@ static struct cphy_ops qt2045_ops = {
#else
static struct cphy_ops qt2045_ops = {
.reset = ael1006_reset,
- .intr_enable = ael1006_intr_enable,
- .intr_disable = ael1006_intr_disable,
- .intr_clear = ael1006_intr_clear,
- .intr_handler = ael1006_intr_handler,
+ .intr_enable = t3_phy_lasi_intr_enable,
+ .intr_disable = t3_phy_lasi_intr_disable,
+ .intr_clear = t3_phy_lasi_intr_clear,
+ .intr_handler = t3_phy_lasi_intr_handler,
.get_link_status = ael100x_get_link_status,
.power_down = ael1006_power_down,
};
diff --git a/sys/dev/cxgb/common/cxgb_common.h b/sys/dev/cxgb/common/cxgb_common.h
index 1ce6b4016b493..9ac28945533a2 100644
--- a/sys/dev/cxgb/common/cxgb_common.h
+++ b/sys/dev/cxgb/common/cxgb_common.h
@@ -47,10 +47,7 @@ enum {
NCCTRL_WIN = 32, /* # of congestion control windows */
NTX_SCHED = 8, /* # of HW Tx scheduling queues */
PROTO_SRAM_LINES = 128, /* size of protocol sram */
- MAX_NPORTS = 4,
- TP_TMR_RES = 200,
- TP_SRAM_OFFSET = 4096, /* TP SRAM content offset in eeprom */
- TP_SRAM_LEN = 2112, /* TP SRAM content offset in eeprom */
+ EXACT_ADDR_FILTERS = 8, /* # of HW exact match filters */
};
#define MAX_RX_COALESCING_LEN 12288U
@@ -122,8 +119,8 @@ enum {
};
struct sg_ent { /* SGE scatter/gather entry */
- u32 len[2];
- u64 addr[2];
+ __be32 len[2];
+ __be64 addr[2];
};
#ifndef SGE_NUM_GENBITS
@@ -150,7 +147,7 @@ struct adapter_info {
unsigned char mdien:1;
unsigned char mdiinv:1;
unsigned int gpio_out; /* GPIO output settings */
- unsigned int gpio_intr; /* GPIO IRQ enable mask */
+ unsigned char gpio_intr[MAX_NPORTS]; /* GPIO PHY IRQ pins */
unsigned long caps; /* adapter capabilities */
const struct mdio_ops *mdio_ops; /* MDIO operations */
const char *desc; /* product description */
@@ -159,8 +156,6 @@ struct adapter_info {
struct port_type_info {
int (*phy_prep)(struct cphy *phy, adapter_t *adapter, int phy_addr,
const struct mdio_ops *ops);
-
-
};
struct mc5_stats {
@@ -307,7 +302,7 @@ struct tp_params {
struct qset_params { /* SGE queue set parameters */
unsigned int polling; /* polling/interrupt service for rspq */
unsigned int lro; /* large receive offload */
- unsigned int coalesce_nsecs; /* irq coalescing timer */
+ unsigned int coalesce_usecs; /* irq coalescing timer */
unsigned int rspq_size; /* # of entries in response queue */
unsigned int fl_size; /* # of entries in regular free list */
unsigned int jumbo_size; /* # of entries in jumbo free list */
@@ -486,12 +481,25 @@ enum {
MAC_RXFIFO_SIZE = 32768
};
-/* IEEE 802.3ae specified MDIO devices */
+/* IEEE 802.3 specified MDIO devices */
enum {
MDIO_DEV_PMA_PMD = 1,
MDIO_DEV_WIS = 2,
MDIO_DEV_PCS = 3,
- MDIO_DEV_XGXS = 4
+ MDIO_DEV_XGXS = 4,
+ MDIO_DEV_ANEG = 7,
+ MDIO_DEV_VEND1 = 30,
+ MDIO_DEV_VEND2 = 31
+};
+
+/* LASI control and status registers */
+enum {
+ RX_ALARM_CTRL = 0x9000,
+ TX_ALARM_CTRL = 0x9001,
+ LASI_CTRL = 0x9002,
+ RX_ALARM_STAT = 0x9003,
+ TX_ALARM_STAT = 0x9004,
+ LASI_STAT = 0x9005
};
/* PHY loopback direction */
@@ -556,8 +564,8 @@ static inline int mdio_write(struct cphy *phy, int mmd, int reg,
/* Convenience initializer */
static inline void cphy_init(struct cphy *phy, adapter_t *adapter,
int phy_addr, struct cphy_ops *phy_ops,
- const struct mdio_ops *mdio_ops, unsigned int caps,
- const char *desc)
+ const struct mdio_ops *mdio_ops, unsigned int caps,
+ const char *desc)
{
phy->adapter = adapter;
phy->addr = phy_addr;
@@ -651,7 +659,12 @@ int t3_mdio_change_bits(struct cphy *phy, int mmd, int reg, unsigned int clear,
unsigned int set);
int t3_phy_reset(struct cphy *phy, int mmd, int wait);
int t3_phy_advertise(struct cphy *phy, unsigned int advert);
+int t3_phy_advertise_fiber(struct cphy *phy, unsigned int advert);
int t3_set_phy_speed_duplex(struct cphy *phy, int speed, int duplex);
+int t3_phy_lasi_intr_enable(struct cphy *phy);
+int t3_phy_lasi_intr_disable(struct cphy *phy);
+int t3_phy_lasi_intr_clear(struct cphy *phy);
+int t3_phy_lasi_intr_handler(struct cphy *phy);
void t3_intr_enable(adapter_t *adapter);
void t3_intr_disable(adapter_t *adapter);
@@ -673,10 +686,10 @@ int t3_read_flash(adapter_t *adapter, unsigned int addr, unsigned int nwords,
int t3_get_tp_version(adapter_t *adapter, u32 *vers);
int t3_check_tpsram_version(adapter_t *adapter, int *must_load);
int t3_check_tpsram(adapter_t *adapter, const u8 *tp_ram, unsigned int size);
-int t3_load_fw(adapter_t *adapter, const const u8 *fw_data, unsigned int size);
-int t3_load_boot(adapter_t *adapter, u8 *boot_data, unsigned int size);
+int t3_load_fw(adapter_t *adapter, const u8 *fw_data, unsigned int size);
int t3_get_fw_version(adapter_t *adapter, u32 *vers);
int t3_check_fw_version(adapter_t *adapter, int *must_load);
+int t3_load_boot(adapter_t *adapter, u8 *fw_data, unsigned int size);
int t3_init_hw(adapter_t *adapter, u32 fw_params);
void mac_prep(struct cmac *mac, adapter_t *adapter, int index);
void early_hw_init(adapter_t *adapter, const struct adapter_info *ai);
@@ -684,8 +697,8 @@ int t3_prep_adapter(adapter_t *adapter, const struct adapter_info *ai, int reset
void t3_led_ready(adapter_t *adapter);
void t3_fatal_err(adapter_t *adapter);
void t3_set_vlan_accel(adapter_t *adapter, unsigned int ports, int on);
-void t3_tp_set_offload_mode(adapter_t *adap, int enable);
void t3_enable_filters(adapter_t *adap);
+void t3_tp_set_offload_mode(adapter_t *adap, int enable);
void t3_config_rss(adapter_t *adapter, unsigned int rss_config, const u8 *cpus,
const u16 *rspq);
int t3_read_rss(adapter_t *adapter, u8 *lkup, u16 *map);
@@ -719,7 +732,7 @@ void t3_mc5_intr_handler(struct mc5 *mc5);
int t3_read_mc5_range(const struct mc5 *mc5, unsigned int start, unsigned int n,
u32 *buf);
-#if defined(CONFIG_CHELSIO_T3_CORE)
+#ifdef CONFIG_CHELSIO_T3_CORE
int t3_tp_set_coalescing_size(adapter_t *adap, unsigned int size, int psh);
void t3_tp_set_max_rxsize(adapter_t *adap, unsigned int size);
void t3_tp_get_mib_stats(adapter_t *adap, struct tp_mib_stats *tps);
@@ -774,21 +787,22 @@ int t3_vsc7323_set_mtu(adapter_t *adap, unsigned int mtu, int port);
int t3_vsc7323_set_addr(adapter_t *adap, u8 addr[6], int port);
int t3_vsc7323_enable(adapter_t *adap, int port, int which);
int t3_vsc7323_disable(adapter_t *adap, int port, int which);
-
-int t3_phy_advertise_fiber(struct cphy *phy, unsigned int advert);
-
const struct mac_stats *t3_vsc7323_update_stats(struct cmac *mac);
int t3_mv88e1xxx_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
- const struct mdio_ops *mdio_ops);
+ const struct mdio_ops *mdio_ops);
int t3_vsc8211_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
- const struct mdio_ops *mdio_ops);
+ const struct mdio_ops *mdio_ops);
int t3_ael1002_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
- const struct mdio_ops *mdio_ops);
+ const struct mdio_ops *mdio_ops);
int t3_ael1006_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
- const struct mdio_ops *mdio_ops);
-int t3_qt2045_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
const struct mdio_ops *mdio_ops);
+int t3_ael2005_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
+ const struct mdio_ops *mdio_ops);
+int t3_qt2045_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
+ const struct mdio_ops *mdio_ops);
+int t3_tn1010_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
+ const struct mdio_ops *mdio_ops);
int t3_xaui_direct_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
- const struct mdio_ops *mdio_ops);
+ const struct mdio_ops *mdio_ops);
#endif /* __CHELSIO_COMMON_H */
diff --git a/sys/dev/cxgb/common/cxgb_mc5.c b/sys/dev/cxgb/common/cxgb_mc5.c
index 0e40aca8880e0..6f1537c680843 100644
--- a/sys/dev/cxgb/common/cxgb_mc5.c
+++ b/sys/dev/cxgb/common/cxgb_mc5.c
@@ -326,9 +326,16 @@ static void mc5_dbgi_mode_disable(const struct mc5 *mc5)
V_PRTYEN(mc5->parity_enabled) | F_MBUSEN);
}
-/*
- * Initialization that requires the OS and protocol layers to already
- * be intialized goes here.
+/**
+ * t3_mc5_init - initialize MC5 and the TCAM
+ * @mc5: the MC5 handle
+ * @nservers: desired number the TCP servers (listening ports)
+ * @nfilters: desired number of HW filters (classifiers)
+ * @nroutes: desired number of routes
+ *
+ * Initialize MC5 and the TCAM and partition the TCAM for the requested
+ * number of servers, filters, and routes. The number of routes is
+ * typically 0 except for specialized uses of the T3 adapters.
*/
int t3_mc5_init(struct mc5 *mc5, unsigned int nservers, unsigned int nfilters,
unsigned int nroutes)
@@ -344,7 +351,7 @@ int t3_mc5_init(struct mc5 *mc5, unsigned int nservers, unsigned int nfilters,
if (nroutes > MAX_ROUTES || nroutes + nservers + nfilters > tcam_size)
return -EINVAL;
- if (nfilters && adap->params.rev < T3_REV_C)
+ if (nfilters)
mc5->parity_enabled = 0;
/* Reset the TCAM */
@@ -420,7 +427,7 @@ int t3_read_mc5_range(const struct mc5 *mc5, unsigned int start,
}
mc5_dbgi_mode_disable(mc5);
- return 0;
+ return err;
}
#define MC5_INT_FATAL (F_PARITYERR | F_REQQPARERR | F_DISPQPARERR)
@@ -465,7 +472,6 @@ void t3_mc5_intr_handler(struct mc5 *mc5)
t3_write_reg(adap, A_MC5_DB_INT_CAUSE, cause);
}
-
/**
* t3_mc5_prep - initialize the SW state for MC5
* @adapter: the adapter
diff --git a/sys/dev/cxgb/common/cxgb_mv88e1xxx.c b/sys/dev/cxgb/common/cxgb_mv88e1xxx.c
index 8777b82b2f05f..ab8cce7fdc043 100644
--- a/sys/dev/cxgb/common/cxgb_mv88e1xxx.c
+++ b/sys/dev/cxgb/common/cxgb_mv88e1xxx.c
@@ -299,7 +299,7 @@ static struct cphy_ops mv88e1xxx_ops = {
#endif
int t3_mv88e1xxx_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
- const struct mdio_ops *mdio_ops)
+ const struct mdio_ops *mdio_ops)
{
int err;
@@ -310,9 +310,9 @@ int t3_mv88e1xxx_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
/* Configure copper PHY transmitter as class A to reduce EMI. */
err = mdio_write(phy, 0, MV88E1XXX_EXTENDED_ADDR, 0xb);
-
if (!err)
err = mdio_write(phy, 0, MV88E1XXX_EXTENDED_DATA, 0x8004);
+
if (!err)
err = mv88e1xxx_downshift_set(phy, 1); /* Enable downshift */
return err;
diff --git a/sys/dev/cxgb/common/cxgb_t3_cpl.h b/sys/dev/cxgb/common/cxgb_t3_cpl.h
index dd245712cd653..7cd219d222579 100644
--- a/sys/dev/cxgb/common/cxgb_t3_cpl.h
+++ b/sys/dev/cxgb/common/cxgb_t3_cpl.h
@@ -103,6 +103,7 @@ enum CPL_opcode {
CPL_RDMA_TERMINATE = 0xA2,
CPL_TRACE_PKT = 0xA3,
CPL_RDMA_EC_STATUS = 0xA5,
+ CPL_SGE_EC_CR_RETURN = 0xA6,
NUM_CPL_CMDS /* must be last and previous entries must be sorted */
};
@@ -148,7 +149,8 @@ enum {
enum {
CPL_PASS_OPEN_ACCEPT,
- CPL_PASS_OPEN_REJECT
+ CPL_PASS_OPEN_REJECT,
+ CPL_PASS_OPEN_ACCEPT_TNL
};
enum {
@@ -907,6 +909,14 @@ struct cpl_wr_ack {
__be32 snd_una;
};
+struct cpl_sge_ec_cr_return {
+ RSS_HDR
+ union opcode_tid ot;
+ __be16 sge_ec_id;
+ __u8 cr;
+ __u8 rsvd;
+};
+
struct cpl_rdma_ec_status {
RSS_HDR
union opcode_tid ot;
@@ -959,9 +969,11 @@ struct cpl_rx_data {
__u8 dack_mode:2;
__u8 psh:1;
__u8 heartbeat:1;
- __u8 :4;
+ __u8 ddp_off:1;
+ __u8 :3;
#else
- __u8 :4;
+ __u8 :3;
+ __u8 ddp_off:1;
__u8 heartbeat:1;
__u8 psh:1;
__u8 dack_mode:2;
@@ -1129,6 +1141,17 @@ struct cpl_tx_pkt {
__be32 len;
};
+struct cpl_tx_pkt_coalesce {
+ __be32 cntrl;
+ __be32 len;
+ __be64 addr;
+};
+
+struct tx_pkt_coalesce_wr {
+ WR_HDR;
+ struct cpl_tx_pkt_coalesce cpl[0];
+};
+
struct cpl_tx_pkt_lso {
WR_HDR;
__be32 cntrl;
@@ -1265,7 +1288,8 @@ struct cpl_l2t_write_req {
WR_HDR;
union opcode_tid ot;
__be32 params;
- __u8 rsvd[2];
+ __u8 rsvd;
+ __u8 port_idx;
__u8 dst_mac[6];
};
diff --git a/sys/dev/cxgb/common/cxgb_t3_hw.c b/sys/dev/cxgb/common/cxgb_t3_hw.c
index 29fc328223d6e..acd41c034c571 100644
--- a/sys/dev/cxgb/common/cxgb_t3_hw.c
+++ b/sys/dev/cxgb/common/cxgb_t3_hw.c
@@ -460,32 +460,57 @@ int t3_set_phy_speed_duplex(struct cphy *phy, int speed, int duplex)
return mdio_write(phy, 0, MII_BMCR, ctl);
}
+int t3_phy_lasi_intr_enable(struct cphy *phy)
+{
+ return mdio_write(phy, MDIO_DEV_PMA_PMD, LASI_CTRL, 1);
+}
+
+int t3_phy_lasi_intr_disable(struct cphy *phy)
+{
+ return mdio_write(phy, MDIO_DEV_PMA_PMD, LASI_CTRL, 0);
+}
+
+int t3_phy_lasi_intr_clear(struct cphy *phy)
+{
+ u32 val;
+
+ return mdio_read(phy, MDIO_DEV_PMA_PMD, LASI_STAT, &val);
+}
+
+int t3_phy_lasi_intr_handler(struct cphy *phy)
+{
+ unsigned int status;
+ int err = mdio_read(phy, MDIO_DEV_PMA_PMD, LASI_STAT, &status);
+
+ if (err)
+ return err;
+ return (status & 1) ? cphy_cause_link_change : 0;
+}
+
static struct adapter_info t3_adap_info[] = {
{ 1, 1, 0, 0, 0,
F_GPIO2_OEN | F_GPIO4_OEN |
- F_GPIO2_OUT_VAL | F_GPIO4_OUT_VAL, F_GPIO3 | F_GPIO5,
- 0,
+ F_GPIO2_OUT_VAL | F_GPIO4_OUT_VAL, { S_GPIO3, S_GPIO5 }, 0,
&mi1_mdio_ops, "Chelsio PE9000" },
{ 1, 1, 0, 0, 0,
F_GPIO2_OEN | F_GPIO4_OEN |
- F_GPIO2_OUT_VAL | F_GPIO4_OUT_VAL, F_GPIO3 | F_GPIO5,
- 0,
+ F_GPIO2_OUT_VAL | F_GPIO4_OUT_VAL, { S_GPIO3, S_GPIO5 }, 0,
&mi1_mdio_ops, "Chelsio T302" },
{ 1, 0, 0, 0, 0,
F_GPIO1_OEN | F_GPIO6_OEN | F_GPIO7_OEN | F_GPIO10_OEN |
F_GPIO11_OEN | F_GPIO1_OUT_VAL | F_GPIO6_OUT_VAL | F_GPIO10_OUT_VAL,
- 0, SUPPORTED_10000baseT_Full | SUPPORTED_AUI,
+ { 0 }, SUPPORTED_10000baseT_Full | SUPPORTED_AUI,
&mi1_mdio_ext_ops, "Chelsio T310" },
{ 1, 1, 0, 0, 0,
F_GPIO1_OEN | F_GPIO2_OEN | F_GPIO4_OEN | F_GPIO5_OEN | F_GPIO6_OEN |
F_GPIO7_OEN | F_GPIO10_OEN | F_GPIO11_OEN | F_GPIO1_OUT_VAL |
- F_GPIO5_OUT_VAL | F_GPIO6_OUT_VAL | F_GPIO10_OUT_VAL, 0,
- SUPPORTED_10000baseT_Full | SUPPORTED_AUI,
+ F_GPIO5_OUT_VAL | F_GPIO6_OUT_VAL | F_GPIO10_OUT_VAL,
+ { S_GPIO9, S_GPIO3 }, SUPPORTED_10000baseT_Full | SUPPORTED_AUI,
&mi1_mdio_ext_ops, "Chelsio T320" },
{ 4, 0, 0, 0, 0,
F_GPIO5_OEN | F_GPIO6_OEN | F_GPIO7_OEN | F_GPIO5_OUT_VAL |
F_GPIO6_OUT_VAL | F_GPIO7_OUT_VAL,
- F_GPIO1 | F_GPIO2 | F_GPIO3 | F_GPIO4, SUPPORTED_AUI,
+ { S_GPIO1, S_GPIO2, S_GPIO3, S_GPIO4 }, SUPPORTED_AUI,
&mi1_mdio_ops, "Chelsio T304" },
};
@@ -504,10 +529,10 @@ static struct port_type_info port_types[] = {
{ t3_vsc8211_phy_prep },
{ t3_mv88e1xxx_phy_prep },
{ t3_xaui_direct_phy_prep },
- { NULL },
+ { t3_ael2005_phy_prep },
{ t3_qt2045_phy_prep },
{ t3_ael1006_phy_prep },
- { NULL },
+ { t3_tn1010_phy_prep },
};
#define VPD_ENTRY(name, len) \
@@ -1231,6 +1256,15 @@ void t3_link_changed(adapter_t *adapter, int port_id)
phy->ops->get_link_status(phy, &link_ok, &speed, &duplex, &fc);
+ if (lc->requested_fc & PAUSE_AUTONEG)
+ fc &= lc->requested_fc;
+ else
+ fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
+
+ if (link_ok == lc->link_ok && speed == lc->speed &&
+ duplex == lc->duplex && fc == lc->fc)
+ return; /* nothing changed */
+
if (link_ok != lc->link_ok && adapter->params.rev > 0 &&
uses_xaui(adapter)) {
if (link_ok)
@@ -1241,10 +1275,6 @@ void t3_link_changed(adapter_t *adapter, int port_id)
lc->link_ok = (unsigned char)link_ok;
lc->speed = speed < 0 ? SPEED_INVALID : speed;
lc->duplex = duplex < 0 ? DUPLEX_INVALID : duplex;
- if (lc->requested_fc & PAUSE_AUTONEG)
- fc &= lc->requested_fc;
- else
- fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
if (link_ok && speed >= 0 && lc->autoneg == AUTONEG_ENABLE) {
/* Set MAC speed, duplex, and flow control to match PHY. */
@@ -1784,19 +1814,15 @@ static int mac_intr_handler(adapter_t *adap, unsigned int idx)
*/
int t3_phy_intr_handler(adapter_t *adapter)
{
- u32 mask, gpi = adapter_info(adapter)->gpio_intr;
u32 i, cause = t3_read_reg(adapter, A_T3DBG_INT_CAUSE);
for_each_port(adapter, i) {
struct port_info *p = adap2pinfo(adapter, i);
- mask = gpi - (gpi & (gpi - 1));
- gpi -= mask;
-
if (!(p->phy.caps & SUPPORTED_IRQ))
continue;
- if (cause & mask) {
+ if (cause & (1 << adapter_info(adapter)->gpio_intr[i])) {
int phy_cause = p->phy.ops->intr_handler(&p->phy);
if (phy_cause & cphy_cause_link_change)
@@ -1870,6 +1896,17 @@ int t3_slow_intr_handler(adapter_t *adapter)
return 1;
}
+static unsigned int calc_gpio_intr(adapter_t *adap)
+{
+ unsigned int i, gpi_intr = 0;
+
+ for_each_port(adap, i)
+ if ((adap2pinfo(adap, i)->phy.caps & SUPPORTED_IRQ) &&
+ adapter_info(adap)->gpio_intr[i])
+ gpi_intr |= 1 << adapter_info(adap)->gpio_intr[i];
+ return gpi_intr;
+}
+
/**
* t3_intr_enable - enable interrupts
* @adapter: the adapter whose interrupts should be enabled
@@ -1912,10 +1949,8 @@ void t3_intr_enable(adapter_t *adapter)
t3_write_reg(adapter, A_ULPTX_INT_ENABLE, ULPTX_INTR_MASK);
}
- t3_write_reg(adapter, A_T3DBG_GPIO_ACT_LOW,
- adapter_info(adapter)->gpio_intr);
- t3_write_reg(adapter, A_T3DBG_INT_ENABLE,
- adapter_info(adapter)->gpio_intr);
+ t3_write_reg(adapter, A_T3DBG_INT_ENABLE, calc_gpio_intr(adapter));
+
if (is_pcie(adapter))
t3_write_reg(adapter, A_PCIE_INT_ENABLE, PCIE_INTR_MASK);
else
@@ -2561,6 +2596,20 @@ static void tp_wr_bits_indirect(adapter_t *adap, unsigned int addr,
}
/**
+ * t3_enable_filters - enable the HW filters
+ * @adap: the adapter
+ *
+ * Enables the HW filters for NIC traffic.
+ */
+void t3_enable_filters(adapter_t *adap)
+{
+ t3_set_reg_field(adap, A_TP_IN_CONFIG, F_NICMODE, 0);
+ t3_set_reg_field(adap, A_MC5_DB_CONFIG, 0, F_FILTEREN);
+ t3_set_reg_field(adap, A_TP_GLOBAL_CONFIG, 0, V_FIVETUPLELOOKUP(3));
+ tp_wr_bits_indirect(adap, A_TP_INGRESS_CONFIG, 0, F_LOOKUPEVERYPKT);
+}
+
+/**
* pm_num_pages - calculate the number of pages of the payload memory
* @mem_size: the size of the payload memory
* @pg_size: the size of each payload memory page
@@ -2660,10 +2709,10 @@ static void tp_config(adapter_t *adap, const struct tp_params *p)
F_TCPCHECKSUMOFFLOAD | V_IPTTL(64));
t3_write_reg(adap, A_TP_TCP_OPTIONS, V_MTUDEFAULT(576) |
F_MTUENABLE | V_WINDOWSCALEMODE(1) |
- V_TIMESTAMPSMODE(0) | V_SACKMODE(1) | V_SACKRX(1));
+ V_TIMESTAMPSMODE(1) | V_SACKMODE(1) | V_SACKRX(1));
t3_write_reg(adap, A_TP_DACK_CONFIG, V_AUTOSTATE3(1) |
V_AUTOSTATE2(1) | V_AUTOSTATE1(0) |
- V_BYTETHRESHOLD(16384) | V_MSSTHRESHOLD(2) |
+ V_BYTETHRESHOLD(26880) | V_MSSTHRESHOLD(2) |
F_AUTOCAREFUL | F_AUTOENABLE | V_DACK_MODE(1));
t3_set_reg_field(adap, A_TP_IN_CONFIG, F_RXFBARBPRIO | F_TXFBARBPRIO,
F_IPV6ENABLE | F_NICMODE);
@@ -2705,7 +2754,8 @@ static void tp_config(adapter_t *adap, const struct tp_params *p)
if (adap->params.nports > 2) {
t3_set_reg_field(adap, A_TP_PC_CONFIG2, 0,
- F_ENABLETXPORTFROMDA | F_ENABLERXPORTFROMADDR);
+ F_ENABLETXPORTFROMDA2 | F_ENABLETXPORTFROMDA |
+ F_ENABLERXPORTFROMADDR);
tp_wr_bits_indirect(adap, A_TP_QOS_RX_MAP_MODE,
V_RXMAPMODE(M_RXMAPMODE), 0);
tp_wr_indirect(adap, A_TP_INGRESS_CONFIG, V_BITPOS0(48) |
@@ -3620,6 +3670,8 @@ int t3_init_hw(adapter_t *adapter, u32 fw_params)
chan_init_hw(adapter, adapter->params.chan_map);
t3_sge_init(adapter, &adapter->params.sge);
+ t3_write_reg(adapter, A_T3DBG_GPIO_ACT_LOW, calc_gpio_intr(adapter));
+
t3_write_reg(adapter, A_CIM_HOST_ACC_DATA, vpd->uclk | fw_params);
t3_write_reg(adapter, A_CIM_BOOT_CFG,
V_BOOTADDR(FW_FLASH_BOOT_ADDR >> 2));
diff --git a/sys/dev/cxgb/common/cxgb_tn1010.c b/sys/dev/cxgb/common/cxgb_tn1010.c
new file mode 100644
index 0000000000000..920ccc04a8665
--- /dev/null
+++ b/sys/dev/cxgb/common/cxgb_tn1010.c
@@ -0,0 +1,225 @@
+/**************************************************************************
+
+Copyright (c) 2008, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#endif
+
+#undef msleep
+#define msleep t3_os_sleep
+
+/* TN1010 PHY specific registers. */
+enum {
+ TN1010_VEND1_STAT = 1,
+};
+
+/* IEEE auto-negotiation 10GBASE-T registers */
+enum {
+ ANEG_ADVER = 16,
+ ANEG_LPA = 19,
+ ANEG_10G_CTRL = 32,
+ ANEG_10G_STAT = 33
+};
+
+#define ADVERTISE_ENPAGE (1 << 12)
+#define ADVERTISE_10000FULL (1 << 12)
+#define ADVERTISE_LOOP_TIMING (1 << 0)
+
+/* vendor specific status register fields */
+#define F_XS_LANE_ALIGN_STAT (1 << 0)
+#define F_PCS_BLK_LOCK (1 << 1)
+#define F_PMD_SIGNAL_OK (1 << 2)
+#define F_LINK_STAT (1 << 3)
+#define F_ANEG_SPEED_1G (1 << 4)
+#define F_ANEG_MASTER (1 << 5)
+
+#define S_ANEG_STAT 6
+#define M_ANEG_STAT 0x3
+#define G_ANEG_STAT(x) (((x) >> S_ANEG_STAT) & M_ANEG_STAT)
+
+enum { /* autonegotiation status */
+ ANEG_IN_PROGR = 0,
+ ANEG_COMPLETE = 1,
+ ANEG_FAILED = 3
+};
+
+/*
+ * Reset the PHY. May take up to 500ms to complete.
+ */
+static int tn1010_reset(struct cphy *phy, int wait)
+{
+ int err = t3_phy_reset(phy, MDIO_DEV_PMA_PMD, wait);
+ msleep(500);
+ return err;
+}
+
+static int tn1010_power_down(struct cphy *phy, int enable)
+{
+ return t3_mdio_change_bits(phy, MDIO_DEV_PMA_PMD, MII_BMCR,
+ BMCR_PDOWN, enable ? BMCR_PDOWN : 0);
+}
+
+static int tn1010_autoneg_enable(struct cphy *phy)
+{
+ int err;
+
+ err = tn1010_power_down(phy, 0);
+ if (!err)
+ err = t3_mdio_change_bits(phy, MDIO_DEV_ANEG, MII_BMCR, 0,
+ BMCR_ANENABLE | BMCR_ANRESTART);
+ return err;
+}
+
+static int tn1010_autoneg_restart(struct cphy *phy)
+{
+ int err;
+
+ err = tn1010_power_down(phy, 0);
+ if (!err)
+ err = t3_mdio_change_bits(phy, MDIO_DEV_ANEG, MII_BMCR, 0,
+ BMCR_ANRESTART);
+ return err;
+}
+
+static int tn1010_advertise(struct cphy *phy, unsigned int advert)
+{
+ int err, val;
+
+ if (!(advert & ADVERTISED_1000baseT_Full))
+ return -EINVAL; /* PHY can't disable 1000BASE-T */
+
+ val = ADVERTISE_CSMA | ADVERTISE_ENPAGE | ADVERTISE_NPAGE;
+ if (advert & ADVERTISED_Pause)
+ val |= ADVERTISE_PAUSE_CAP;
+ if (advert & ADVERTISED_Asym_Pause)
+ val |= ADVERTISE_PAUSE_ASYM;
+ err = mdio_write(phy, MDIO_DEV_ANEG, ANEG_ADVER, val);
+ if (err)
+ return err;
+
+ val = (advert & ADVERTISED_10000baseT_Full) ? ADVERTISE_10000FULL : 0;
+ return mdio_write(phy, MDIO_DEV_ANEG, ANEG_10G_CTRL, val |
+ ADVERTISE_LOOP_TIMING);
+}
+
+static int tn1010_get_link_status(struct cphy *phy, int *link_ok,
+ int *speed, int *duplex, int *fc)
+{
+ unsigned int status, lpa, adv;
+ int err, sp = -1, pause = 0;
+
+ err = mdio_read(phy, MDIO_DEV_VEND1, TN1010_VEND1_STAT, &status);
+ if (err)
+ return err;
+
+ if (link_ok)
+ *link_ok = (status & F_LINK_STAT) != 0;
+
+ if (G_ANEG_STAT(status) == ANEG_COMPLETE) {
+ sp = (status & F_ANEG_SPEED_1G) ? SPEED_1000 : SPEED_10000;
+
+ if (fc) {
+ err = mdio_read(phy, MDIO_DEV_ANEG, ANEG_LPA, &lpa);
+ if (!err)
+ err = mdio_read(phy, MDIO_DEV_ANEG, ANEG_ADVER,
+ &adv);
+ if (err)
+ return err;
+
+ if (lpa & adv & ADVERTISE_PAUSE_CAP)
+ pause = PAUSE_RX | PAUSE_TX;
+ else if ((lpa & ADVERTISE_PAUSE_CAP) &&
+ (lpa & ADVERTISE_PAUSE_ASYM) &&
+ (adv & ADVERTISE_PAUSE_ASYM))
+ pause = PAUSE_TX;
+ else if ((lpa & ADVERTISE_PAUSE_ASYM) &&
+ (adv & ADVERTISE_PAUSE_CAP))
+ pause = PAUSE_RX;
+ }
+ }
+ if (speed)
+ *speed = sp;
+ if (duplex)
+ *duplex = DUPLEX_FULL;
+ if (fc)
+ *fc = pause;
+ return 0;
+}
+
+static int tn1010_set_speed_duplex(struct cphy *phy, int speed, int duplex)
+{
+ return -EINVAL; /* require autoneg */
+}
+
+#ifdef C99_NOT_SUPPORTED
+static struct cphy_ops tn1010_ops = {
+ tn1010_reset,
+ t3_phy_lasi_intr_enable,
+ t3_phy_lasi_intr_disable,
+ t3_phy_lasi_intr_clear,
+ t3_phy_lasi_intr_handler,
+ tn1010_autoneg_enable,
+ tn1010_autoneg_restart,
+ tn1010_advertise,
+ NULL,
+ tn1010_set_speed_duplex,
+ tn1010_get_link_status,
+ tn1010_power_down,
+};
+#else
+static struct cphy_ops tn1010_ops = {
+ .reset = tn1010_reset,
+ .intr_enable = t3_phy_lasi_intr_enable,
+ .intr_disable = t3_phy_lasi_intr_disable,
+ .intr_clear = t3_phy_lasi_intr_clear,
+ .intr_handler = t3_phy_lasi_intr_handler,
+ .autoneg_enable = tn1010_autoneg_enable,
+ .autoneg_restart = tn1010_autoneg_restart,
+ .advertise = tn1010_advertise,
+ .set_speed_duplex = tn1010_set_speed_duplex,
+ .get_link_status = tn1010_get_link_status,
+ .power_down = tn1010_power_down,
+};
+#endif
+
+int t3_tn1010_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
+ const struct mdio_ops *mdio_ops)
+{
+ cphy_init(phy, adapter, phy_addr, &tn1010_ops, mdio_ops,
+ SUPPORTED_1000baseT_Full | SUPPORTED_10000baseT_Full |
+ SUPPORTED_Autoneg | SUPPORTED_AUI | SUPPORTED_TP,
+ "1000/10GBASE-T");
+ msleep(500); /* PHY needs up to 500ms to start responding to MDIO */
+ return 0;
+}
diff --git a/sys/dev/cxgb/common/cxgb_vsc8211.c b/sys/dev/cxgb/common/cxgb_vsc8211.c
index 61bdc9c7f5ed2..ad3c88e4c99d3 100644
--- a/sys/dev/cxgb/common/cxgb_vsc8211.c
+++ b/sys/dev/cxgb/common/cxgb_vsc8211.c
@@ -45,6 +45,7 @@ enum {
VSC8211_EXT_CTRL = 23,
VSC8211_INTR_ENABLE = 25,
VSC8211_INTR_STATUS = 26,
+ VSC8211_LED_CTRL = 27,
VSC8211_AUX_CTRL_STAT = 28,
VSC8211_EXT_PAGE_AXS = 31,
};
@@ -393,8 +394,10 @@ int t3_vsc8211_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
err = mdio_read(phy, 0, VSC8211_EXT_CTRL, &val);
if (err)
return err;
- if (val & VSC_CTRL_MEDIA_MODE_HI)
- return 0; /* copper interface, done */
+ if (val & VSC_CTRL_MEDIA_MODE_HI) {
+ /* copper interface, just need to configure the LEDs */
+ return mdio_write(phy, 0, VSC8211_LED_CTRL, 0x100);
+ }
phy->caps = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
SUPPORTED_MII | SUPPORTED_FIBRE | SUPPORTED_IRQ;
diff --git a/sys/dev/cxgb/common/cxgb_xgmac.c b/sys/dev/cxgb/common/cxgb_xgmac.c
index 745cc4b4dd5e2..51a02c25bcb69 100644
--- a/sys/dev/cxgb/common/cxgb_xgmac.c
+++ b/sys/dev/cxgb/common/cxgb_xgmac.c
@@ -44,7 +44,6 @@ __FBSDID("$FreeBSD$");
* # of exact address filters. The first one is used for the station address,
* the rest are available for multicast addresses.
*/
-#define EXACT_ADDR_FILTERS 8
static inline int macidx(const struct cmac *mac)
{
@@ -159,16 +158,18 @@ int t3_mac_reset(struct cmac *mac)
t3_write_reg(adap, A_XGM_TX_CTRL + oft, F_TXEN);
t3_write_reg(adap, A_XGM_RX_CTRL + oft, F_RXEN);
}
+
t3_set_reg_field(adap, A_XGM_RX_MAX_PKT_SIZE + oft,
V_RXMAXFRAMERSIZE(M_RXMAXFRAMERSIZE),
V_RXMAXFRAMERSIZE(MAX_FRAME_SIZE) | F_RXENFRAMER);
+
val = F_MAC_RESET_ | F_XGMAC_STOP_EN;
- if (is_10G(adap) || mac->multiport)
+ if (!mac->multiport)
+ val |= F_XG2G_RESET_;
+ if (uses_xaui(adap))
val |= F_PCS_RESET_;
- else if (uses_xaui(adap))
- val |= F_PCS_RESET_ | F_XG2G_RESET_;
else
- val |= F_RGMII_RESET_ | F_XG2G_RESET_;
+ val |= F_RGMII_RESET_;
t3_write_reg(adap, A_XGM_RESET_CTRL + oft, val);
(void) t3_read_reg(adap, A_XGM_RESET_CTRL + oft); /* flush */
if ((val & F_PCS_RESET_) && adap->params.rev) {
@@ -188,10 +189,10 @@ static int t3b2_mac_reset(struct cmac *mac)
/* Stop egress traffic to xgm*/
- if (!macidx(mac))
- t3_set_reg_field(adap, A_MPS_CFG, F_PORT0ACTIVE, 0);
+ if (!macidx(mac))
+ t3_set_reg_field(adap, A_MPS_CFG, F_PORT0ACTIVE, 0);
else
- t3_set_reg_field(adap, A_MPS_CFG, F_PORT1ACTIVE, 0);
+ t3_set_reg_field(adap, A_MPS_CFG, F_PORT1ACTIVE, 0);
/* PCS in reset */
t3_write_reg(adap, A_XGM_RESET_CTRL + oft, F_MAC_RESET_);
@@ -223,15 +224,15 @@ static int t3b2_mac_reset(struct cmac *mac)
msleep(1);
t3b_pcs_reset(mac);
}
- t3_write_reg(adap, A_XGM_RX_CFG + oft,
+ t3_write_reg(adap, A_XGM_RX_CFG + oft,
F_DISPAUSEFRAMES | F_EN1536BFRAMES |
F_RMFCS | F_ENJUMBO | F_ENHASHMCAST );
/*Resume egress traffic to xgm*/
- if (!macidx(mac))
- t3_set_reg_field(adap, A_MPS_CFG, 0, F_PORT0ACTIVE);
+ if (!macidx(mac))
+ t3_set_reg_field(adap, A_MPS_CFG, 0, F_PORT0ACTIVE);
else
- t3_set_reg_field(adap, A_MPS_CFG, 0, F_PORT1ACTIVE);
+ t3_set_reg_field(adap, A_MPS_CFG, 0, F_PORT1ACTIVE);
return 0;
}
@@ -279,6 +280,9 @@ int t3_mac_set_address(struct cmac *mac, unsigned int idx, u8 addr[6])
* Specify the number of exact address filters that should be reserved for
* unicast addresses. Caller should reload the unicast and multicast
* addresses after calling this.
+ *
+ * Generally, this is 1 with the first one used for the station address,
+ * and the rest are available for multicast addresses.
*/
int t3_mac_set_num_ucast(struct cmac *mac, unsigned char n)
{
@@ -385,7 +389,7 @@ static int rx_fifo_hwm(int mtu)
*
* Sets the MAC MTU and adjusts the FIFO PAUSE watermarks accordingly.
*/
-int t3_mac_set_mtu(struct cmac *mac, unsigned int mtu)
+int t3_mac_set_mtu(struct cmac *mac, unsigned int mtu)
{
int hwm, lwm, divisor;
int ipg;
@@ -413,7 +417,7 @@ int t3_mac_set_mtu(struct cmac *mac, unsigned int mtu)
reg = adap->params.rev == T3_REV_B2 ?
A_XGM_RX_MAX_PKT_SIZE_ERR_CNT : A_XGM_RXFIFO_CFG;
-
+
/* drain RX FIFO */
if (t3_wait_op_done(adap, reg + mac->offset,
F_RXFIFO_EMPTY, 1, 20, 5)) {
@@ -428,9 +432,8 @@ int t3_mac_set_mtu(struct cmac *mac, unsigned int mtu)
enable_exact_filters(mac);
} else
t3_set_reg_field(adap, A_XGM_RX_MAX_PKT_SIZE + mac->offset,
- V_RXMAXPKTSIZE(M_RXMAXPKTSIZE),
- V_RXMAXPKTSIZE(mtu));
-
+ V_RXMAXPKTSIZE(M_RXMAXPKTSIZE),
+ V_RXMAXPKTSIZE(mtu));
/*
* Adjust the PAUSE frame watermarks. We always set the LWM, and the
* HWM only if flow-control is enabled.
@@ -462,10 +465,10 @@ int t3_mac_set_mtu(struct cmac *mac, unsigned int mtu)
*/
if (adap->params.rev > 0) {
divisor = (adap->params.rev == T3_REV_C) ? 64 : 8;
- t3_write_reg(adap, A_XGM_PAUSE_TIMER + mac->offset,
- (hwm - lwm) * 4 / divisor);
+ t3_write_reg(adap, A_XGM_PAUSE_TIMER + mac->offset,
+ (hwm - lwm) * 4 / divisor);
}
- t3_write_reg(adap, A_XGM_TX_PAUSE_QUANTA + mac->offset,
+ t3_write_reg(adap, A_XGM_TX_PAUSE_QUANTA + mac->offset,
MAC_RXFIFO_SIZE * 4 * 8 / 512);
return 0;
}
@@ -489,7 +492,7 @@ int t3_mac_set_speed_duplex_fc(struct cmac *mac, int speed, int duplex, int fc)
if (duplex >= 0 && duplex != DUPLEX_FULL)
return -EINVAL;
- if (mac->multiport) {
+ if (mac->multiport) {
val = t3_read_reg(adap, A_XGM_RXFIFO_CFG + oft);
val &= ~V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM);
val |= V_RXFIFOPAUSEHWM(rx_fifo_hwm(t3_read_reg(adap,
@@ -575,7 +578,7 @@ int t3_mac_enable(struct cmac *mac, int which)
mac->txen = F_TXEN;
mac->toggle_cnt = 0;
}
- if (which & MAC_DIRECTION_RX)
+ if (which & MAC_DIRECTION_RX)
t3_write_reg(adap, A_XGM_RX_CTRL + oft, F_RXEN);
return 0;
}
@@ -673,10 +676,10 @@ rxcheck:
if (rx_mcnt != mac->rx_mcnt) {
rx_xcnt = (G_TXSPI4SOPCNT(t3_read_reg(adap,
A_XGM_RX_SPI4_SOP_EOP_CNT +
- mac->offset))) +
+ mac->offset))) +
(s->rx_fifo_ovfl - mac->rx_ocnt);
mac->rx_ocnt = s->rx_fifo_ovfl;
- } else
+ } else
goto out;
if (mac->rx_mcnt != s->rx_frames && rx_xcnt == 0 && mac->rx_xcnt == 0) {
@@ -684,8 +687,8 @@ rxcheck:
status = 2;
goto out;
}
-
-out:
+
+out:
mac->tx_tcnt = tx_tcnt;
mac->tx_xcnt = tx_xcnt;
mac->tx_mcnt = s->tx_frames;
diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h
index f2b0531503910..39fe8eb91f58a 100644
--- a/sys/dev/cxgb/cxgb_adapter.h
+++ b/sys/dev/cxgb/cxgb_adapter.h
@@ -166,7 +166,7 @@ enum { TXQ_ETH = 0,
* work request size in bytes
*/
#define WR_LEN (WR_FLITS * 8)
-#define PIO_LEN (WR_LEN - sizeof(struct cpl_tx_pkt))
+#define PIO_LEN (WR_LEN - sizeof(struct cpl_tx_pkt_lso))
/* careful, the following are set on priv_flags and must not collide with
diff --git a/sys/dev/cxgb/cxgb_config.h b/sys/dev/cxgb/cxgb_config.h
index 723c23e7279f8..a5ee963b4734f 100644
--- a/sys/dev/cxgb/cxgb_config.h
+++ b/sys/dev/cxgb/cxgb_config.h
@@ -31,7 +31,6 @@ $FreeBSD$
***************************************************************************/
#ifndef _CXGB_CONFIG_H_
#define _CXGB_CONFIG_H_
-#define DISABLE_MBUF_IOVEC
#define RTALLOC2_DEFINED
#define VM_FAULT_HOLD_DEFINED
#ifndef CONFIG_DEFINED
diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c
index f6cfcdfbe46e6..4fb53b53efe70 100644
--- a/sys/dev/cxgb/cxgb_main.c
+++ b/sys/dev/cxgb/cxgb_main.c
@@ -9,7 +9,7 @@ modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
-2. Neither the name of the Chelsio Corporation nor the names of its
+ 2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
+#include <net/if_vlan_var.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
@@ -724,10 +725,9 @@ cxgb_free(struct adapter *sc)
} else
printf("not offloading set\n");
#ifdef notyet
- /* XXX need to handle unload in TOM */
if (sc->flags & CXGB_OFLD_INIT)
cxgb_offload_deactivate(sc);
-#endif
+#endif
free(sc->filters, M_DEVBUF);
t3_sge_free(sc);
@@ -979,7 +979,7 @@ cxgb_port_attach(device_t dev)
* Only default to jumbo frames on 10GigE
*/
if (p->adapter->params.nports <= 2)
- ifp->if_mtu = 9000;
+ ifp->if_mtu = ETHERMTU_JUMBO;
if ((err = cxgb_makedev(p)) != 0) {
printf("makedev failed %d\n", err);
return (err);
@@ -1255,13 +1255,23 @@ cxgb_link_start(struct port_info *p)
struct ifnet *ifp;
struct t3_rx_mode rm;
struct cmac *mac = &p->mac;
+ int mtu, hwtagging;
ifp = p->ifp;
+ bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
+
+ mtu = ifp->if_mtu;
+ if (ifp->if_capenable & IFCAP_VLAN_MTU)
+ mtu += ETHER_VLAN_ENCAP_LEN;
+
+ hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
+
t3_init_rx_mode(&rm, p);
if (!mac->multiport)
t3_mac_reset(mac);
- t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
+ t3_mac_set_mtu(mac, mtu);
+ t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
t3_mac_set_address(mac, 0, p->hw_addr);
t3_mac_set_rx_mode(mac, &rm);
t3_link_start(&p->phy, mac, &p->link_config);
@@ -1751,10 +1761,9 @@ offload_open(struct port_info *pi)
adapter->params.rev == 0 ?
adapter->port[0].ifp->if_mtu : 0xffff);
init_smt(adapter);
-#ifdef TOE_ENABLED
/* Call back all registered clients */
cxgb_add_clients(tdev);
-#endif
+
/* restore them in case the offload module has changed them */
if (err) {
t3_tp_set_offload_mode(adapter, 0);
@@ -1771,10 +1780,10 @@ offload_close(struct t3cdev *tdev)
if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
return (0);
-#ifdef TOE_ENABLED
+
/* Call back all registered clients */
cxgb_remove_clients(tdev);
-#endif
+
tdev->lldev = NULL;
cxgb_set_dummy_ops(tdev);
t3_tp_set_offload_mode(adapter, 0);
@@ -1904,7 +1913,7 @@ cxgb_set_mtu(struct port_info *p, int mtu)
struct ifnet *ifp = p->ifp;
int error = 0;
- if ((mtu < ETHERMIN) || (mtu > ETHER_MAX_LEN_JUMBO))
+ if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
error = EINVAL;
else if (ifp->if_mtu != mtu) {
PORT_LOCK(p);
@@ -1924,7 +1933,7 @@ cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
struct port_info *p = ifp->if_softc;
struct ifaddr *ifa = (struct ifaddr *)data;
struct ifreq *ifr = (struct ifreq *)data;
- int flags, error = 0;
+ int flags, error = 0, reinit = 0;
uint32_t mask;
/*
@@ -1979,18 +1988,16 @@ cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
if (IFCAP_TXCSUM & ifp->if_capenable) {
ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
- | CSUM_TSO);
+ | CSUM_IP | CSUM_TSO);
} else {
ifp->if_capenable |= IFCAP_TXCSUM;
- ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
- }
- } else if (mask & IFCAP_RXCSUM) {
- if (IFCAP_RXCSUM & ifp->if_capenable) {
- ifp->if_capenable &= ~IFCAP_RXCSUM;
- } else {
- ifp->if_capenable |= IFCAP_RXCSUM;
+ ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
+ | CSUM_IP);
}
}
+ if (mask & IFCAP_RXCSUM) {
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ }
if (mask & IFCAP_TSO4) {
if (IFCAP_TSO4 & ifp->if_capenable) {
ifp->if_capenable &= ~IFCAP_TSO4;
@@ -2005,7 +2012,26 @@ cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
error = EINVAL;
}
}
+ if (mask & IFCAP_VLAN_HWTAGGING) {
+ ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
+ reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
+ }
+ if (mask & IFCAP_VLAN_MTU) {
+ ifp->if_capenable ^= IFCAP_VLAN_MTU;
+ reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
+ }
+ if (mask & IFCAP_VLAN_HWCSUM) {
+ ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
+ }
+ if (reinit) {
+ cxgb_stop_locked(p);
+ cxgb_init_locked(p);
+ }
PORT_UNLOCK(p);
+
+#ifdef VLAN_CAPABILITIES
+ VLAN_CAPABILITIES(ifp);
+#endif
break;
default:
error = ether_ioctl(ifp, command, data);
@@ -2126,9 +2152,11 @@ check_t3b2_mac(struct adapter *adapter)
p->mac.stats.num_toggled++;
else if (status == 2) {
struct cmac *mac = &p->mac;
+ int mtu = ifp->if_mtu;
- t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN
- + ETHER_VLAN_ENCAP_LEN);
+ if (ifp->if_capenable & IFCAP_VLAN_MTU)
+ mtu += ETHER_VLAN_ENCAP_LEN;
+ t3_mac_set_mtu(mac, mtu);
t3_mac_set_address(mac, 0, p->hw_addr);
cxgb_set_rxmode(p);
t3_link_start(&p->phy, mac, &p->link_config);
@@ -2434,7 +2462,7 @@ cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
if (t->intr_lat >= 0) {
struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
- q->coalesce_nsecs = t->intr_lat*1000;
+ q->coalesce_usecs = t->intr_lat;
t3_update_qset_coalesce(qs, q);
}
break;
@@ -2454,7 +2482,7 @@ cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
t->fl_size[0] = q->fl_size;
t->fl_size[1] = q->jumbo_size;
t->polling = q->polling;
- t->intr_lat = q->coalesce_nsecs / 1000;
+ t->intr_lat = q->coalesce_usecs;
t->cong_thres = q->cong_thres;
break;
}
diff --git a/sys/dev/cxgb/cxgb_offload.c b/sys/dev/cxgb/cxgb_offload.c
index 1eeafafa5b4f5..d865e7f7cbfb1 100644
--- a/sys/dev/cxgb/cxgb_offload.c
+++ b/sys/dev/cxgb/cxgb_offload.c
@@ -1,7 +1,6 @@
-
/**************************************************************************
-Copyright (c) 2007, Chelsio Inc.
+Copyright (c) 2007-2008, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -104,7 +103,7 @@ unregister_tdev(struct t3cdev *tdev)
mtx_unlock(&cxgb_db_lock);
}
-#ifdef TOE_ENABLED
+#ifndef TCP_OFFLOAD_DISABLE
/**
* cxgb_register_client - register an offload client
* @client: the client
diff --git a/sys/dev/cxgb/cxgb_offload.h b/sys/dev/cxgb/cxgb_offload.h
index dbe2bc50a4cd8..605dd0b0dc2a8 100644
--- a/sys/dev/cxgb/cxgb_offload.h
+++ b/sys/dev/cxgb/cxgb_offload.h
@@ -36,17 +36,13 @@ $FreeBSD$
#ifdef CONFIG_DEFINED
#include <common/cxgb_version.h>
#include <cxgb_config.h>
-#ifdef TOE_ENABLED
#include <ulp/tom/cxgb_l2t.h>
-#endif
#include <common/cxgb_tcb.h>
#include <t3cdev.h>
#else
#include <dev/cxgb/common/cxgb_version.h>
#include <dev/cxgb/cxgb_config.h>
-#ifdef TOE_ENABLED
#include <dev/cxgb/ulp/tom/cxgb_l2t.h>
-#endif
#include <dev/cxgb/common/cxgb_tcb.h>
#include <dev/cxgb/t3cdev.h>
#endif
@@ -83,7 +79,6 @@ void cxgb_remove_clients(struct t3cdev *tdev);
typedef int (*cxgb_cpl_handler_func)(struct t3cdev *dev,
struct mbuf *m, void *ctx);
-#ifdef TOE_ENABLED
struct cxgb_client {
char *name;
void (*add) (struct t3cdev *);
@@ -102,7 +97,6 @@ int cxgb_alloc_atid(struct t3cdev *dev, struct cxgb_client *client,
void *ctx);
int cxgb_alloc_stid(struct t3cdev *dev, struct cxgb_client *client,
void *ctx);
-#endif
void *cxgb_free_atid(struct t3cdev *dev, int atid);
void cxgb_free_stid(struct t3cdev *dev, int stid);
void *cxgb_get_lctx(struct t3cdev *tdev, int stid);
diff --git a/sys/dev/cxgb/cxgb_osdep.h b/sys/dev/cxgb/cxgb_osdep.h
index 7466d8a24be5e..73d7c77ae3cee 100644
--- a/sys/dev/cxgb/cxgb_osdep.h
+++ b/sys/dev/cxgb/cxgb_osdep.h
@@ -55,12 +55,25 @@ $FreeBSD$
typedef struct adapter adapter_t;
struct sge_rspq;
+enum {
+ TP_TMR_RES = 200, /* TP timer resolution in usec */
+ MAX_NPORTS = 4, /* max # of ports */
+ TP_SRAM_OFFSET = 4096, /* TP SRAM content offset in eeprom */
+ TP_SRAM_LEN = 2112, /* TP SRAM content offset in eeprom */
+};
struct t3_mbuf_hdr {
struct mbuf *mh_head;
struct mbuf *mh_tail;
};
+#ifndef PANIC_IF
+#define PANIC_IF(exp) do { \
+ if (exp) \
+ panic("BUG: %s", #exp); \
+} while (0)
+#endif
+
#define m_get_priority(m) ((uintptr_t)(m)->m_pkthdr.rcvif)
#define m_set_priority(m, pri) ((m)->m_pkthdr.rcvif = (struct ifnet *)((uintptr_t)pri))
#define m_set_sgl(m, sgl) ((m)->m_pkthdr.header = (sgl))
@@ -127,9 +140,6 @@ void cxgb_log_tcb(struct adapter *sc, unsigned int tid);
#define TX_START_MIN_DESC (TX_MAX_DESC << 2)
-
-
-
#define TX_START_MAX_DESC (TX_MAX_DESC << 3) /* maximum number of descriptors
* call to start used per */
@@ -159,7 +169,7 @@ void prefetch(void *x)
extern void kdb_backtrace(void);
#define WARN_ON(condition) do { \
- if ((condition)!=0) { \
+ if (__predict_false((condition)!=0)) { \
log(LOG_WARNING, "BUG: warning at %s:%d/%s()\n", __FILE__, __LINE__, __FUNCTION__); \
kdb_backtrace(); \
} \
@@ -384,6 +394,9 @@ static const int debug_flags = DBG_RX;
#define ADVERTISE_1000XPSE_ASYM ANAR_X_PAUSE_ASYM
#define ADVERTISE_1000XPAUSE ANAR_X_PAUSE_SYM
+#define ADVERTISE_CSMA ANAR_CSMA
+#define ADVERTISE_NPAGE ANAR_NP
+
/* Standard PCI Extended Capaibilities definitions */
#define PCI_CAP_ID_VPD 0x03
diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c
index 7f9c933854d05..50335aa17bb0f 100644
--- a/sys/dev/cxgb/cxgb_sge.c
+++ b/sys/dev/cxgb/cxgb_sge.c
@@ -394,12 +394,12 @@ t3_sge_prep(adapter_t *adap, struct sge_params *p)
struct qset_params *q = p->qset + i;
if (adap->params.nports > 2) {
- q->coalesce_nsecs = 50000;
+ q->coalesce_usecs = 50;
} else {
#ifdef INVARIANTS
- q->coalesce_nsecs = 10000;
+ q->coalesce_usecs = 10;
#else
- q->coalesce_nsecs = 5000;
+ q->coalesce_usecs = 5;
#endif
}
q->polling = adap->params.rev > 0;
@@ -490,7 +490,7 @@ void
t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
{
- qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
+ qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
qs->rspq.polling = 0 /* p->polling */;
}
@@ -1314,6 +1314,10 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
cntrl = V_TXPKT_INTF(pi->txpkt_intf);
GET_VTAG_MI(cntrl, batchmi);
cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
+ if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
+ cntrl |= F_TXPKT_IPCSUM_DIS;
+ if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
+ cntrl |= F_TXPKT_L4CSUM_DIS;
cbe->cntrl = htonl(cntrl);
cbe->len = htonl(batchmi->mi_len | 0x80000000);
cbe->addr = htobe64(segs[i].ds_addr);
@@ -1343,7 +1347,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
tmpmi = mv->mv_vec;
txd->flit[2] = 0;
- GET_VTAG_MI(cntrl, mi);
+ GET_VTAG(cntrl, m0);
cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
hdr->cntrl = htonl(cntrl);
mlen = m0->m_pkthdr.len;
@@ -1356,7 +1360,10 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
if (__predict_false(undersized)) {
pkthdr = tmp;
- dump_mi(mi);
+ if (mi)
+ dump_mi(mi);
+ printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
+ m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
panic("discontig packet - fixxorz");
} else
pkthdr = m0->m_data;
@@ -1376,12 +1383,39 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
V_LSO_IPHDR_WORDS(ip->ip_hl) |
V_LSO_TCPHDR_WORDS(tcp->th_off);
hdr->lso_info = htonl(tso_info);
+
+ if (__predict_false(mlen <= PIO_LEN)) {
+				/*
+				 * Packet is not undersized but still fits in PIO_LEN;
+				 * this indicates a TSO bug at the higher levels.
+				 */
+ DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
+ m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
+ txq_prod(txq, 1, &txqs);
+ m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
+ m_freem(m0);
+ m0 = NULL;
+ flits = (mlen + 7) / 8 + 3;
+ hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
+ V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
+ F_WR_SOP | F_WR_EOP | txqs.compl);
+ wmb();
+ hdr->wr.wr_lo = htonl(V_WR_LEN(flits) |
+ V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
+
+ wr_gen2(txd, txqs.gen);
+ check_ring_tx_db(sc, txq);
+ return (0);
+ }
flits = 3;
} else {
struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
GET_VTAG(cntrl, m0);
cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
+ if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
+ cntrl |= F_TXPKT_IPCSUM_DIS;
+ if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
+ cntrl |= F_TXPKT_L4CSUM_DIS;
cpl->cntrl = htonl(cntrl);
mlen = m0->m_pkthdr.len;
cpl->len = htonl(mlen | 0x80000000);
@@ -3223,11 +3257,11 @@ t3_lro_enable(SYSCTL_HANDLER_ARGS)
}
static int
-t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
+t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
{
adapter_t *sc = arg1;
struct qset_params *qsp = &sc->params.sge.qset[0];
- int coalesce_nsecs;
+ int coalesce_usecs;
struct sge_qset *qs;
int i, j, err, nqsets = 0;
struct mtx *lock;
@@ -3235,25 +3269,25 @@ t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
if ((sc->flags & FULL_INIT_DONE) == 0)
return (ENXIO);
- coalesce_nsecs = qsp->coalesce_nsecs;
- err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
+ coalesce_usecs = qsp->coalesce_usecs;
+ err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
if (err != 0) {
return (err);
}
- if (coalesce_nsecs == qsp->coalesce_nsecs)
+ if (coalesce_usecs == qsp->coalesce_usecs)
return (0);
for (i = 0; i < sc->params.nports; i++)
for (j = 0; j < sc->port[i].nqsets; j++)
nqsets++;
- coalesce_nsecs = max(100, coalesce_nsecs);
+ coalesce_usecs = max(1, coalesce_usecs);
for (i = 0; i < nqsets; i++) {
qs = &sc->sge.qs[i];
qsp = &sc->params.sge.qset[i];
- qsp->coalesce_nsecs = coalesce_nsecs;
+ qsp->coalesce_usecs = coalesce_usecs;
lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
&sc->sge.qs[0].rspq.lock;
@@ -3356,8 +3390,8 @@ t3_add_configured_sysctls(adapter_t *sc)
SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
"intr_coal",
CTLTYPE_INT|CTLFLAG_RW, sc,
- 0, t3_set_coalesce_nsecs,
- "I", "interrupt coalescing timer (ns)");
+ 0, t3_set_coalesce_usecs,
+ "I", "interrupt coalescing timer (us)");
for (i = 0; i < sc->params.nports; i++) {
struct port_info *pi = &sc->port[i];
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c
new file mode 100644
index 0000000000000..b198904533465
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c
@@ -0,0 +1,294 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/eventhandler.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+
+#include <contrib/rdma/ib_verbs.h>
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#endif
+
+/*
+ * XXX :-/
+ *
+ */
+
+#define idr_init(x)
+
+cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+
+static void open_rnic_dev(struct t3cdev *);
+static void close_rnic_dev(struct t3cdev *);
+
+static TAILQ_HEAD( ,iwch_dev) dev_list;
+static struct mtx dev_mutex;
+static eventhandler_tag event_tag;
+
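+/*
+ * Fill in the static RNIC attributes for this device.  The limits are
+ * taken from the T3 hardware constants (T3_MAX_NUM_QP, T3_MAX_SGE, ...)
+ * and from the number of STAGs the opened rdev reports via
+ * cxio_num_stags().
+ */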
+static void
+rnic_init(struct iwch_dev *rnicp)
+{
+ CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, rnicp);
+ idr_init(&rnicp->cqidr);
+ idr_init(&rnicp->qpidr);
+ idr_init(&rnicp->mmidr);
+ mtx_init(&rnicp->lock, "iwch rnic lock", NULL, MTX_DEF|MTX_DUPOK);
+
+ rnicp->attr.vendor_id = 0x168;
+ rnicp->attr.vendor_part_id = 7;
+ rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
+ rnicp->attr.max_wrs = (1UL << 24) - 1;
+ rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
+ rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
+ rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
+ rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
+ rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
+ rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
+ rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
+ rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */
+ rnicp->attr.can_resize_wq = 0;
+ rnicp->attr.max_rdma_reads_per_qp = 8;
+ rnicp->attr.max_rdma_read_resources =
+ rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
+ rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */
+ rnicp->attr.max_rdma_read_depth =
+ rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
+ rnicp->attr.rq_overflow_handled = 0;
+ rnicp->attr.can_modify_ird = 0;
+ rnicp->attr.can_modify_ord = 0;
+ rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
+ rnicp->attr.stag0_value = 1;
+ rnicp->attr.zbva_support = 1;
+ rnicp->attr.local_invalidate_fence = 1;
+ rnicp->attr.cq_overflow_detection = 1;
+ return;
+}
+
+static void
+open_rnic_dev(struct t3cdev *tdev)
+{
+ struct iwch_dev *rnicp;
+ static int vers_printed;
+
+ CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__, tdev);
+ if (!vers_printed++)
+ printf("Chelsio T3 RDMA Driver - version %s\n",
+ DRV_VERSION);
+ rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
+ if (!rnicp) {
+ printf("Cannot allocate ib device\n");
+ return;
+ }
+ rnicp->rdev.ulp = rnicp;
+ rnicp->rdev.t3cdev_p = tdev;
+
+ mtx_lock(&dev_mutex);
+
+ if (cxio_rdev_open(&rnicp->rdev)) {
+ mtx_unlock(&dev_mutex);
+ printf("Unable to open CXIO rdev\n");
+ ib_dealloc_device(&rnicp->ibdev);
+ return;
+ }
+
+ rnic_init(rnicp);
+
+ TAILQ_INSERT_TAIL(&dev_list, rnicp, entry);
+ mtx_unlock(&dev_mutex);
+
+ if (iwch_register_device(rnicp)) {
+ printf("Unable to register device\n");
+ close_rnic_dev(tdev);
+ }
+#ifdef notyet
+ printf("Initialized device %s\n",
+ pci_name(rnicp->rdev.rnic_info.pdev));
+#endif
+ return;
+}
+
+static void
+close_rnic_dev(struct t3cdev *tdev)
+{
+ struct iwch_dev *dev, *tmp;
+ CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__, tdev);
+ mtx_lock(&dev_mutex);
+
+ TAILQ_FOREACH_SAFE(dev, &dev_list, entry, tmp) {
+ if (dev->rdev.t3cdev_p == tdev) {
+#ifdef notyet
+ list_del(&dev->entry);
+ iwch_unregister_device(dev);
+ cxio_rdev_close(&dev->rdev);
+ idr_destroy(&dev->cqidr);
+ idr_destroy(&dev->qpidr);
+ idr_destroy(&dev->mmidr);
+ ib_dealloc_device(&dev->ibdev);
+#endif
+ break;
+ }
+ }
+ mtx_unlock(&dev_mutex);
+}
+
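+/*
+ * Called directly from iwch_init_module() for every existing interface
+ * and again via the ifaddr_event eventhandler afterwards: any
+ * TOE-capable (IFCAP_TOE4) interface that does not yet have an RNIC
+ * instance gets one opened here.
+ */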
+static ifaddr_event_handler_t
+ifaddr_event_handler(void *arg, struct ifnet *ifp)
+{
+ printf("%s if name %s \n", __FUNCTION__, ifp->if_xname);
+ if (ifp->if_capabilities & IFCAP_TOE4) {
+ KASSERT(T3CDEV(ifp) != NULL, ("null t3cdev ptr!"));
+ if (cxio_hal_find_rdev_by_t3cdev(T3CDEV(ifp)) == NULL)
+ open_rnic_dev(T3CDEV(ifp));
+ }
+ return 0;
+}
+
+
+static int
+iwch_init_module(void)
+{
+ int err;
+ struct ifnet *ifp;
+
+ printf("%s enter\n", __FUNCTION__);
+ TAILQ_INIT(&dev_list);
+ mtx_init(&dev_mutex, "iwch dev_list lock", NULL, MTX_DEF);
+
+ err = cxio_hal_init();
+ if (err)
+ return err;
+ err = iwch_cm_init();
+ if (err)
+ return err;
+ cxio_register_ev_cb(iwch_ev_dispatch);
+
+ /* Register for ifaddr events to dynamically add TOE devs */
+ event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_event_handler,
+ NULL, EVENTHANDLER_PRI_ANY);
+
+ /* Register existing TOE interfaces by walking the ifnet chain */
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &ifnet, if_link) {
+ (void)ifaddr_event_handler(NULL, ifp);
+ }
+ IFNET_RUNLOCK();
+ return 0;
+}
+
+static void
+iwch_exit_module(void)
+{
+ EVENTHANDLER_DEREGISTER(ifaddr_event, event_tag);
+ cxio_unregister_ev_cb(iwch_ev_dispatch);
+ iwch_cm_term();
+ cxio_hal_exit();
+}
+
+static int
+iwch_load(module_t mod, int cmd, void *arg)
+{
+ int err = 0;
+
+ switch (cmd) {
+ case MOD_LOAD:
+ printf("Loading iw_cxgb.\n");
+
+ iwch_init_module();
+ break;
+ case MOD_QUIESCE:
+ break;
+ case MOD_UNLOAD:
+ printf("Unloading iw_cxgb.\n");
+ iwch_exit_module();
+ break;
+ case MOD_SHUTDOWN:
+ break;
+ default:
+ err = EOPNOTSUPP;
+ break;
+ }
+
+ return (err);
+}
+
+static moduledata_t mod_data = {
+ "iw_cxgb",
+ iwch_load,
+ 0
+};
+
+MODULE_VERSION(iw_cxgb, 1);
+DECLARE_MODULE(iw_cxgb, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
+MODULE_DEPEND(iw_cxgb, rdma_core, 1, 1, 1);
+MODULE_DEPEND(iw_cxgb, if_cxgb, 1, 1, 1);
+MODULE_DEPEND(iw_cxgb, t3_tom, 1, 1, 1);
+
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
new file mode 100644
index 0000000000000..f4b28566ebf5b
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
@@ -0,0 +1,168 @@
+/**************************************************************************
+
+Copyright (c) 2007, 2008 Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+
+#ifndef __IWCH_H__
+#define __IWCH_H__
+
+struct iwch_pd;
+struct iwch_cq;
+struct iwch_qp;
+struct iwch_mr;
+
+
+struct iwch_rnic_attributes {
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 max_qps;
+ u32 max_wrs; /* Max for any SQ/RQ */
+ u32 max_sge_per_wr;
+ u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */
+ u32 max_cqs;
+ u32 max_cqes_per_cq;
+ u32 max_mem_regs;
+ u32 max_phys_buf_entries; /* for phys buf list */
+ u32 max_pds;
+
+ /*
+ * The memory page sizes supported by this RNIC.
+ * Bit position i in bitmap indicates page of
+ * size (4k)^i. Phys block list mode unsupported.
+ */
+ u32 mem_pgsizes_bitmask;
+ u8 can_resize_wq;
+
+ /*
+ * The maximum number of RDMA Reads that can be outstanding
+ * per QP with this RNIC as the target.
+ */
+ u32 max_rdma_reads_per_qp;
+
+ /*
+ * The maximum number of resources used for RDMA Reads
+ * by this RNIC with this RNIC as the target.
+ */
+ u32 max_rdma_read_resources;
+
+ /*
+ * The max depth per QP for initiation of RDMA Read
+ * by this RNIC.
+ */
+ u32 max_rdma_read_qp_depth;
+
+ /*
+ * The maximum depth for initiation of RDMA Read
+ * operations by this RNIC on all QPs
+ */
+ u32 max_rdma_read_depth;
+ u8 rq_overflow_handled;
+ u32 can_modify_ird;
+ u32 can_modify_ord;
+ u32 max_mem_windows;
+ u32 stag0_value;
+ u8 zbva_support;
+ u8 local_invalidate_fence;
+ u32 cq_overflow_detection;
+};
+
+struct iwch_dev {
+ struct ib_device ibdev;
+ struct cxio_rdev rdev;
+ u32 device_cap_flags;
+ struct iwch_rnic_attributes attr;
+ struct kvl cqidr;
+ struct kvl qpidr;
+ struct kvl mmidr;
+ struct mtx lock;
+ TAILQ_ENTRY(iwch_dev) entry;
+};
+
+#ifndef container_of
+#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field)))
+#endif
+
+static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct iwch_dev, ibdev);
+}
+
+static inline int t3b_device(const struct iwch_dev *rhp)
+{
+ return rhp->rdev.t3cdev_p->type == T3B;
+}
+
+static inline int t3a_device(const struct iwch_dev *rhp)
+{
+ return rhp->rdev.t3cdev_p->type == T3A;
+}
+
+static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
+{
+ return kvl_lookup(&rhp->cqidr, cqid);
+}
+
+static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
+{
+ return kvl_lookup(&rhp->qpidr, qpid);
+}
+
+static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
+{
+ return kvl_lookup(&rhp->mmidr, mmid);
+}
+
+static inline int insert_handle(struct iwch_dev *rhp, struct kvl *kvlp,
+ void *handle, u32 id)
+{
+ int ret;
+ u32 newid;
+
+ do {
+ mtx_lock(&rhp->lock);
+ ret = kvl_alloc_above(kvlp, handle, id, &newid);
+ WARN_ON(ret != 0);
+ WARN_ON(!ret && newid != id);
+ mtx_unlock(&rhp->lock);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static inline void remove_handle(struct iwch_dev *rhp, struct kvl *kvlp, u32 id)
+{
+ mtx_lock(&rhp->lock);
+ kvl_delete(kvlp, id);
+ mtx_unlock(&rhp->lock);
+}
+
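+/*
+ * Usage sketch for the handle helpers above (illustrative only; 'chp'
+ * and 'cqid' stand in for a CQ object and its hardware id, which are
+ * defined elsewhere in this driver):
+ *
+ *	if (insert_handle(rhp, &rhp->cqidr, chp, cqid))
+ *		return (-ENOMEM);		(insertion failed)
+ *	...
+ *	chp = get_chp(rhp, cqid);		(keyed lookup)
+ *	...
+ *	remove_handle(rhp, &rhp->cqidr, cqid);
+ */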
+extern struct cxgb_client t3c_client;
+extern cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
+extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m);
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
new file mode 100644
index 0000000000000..cec461147ccc7
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
@@ -0,0 +1,1779 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+
+#include <net/route.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
+
+#include <contrib/rdma/ib_verbs.h>
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/tom/cxgb_tom.h>
+#include <ulp/tom/cxgb_t3_ddp.h>
+#include <ulp/tom/cxgb_defs.h>
+#include <ulp/tom/cxgb_toepcb.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/tom/cxgb_tom.h>
+#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
+#include <dev/cxgb/ulp/tom/cxgb_defs.h>
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#endif
+
+#ifdef KTR
+static char *states[] = {
+ "idle",
+ "listen",
+ "connecting",
+ "mpa_wait_req",
+ "mpa_req_sent",
+ "mpa_req_rcvd",
+ "mpa_rep_sent",
+ "fpdu_mode",
+ "aborting",
+ "closing",
+ "moribund",
+ "dead",
+ NULL,
+};
+#endif
+
+SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");
+
+static int ep_timeout_secs = 10;
+TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0,
+ "CM Endpoint operation timeout in seconds (default=10)");
+
+static int mpa_rev = 1;
+TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0,
+ "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");
+
+static int markers_enabled = 0;
+TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0,
+ "Enable MPA MARKERS (default(0)=disabled)");
+
+static int crc_enabled = 1;
+TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0,
+ "Enable MPA CRC (default(1)=enabled)");
+
+static int rcv_win = 256 * 1024;
+TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0,
+ "TCP receive window in bytes (default=256KB)");
+
+static int snd_win = 32 * 1024;
+TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0,
+ "TCP send window in bytes (default=32KB)");
+
+static unsigned int nocong = 0;
+TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0,
+ "Turn off congestion control (default=0)");
+
+static unsigned int cong_flavor = 1;
+TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
+SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0,
+ "TCP Congestion control flavor (default=1)");
+
+static void ep_timeout(void *arg);
+static void connect_reply_upcall(struct iwch_ep *ep, int status);
+static void iwch_so_upcall(struct socket *so, void *arg, int waitflag);
+
+/*
+ * Cruft to offload socket upcalls onto thread.
+ */
+static struct mtx req_lock;
+static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
+static struct task iw_cxgb_task;
+static struct taskqueue *iw_cxgb_taskq;
+static void process_req(void *ctx, int pending);
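+/*
+ * Socket upcalls arrive in a context that is awkward for doing
+ * connection-state work directly, so (per the note above) endpoints are
+ * presumably queued on req_list under req_lock and drained later by
+ * process_req() running from the iw_cxgb taskqueue; process_req() is
+ * defined further down in this file.
+ */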
+
+static void
+start_ep_timer(struct iwch_ep *ep)
+{
+ CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+ if (callout_pending(&ep->timer)) {
+ CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
+ callout_deactivate(&ep->timer);
+ callout_drain(&ep->timer);
+ } else {
+ /*
+ * XXX this looks racy
+ */
+ get_ep(&ep->com);
+ callout_init(&ep->timer, TRUE);
+ }
+ callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
+}
+
+static void
+stop_ep_timer(struct iwch_ep *ep)
+{
+ CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+ callout_drain(&ep->timer);
+ put_ep(&ep->com);
+}
+
+static int set_tcpinfo(struct iwch_ep *ep)
+{
+ struct tcp_info ti;
+ struct sockopt sopt;
+ int err;
+
+ sopt.sopt_dir = SOPT_GET;
+ sopt.sopt_level = IPPROTO_TCP;
+ sopt.sopt_name = TCP_INFO;
+ sopt.sopt_val = (caddr_t)&ti;
+ sopt.sopt_valsize = sizeof ti;
+ sopt.sopt_td = NULL;
+
+ err = sogetopt(ep->com.so, &sopt);
+ if (err) {
+ printf("%s can't get tcpinfo\n", __FUNCTION__);
+ return -err;
+ }
+ if (!(ti.tcpi_options & TCPI_OPT_TOE)) {
+ printf("%s connection NOT OFFLOADED!\n", __FUNCTION__);
+ return -EINVAL;
+ }
+
+ ep->snd_seq = ti.tcpi_snd_nxt;
+ ep->rcv_seq = ti.tcpi_rcv_nxt;
+ ep->emss = ti.__tcpi_snd_mss - sizeof(struct tcpiphdr);
+ ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */
+ if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS)
+ ep->emss -= 12;
+ if (ep->emss < 128)
+ ep->emss = 128;
+ return 0;
+}
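+/*
+ * Worked example for the emss calculation above (numbers illustrative):
+ * with __tcpi_snd_mss = 1460 and sizeof(struct tcpiphdr) typically 40,
+ * emss becomes 1420; if TCP timestamps were negotiated another 12 bytes
+ * are subtracted, giving 1408; any result below 128 is clamped to 128.
+ */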
+
+static enum iwch_ep_state
+state_read(struct iwch_ep_common *epc)
+{
+ enum iwch_ep_state state;
+
+ mtx_lock(&epc->lock);
+ state = epc->state;
+ mtx_unlock(&epc->lock);
+ return state;
+}
+
+static void
+__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
+{
+ epc->state = new;
+}
+
+static void
+state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
+{
+
+ mtx_lock(&epc->lock);
+ CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
+ __state_set(epc, new);
+ mtx_unlock(&epc->lock);
+ return;
+}
+
+static void *
+alloc_ep(int size, int flags)
+{
+ struct iwch_ep_common *epc;
+
+ epc = malloc(size, M_DEVBUF, flags);
+ if (epc) {
+ memset(epc, 0, size);
+ refcount_init(&epc->refcount, 1);
+ mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
+ cv_init(&epc->waitq, "iwch_epc cv");
+ }
+ CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
+ return epc;
+}
+
+void __free_ep(struct iwch_ep_common *epc)
+{
+ CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
+ KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
+ KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
+ free(epc, M_DEVBUF);
+}
+
+int
+iwch_quiesce_tid(struct iwch_ep *ep)
+{
+#ifdef notyet
+ struct cpl_set_tcb_field *req;
+ struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);
+
+ if (m == NULL)
+ return (-ENOMEM);
+ req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
+ req->reply = 0;
+ req->cpu_idx = 0;
+ req->word = htons(W_TCB_RX_QUIESCE);
+ req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
+ req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
+
+ m_set_priority(m, CPL_PRIORITY_DATA);
+ cxgb_ofld_send(ep->com.tdev, m);
+#endif
+ return 0;
+}
+
+int
+iwch_resume_tid(struct iwch_ep *ep)
+{
+#ifdef notyet
+ struct cpl_set_tcb_field *req;
+ struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT);
+
+ if (m == NULL)
+ return (-ENOMEM);
+ req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req));
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
+ req->reply = 0;
+ req->cpu_idx = 0;
+ req->word = htons(W_TCB_RX_QUIESCE);
+ req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
+ req->val = 0;
+
+ m_set_priority(m, CPL_PRIORITY_DATA);
+ cxgb_ofld_send(ep->com.tdev, m);
+#endif
+ return 0;
+}
+
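+/*
+ * Route lookup helper: only the peer address is consulted here; the
+ * local address, port and tos arguments are currently unused.  The
+ * caller owns the reference on the returned rtentry and is expected to
+ * RTFREE() it, as iwch_connect() does.
+ */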
+static struct rtentry *
+find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
+ __be16 peer_port, u8 tos)
+{
+ struct route iproute;
+ struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;
+
+ bzero(&iproute, sizeof iproute);
+ dst->sin_family = AF_INET;
+ dst->sin_len = sizeof *dst;
+ dst->sin_addr.s_addr = peer_ip;
+
+ rtalloc(&iproute);
+ return iproute.ro_rt;
+}
+
+static void
+close_socket(struct iwch_ep_common *epc)
+{
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
+ SOCK_LOCK(epc->so);
+ epc->so->so_upcall = NULL;
+ epc->so->so_upcallarg = NULL;
+ epc->so->so_rcv.sb_flags &= ~SB_UPCALL;
+ SOCK_UNLOCK(epc->so);
+ soshutdown(epc->so, SHUT_WR|SHUT_RD);
+ epc->so = NULL;
+}
+
+static void
+shutdown_socket(struct iwch_ep_common *epc)
+{
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
+ soshutdown(epc->so, SHUT_WR);
+}
+
+static void
+abort_socket(struct iwch_ep *ep)
+{
+ struct sockopt sopt;
+ int err;
+ struct linger l;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ l.l_onoff = 1;
+ l.l_linger = 0;
+
+ /* linger_time of 0 forces RST to be sent */
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_LINGER;
+ sopt.sopt_val = (caddr_t)&l;
+ sopt.sopt_valsize = sizeof l;
+ sopt.sopt_td = NULL;
+ err = sosetopt(ep->com.so, &sopt);
+ if (err)
+ printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
+}
+
+static void
+send_mpa_req(struct iwch_ep *ep)
+{
+ int mpalen;
+ struct mpa_message *mpa;
+ struct mbuf *m;
+ int err;
+
+ CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);
+
+ mpalen = sizeof(*mpa) + ep->plen;
+ m = m_gethdr(mpalen, M_NOWAIT);
+ if (m == NULL) {
+ connect_reply_upcall(ep, -ENOMEM);
+ return;
+ }
+ mpa = mtod(m, struct mpa_message *);
+ m->m_len = mpalen;
+ m->m_pkthdr.len = mpalen;
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
+ mpa->flags = (crc_enabled ? MPA_CRC : 0) |
+ (markers_enabled ? MPA_MARKERS : 0);
+ mpa->private_data_size = htons(ep->plen);
+ mpa->revision = mpa_rev;
+ if (ep->plen)
+ memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
+
+ err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
+ if (err) {
+ m_freem(m);
+ connect_reply_upcall(ep, -ENOMEM);
+ return;
+ }
+
+ start_ep_timer(ep);
+ state_set(&ep->com, MPA_REQ_SENT);
+ return;
+}
+
+static int
+send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
+{
+ int mpalen;
+ struct mpa_message *mpa;
+ struct mbuf *m;
+ int err;
+
+ CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);
+
+ mpalen = sizeof(*mpa) + plen;
+
+ m = m_gethdr(mpalen, M_NOWAIT);
+ if (m == NULL) {
+ printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
+ return (-ENOMEM);
+ }
+ mpa = mtod(m, struct mpa_message *);
+ m->m_len = mpalen;
+ m->m_pkthdr.len = mpalen;
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+ mpa->flags = MPA_REJECT;
+ mpa->revision = mpa_rev;
+ mpa->private_data_size = htons(plen);
+ if (plen)
+ memcpy(mpa->private_data, pdata, plen);
+ err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
+ PANIC_IF(err);
+ return 0;
+}
+
+static int
+send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
+{
+ int mpalen;
+ struct mpa_message *mpa;
+ struct mbuf *m;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);
+
+ mpalen = sizeof(*mpa) + plen;
+
+ m = m_gethdr(mpalen, M_NOWAIT);
+ if (m == NULL) {
+ printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
+ return (-ENOMEM);
+ }
+ mpa = mtod(m, struct mpa_message *);
+ m->m_len = mpalen;
+ m->m_pkthdr.len = mpalen;
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+ mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
+ (markers_enabled ? MPA_MARKERS : 0);
+ mpa->revision = mpa_rev;
+ mpa->private_data_size = htons(plen);
+ if (plen)
+ memcpy(mpa->private_data, pdata, plen);
+
+ state_set(&ep->com, MPA_REP_SENT);
+ return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
+ ep->com.thread);
+}
+
+static void
+close_complete_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ if (ep->com.cm_id) {
+ CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
+ ep, ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void
+abort_connection(struct iwch_ep *ep)
+{
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ state_set(&ep->com, ABORTING);
+ abort_socket(ep);
+ close_socket(&ep->com);
+ close_complete_upcall(ep);
+ state_set(&ep->com, DEAD);
+ put_ep(&ep->com);
+}
+
+static void
+peer_close_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_DISCONNECT;
+ if (ep->com.cm_id) {
+ CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
+ ep, ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+}
+
+static void
+peer_abort_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ event.status = ECONNRESET;
+ if (ep->com.cm_id) {
+ CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
+ ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void
+connect_reply_upcall(struct iwch_ep *ep, int status)
+{
+ struct iw_cm_event event;
+
+ CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REPLY;
+ event.status = status;
+ event.local_addr = ep->com.local_addr;
+ event.remote_addr = ep->com.remote_addr;
+
+ if ((status == 0) || (status == ECONNREFUSED)) {
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ }
+ if (ep->com.cm_id) {
+ CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
+ ep->hwtid, status);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+ if (status < 0) {
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ }
+}
+
+static void
+connect_request_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REQUEST;
+ event.local_addr = ep->com.local_addr;
+ event.remote_addr = ep->com.remote_addr;
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ event.provider_data = ep;
+ event.so = ep->com.so;
+ if (state_read(&ep->parent_ep->com) != DEAD)
+ ep->parent_ep->com.cm_id->event_handler(
+ ep->parent_ep->com.cm_id,
+ &event);
+ put_ep(&ep->parent_ep->com);
+ ep->parent_ep = NULL;
+}
+
+static void
+established_upcall(struct iwch_ep *ep)
+{
+ struct iw_cm_event event;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_ESTABLISHED;
+ if (ep->com.cm_id) {
+ CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ }
+}
+
+static void
+process_mpa_reply(struct iwch_ep *ep)
+{
+ struct mpa_message *mpa;
+ u16 plen;
+ struct iwch_qp_attributes attrs;
+ enum iwch_qp_attr_mask mask;
+ int err;
+ struct mbuf *top, *m;
+ int flags = MSG_DONTWAIT;
+ struct uio uio;
+ int len;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+
+ /*
+ * Stop mpa timer. If it expired, then the state has
+ * changed and we bail since ep_timeout already aborted
+ * the connection.
+ */
+ stop_ep_timer(ep);
+ if (state_read(&ep->com) != MPA_REQ_SENT)
+ return;
+
+ uio.uio_resid = len = 1000000;
+ uio.uio_td = ep->com.thread;
+ err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
+ if (err) {
+ if (err == EWOULDBLOCK) {
+ start_ep_timer(ep);
+ return;
+ }
+ err = -err;
+ goto err;
+ }
+
+ if (ep->com.so->so_rcv.sb_mb) {
+ printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
+ __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
+ }
+
+ m = top;
+ do {
+ /*
+ * If we get more than the supported amount of private data
+ * then we must fail this connection.
+ */
+ if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
+ err = (-EINVAL);
+ goto err;
+ }
+
+ /*
+ * copy the new data into our accumulation buffer.
+ */
+ m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
+ ep->mpa_pkt_len += m->m_len;
+ if (!m->m_next)
+ m = m->m_nextpkt;
+ else
+ m = m->m_next;
+ } while (m);
+
+ m_freem(top);
+
+ /*
+ * if we don't even have the mpa message, then bail.
+ */
+ if (ep->mpa_pkt_len < sizeof(*mpa))
+ return;
+ mpa = (struct mpa_message *)ep->mpa_pkt;
+
+ /* Validate MPA header. */
+ if (mpa->revision != mpa_rev) {
+ CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
+ err = EPROTO;
+ goto err;
+ }
+ if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
+ CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
+ err = EPROTO;
+ goto err;
+ }
+
+ plen = ntohs(mpa->private_data_size);
+
+ /*
+ * Fail if there's too much private data.
+ */
+ if (plen > MPA_MAX_PRIVATE_DATA) {
+ CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
+ err = EPROTO;
+ goto err;
+ }
+
+	/*
+	 * Fail if we received more data than the MPA header and plen
+	 * account for.
+	 */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+ CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
+ err = EPROTO;
+ goto err;
+ }
+
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+	 * We'll continue processing when more data arrives.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+ return;
+
+ if (mpa->flags & MPA_REJECT) {
+ err = ECONNREFUSED;
+ goto err;
+ }
+
+ /*
+ * If we get here we have accumulated the entire mpa
+ * start reply message including private data. And
+ * the MPA header is valid.
+ */
+ CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
+ state_set(&ep->com, FPDU_MODE);
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa_rev;
+ if (set_tcpinfo(ep)) {
+ printf("%s set_tcpinfo error\n", __FUNCTION__);
+ goto err;
+ }
+ CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
+ "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ird;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = IWCH_QP_STATE_RTS;
+
+ mask = IWCH_QP_ATTR_NEXT_STATE |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
+
+ /* bind QP and TID with INIT_WR */
+ err = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+ if (!err)
+ goto out;
+err:
+ abort_connection(ep);
+out:
+ connect_reply_upcall(ep, err);
+ return;
+}
+
+static void
+process_mpa_request(struct iwch_ep *ep)
+{
+ struct mpa_message *mpa;
+ u16 plen;
+ int flags = MSG_DONTWAIT;
+ struct mbuf *top, *m;
+ int err;
+ struct uio uio;
+ int len;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+
+ /*
+ * Stop mpa timer. If it expired, then the state has
+ * changed and we bail since ep_timeout already aborted
+ * the connection.
+ */
+ stop_ep_timer(ep);
+ if (state_read(&ep->com) != MPA_REQ_WAIT)
+ return;
+
+ uio.uio_resid = len = 1000000;
+ uio.uio_td = ep->com.thread;
+ err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
+ if (err) {
+ if (err == EWOULDBLOCK) {
+ start_ep_timer(ep);
+ return;
+ }
+ err = -err;
+ goto err;
+ }
+
+ m = top;
+ do {
+
+ /*
+ * If we get more than the supported amount of private data
+ * then we must fail this connection.
+ */
+ if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
+ CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
+ ep->mpa_pkt_len + m->m_len);
+ goto err;
+ }
+
+
+ /*
+ * Copy the new data into our accumulation buffer.
+ */
+ m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
+ ep->mpa_pkt_len += m->m_len;
+
+ if (!m->m_next)
+ m = m->m_nextpkt;
+ else
+ m = m->m_next;
+ } while (m);
+
+ m_freem(top);
+
+ /*
+ * If we don't even have the mpa message, then bail.
+	 * We'll continue processing when more data arrives.
+ */
+ if (ep->mpa_pkt_len < sizeof(*mpa)) {
+ start_ep_timer(ep);
+ CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
+ ep->mpa_pkt_len);
+ return;
+ }
+ mpa = (struct mpa_message *) ep->mpa_pkt;
+
+ /*
+ * Validate MPA Header.
+ */
+ if (mpa->revision != mpa_rev) {
+ CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
+ goto err;
+ }
+
+ if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
+ CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
+ goto err;
+ }
+
+ plen = ntohs(mpa->private_data_size);
+
+ /*
+ * Fail if there's too much private data.
+ */
+ if (plen > MPA_MAX_PRIVATE_DATA) {
+ CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
+ goto err;
+ }
+
+	/*
+	 * Fail if we received more data than the MPA header and plen
+	 * account for.
+	 */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+ CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
+ ep->mpa_pkt_len);
+ goto err;
+ }
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
+ start_ep_timer(ep);
+ CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
+ ep->mpa_pkt_len);
+ return;
+ }
+
+ /*
+ * If we get here we have accumulated the entire mpa
+	 * start request message including private data.
+ */
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa_rev;
+ if (set_tcpinfo(ep)) {
+ printf("%s set_tcpinfo error\n", __FUNCTION__);
+ goto err;
+ }
+ CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
+ "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+ state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ connect_request_upcall(ep);
+ return;
+err:
+ abort_connection(ep);
+ return;
+}
+
+static void
+process_peer_close(struct iwch_ep *ep)
+{
+ struct iwch_qp_attributes attrs;
+ int disconnect = 1;
+ int release = 0;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+
+ mtx_lock(&ep->com.lock);
+ switch (ep->com.state) {
+ case MPA_REQ_WAIT:
+ __state_set(&ep->com, CLOSING);
+ break;
+ case MPA_REQ_SENT:
+ __state_set(&ep->com, CLOSING);
+ connect_reply_upcall(ep, -ECONNRESET);
+ break;
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+ __state_set(&ep->com, CLOSING);
+ get_ep(&ep->com);
+ break;
+ case MPA_REP_SENT:
+ __state_set(&ep->com, CLOSING);
+ break;
+ case FPDU_MODE:
+ start_ep_timer(ep);
+ __state_set(&ep->com, CLOSING);
+ attrs.next_state = IWCH_QP_STATE_CLOSING;
+ iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
+ peer_close_upcall(ep);
+ break;
+ case ABORTING:
+ disconnect = 0;
+ break;
+ case CLOSING:
+ __state_set(&ep->com, MORIBUND);
+ disconnect = 0;
+ break;
+ case MORIBUND:
+ stop_ep_timer(ep);
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = IWCH_QP_STATE_IDLE;
+ iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
+ }
+ close_socket(&ep->com);
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ disconnect = 0;
+ break;
+ case DEAD:
+ disconnect = 0;
+ break;
+ default:
+ PANIC_IF(1);
+ }
+ mtx_unlock(&ep->com.lock);
+ if (disconnect)
+ iwch_ep_disconnect(ep, 0, M_NOWAIT);
+ if (release)
+ put_ep(&ep->com);
+ return;
+}
+
+static void
+process_conn_error(struct iwch_ep *ep)
+{
+ struct iwch_qp_attributes attrs;
+ int ret;
+ int state;
+
+ state = state_read(&ep->com);
+ CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]);
+ switch (state) {
+ case MPA_REQ_WAIT:
+ stop_ep_timer(ep);
+ break;
+ case MPA_REQ_SENT:
+ stop_ep_timer(ep);
+ connect_reply_upcall(ep, -ECONNRESET);
+ break;
+ case MPA_REP_SENT:
+ ep->com.rpl_err = ECONNRESET;
+ CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
+ break;
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+ get_ep(&ep->com);
+ break;
+ case MORIBUND:
+ case CLOSING:
+ stop_ep_timer(ep);
+ /*FALLTHROUGH*/
+ case FPDU_MODE:
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ ret = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (ret)
+ log(LOG_ERR,
+ "%s - qp <- error failed!\n",
+ __FUNCTION__);
+ }
+ peer_abort_upcall(ep);
+ break;
+ case ABORTING:
+ break;
+ case DEAD:
+ CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
+ ep->com.so->so_error);
+ return;
+ default:
+ PANIC_IF(1);
+ break;
+ }
+
+ if (state != ABORTING) {
+ close_socket(&ep->com);
+ state_set(&ep->com, DEAD);
+ put_ep(&ep->com);
+ }
+ return;
+}
+
+static void
+process_close_complete(struct iwch_ep *ep)
+{
+ struct iwch_qp_attributes attrs;
+ int release = 0;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ PANIC_IF(!ep);
+
+ /* The cm_id may be null if we failed to connect */
+ mtx_lock(&ep->com.lock);
+ switch (ep->com.state) {
+ case CLOSING:
+ __state_set(&ep->com, MORIBUND);
+ break;
+ case MORIBUND:
+ stop_ep_timer(ep);
+ if ((ep->com.cm_id) && (ep->com.qp)) {
+ attrs.next_state = IWCH_QP_STATE_IDLE;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ close_socket(&ep->com);
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+ case ABORTING:
+ break;
+ case DEAD:
+ default:
+ PANIC_IF(1);
+ break;
+ }
+ mtx_unlock(&ep->com.lock);
+ if (release)
+ put_ep(&ep->com);
+ return;
+}
+
+/*
+ * T3A does 3 things when a TERM is received:
+ * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
+ * 2) generate an async event on the QP with the TERMINATE opcode
+ * 3) post a TERMINATE opcode cqe into the associated CQ.
+ *
+ * For (1), we save the message in the qp for later consumer consumption.
+ * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
+ * For (3), we toss the CQE in cxio_poll_cq().
+ *
+ * terminate() handles case (1)...
+ */
+static int
+terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+ struct socket *so = toeptoso(toep);
+ struct iwch_ep *ep = so->so_upcallarg;
+
+ CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+ m_adj(m, sizeof(struct cpl_rdma_terminate));
+ CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len);
+ m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
+ ep->com.qp->attr.terminate_msg_len = m->m_len;
+ ep->com.qp->attr.is_terminate_local = 0;
+ return CPL_RET_BUF_DONE;
+}
+
+static int
+ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+ struct socket *so = toeptoso(toep);
+ struct cpl_rdma_ec_status *rep = cplhdr(m);
+ struct iwch_ep *ep;
+ struct iwch_qp_attributes attrs;
+ int release = 0;
+
+	ep = so->so_upcallarg;
+	if (!so || !ep) {
+		panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? so->so_state : -1);
+	}
+	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status);
+ mtx_lock(&ep->com.lock);
+ switch (ep->com.state) {
+ case CLOSING:
+ if (!rep->status)
+ __state_set(&ep->com, MORIBUND);
+ else
+ __state_set(&ep->com, ABORTING);
+ break;
+ case MORIBUND:
+ stop_ep_timer(ep);
+ if (!rep->status) {
+ if ((ep->com.cm_id) && (ep->com.qp)) {
+ attrs.next_state = IWCH_QP_STATE_IDLE;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp,
+ IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ close_socket(&ep->com);
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ }
+ break;
+ case DEAD:
+ break;
+ default:
+ panic("unknown state: %d\n", ep->com.state);
+ }
+ mtx_unlock(&ep->com.lock);
+ if (rep->status) {
+ log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n",
+ __FUNCTION__, ep->hwtid);
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ if (release)
+ put_ep(&ep->com);
+ return CPL_RET_BUF_DONE;
+}
+
+static void
+ep_timeout(void *arg)
+{
+ struct iwch_ep *ep = (struct iwch_ep *)arg;
+ struct iwch_qp_attributes attrs;
+ int err = 0;
+
+ mtx_lock(&ep->com.lock);
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ switch (ep->com.state) {
+ case MPA_REQ_SENT:
+ connect_reply_upcall(ep, -ETIMEDOUT);
+ break;
+ case MPA_REQ_WAIT:
+ break;
+ case CLOSING:
+ case MORIBUND:
+ if (ep->com.cm_id && ep->com.qp)
+ err = 1;
+ break;
+ default:
+ panic("unknown state: %d\n", ep->com.state);
+ }
+ __state_set(&ep->com, ABORTING);
+ mtx_unlock(&ep->com.lock);
+ if (err){
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ abort_connection(ep);
+ put_ep(&ep->com);
+}
+
+int
+iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ int err;
+ struct iwch_ep *ep = to_ep(cm_id);
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+
+ if (state_read(&ep->com) == DEAD) {
+ put_ep(&ep->com);
+ return (-ECONNRESET);
+ }
+ PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
+ if (mpa_rev == 0) {
+ abort_connection(ep);
+ } else {
+ err = send_mpa_reject(ep, pdata, pdata_len);
+ err = soshutdown(ep->com.so, 3);
+ }
+ return 0;
+}
+
+int
+iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ int err;
+ struct iwch_qp_attributes attrs;
+ enum iwch_qp_attr_mask mask;
+ struct iwch_ep *ep = to_ep(cm_id);
+ struct iwch_dev *h = to_iwch_dev(cm_id->device);
+ struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ if (state_read(&ep->com) == DEAD)
+ return (-ECONNRESET);
+
+ PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
+ PANIC_IF(!qp);
+
+ if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
+ (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
+ abort_connection(ep);
+ return (-EINVAL);
+ }
+
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.qp = qp;
+
+ ep->com.rpl_err = 0;
+ ep->com.rpl_done = 0;
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+ CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);
+ get_ep(&ep->com);
+
+ /* bind QP to EP and move to RTS */
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ord;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = IWCH_QP_STATE_RTS;
+
+ /* bind QP and TID with INIT_WR */
+ mask = IWCH_QP_ATTR_NEXT_STATE |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+ IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_MAX_IRD |
+ IWCH_QP_ATTR_MAX_ORD;
+
+ err = iwch_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+
+ if (err)
+ goto err;
+
+ err = send_mpa_reply(ep, conn_param->private_data,
+ conn_param->private_data_len);
+ if (err)
+ goto err;
+ state_set(&ep->com, FPDU_MODE);
+ established_upcall(ep);
+ put_ep(&ep->com);
+ return 0;
+err:
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ cm_id->rem_ref(cm_id);
+ put_ep(&ep->com);
+ return err;
+}
+
+static int init_sock(struct iwch_ep_common *epc)
+{
+ int err;
+ struct sockopt sopt;
+ int on=1;
+
+ epc->so->so_upcall = iwch_so_upcall;
+ epc->so->so_upcallarg = epc;
+ epc->so->so_rcv.sb_flags |= SB_UPCALL;
+ epc->so->so_state |= SS_NBIO;
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_NO_DDP;
+ sopt.sopt_val = (caddr_t)&on;
+ sopt.sopt_valsize = sizeof on;
+ sopt.sopt_td = NULL;
+ err = sosetopt(epc->so, &sopt);
+ if (err)
+ printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err);
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_TCP;
+ sopt.sopt_name = TCP_NODELAY;
+ sopt.sopt_val = (caddr_t)&on;
+ sopt.sopt_valsize = sizeof on;
+ sopt.sopt_td = NULL;
+ err = sosetopt(epc->so, &sopt);
+ if (err)
+ printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);
+
+ return 0;
+}
+
+static int
+is_loopback_dst(struct iw_cm_id *cm_id)
+{
+ uint16_t port = cm_id->remote_addr.sin_port;
+ struct ifaddr *ifa;
+
+ cm_id->remote_addr.sin_port = 0;
+ ifa = ifa_ifwithaddr((struct sockaddr *)&cm_id->remote_addr);
+ cm_id->remote_addr.sin_port = port;
+ return (ifa != NULL);
+}
+
+int
+iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ int err = 0;
+ struct iwch_dev *h = to_iwch_dev(cm_id->device);
+ struct iwch_ep *ep;
+ struct rtentry *rt;
+ struct toedev *tdev;
+
+ if (is_loopback_dst(cm_id)) {
+ err = -ENOSYS;
+ goto out;
+ }
+
+ ep = alloc_ep(sizeof(*ep), M_NOWAIT);
+ if (!ep) {
+ printf("%s - cannot alloc ep.\n", __FUNCTION__);
+ err = (-ENOMEM);
+ goto out;
+ }
+ callout_init(&ep->timer, TRUE);
+ ep->plen = conn_param->private_data_len;
+ if (ep->plen)
+ memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
+ conn_param->private_data, ep->plen);
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.qp = get_qhp(h, conn_param->qpn);
+ ep->com.thread = curthread;
+ PANIC_IF(!ep->com.qp);
+ CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
+ ep->com.qp, cm_id);
+
+ ep->com.so = cm_id->so;
+ err = init_sock(&ep->com);
+ if (err)
+ goto fail2;
+
+ /* find a route */
+ rt = find_route(cm_id->local_addr.sin_addr.s_addr,
+ cm_id->remote_addr.sin_addr.s_addr,
+ cm_id->local_addr.sin_port,
+ cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
+ if (!rt) {
+ printf("%s - cannot find route.\n", __FUNCTION__);
+ err = EHOSTUNREACH;
+ goto fail2;
+ }
+
+	if (!(rt->rt_ifp->if_capabilities & IFCAP_TOE)) {
+ printf("%s - interface not TOE capable.\n", __FUNCTION__);
+ goto fail3;
+ }
+ tdev = TOEDEV(rt->rt_ifp);
+ if (tdev == NULL) {
+ printf("%s - No toedev for interface.\n", __FUNCTION__);
+ goto fail3;
+ }
+ if (!tdev->tod_can_offload(tdev, ep->com.so)) {
+		printf("%s - interface cannot offload!\n", __FUNCTION__);
+ goto fail3;
+ }
+ RTFREE(rt);
+
+ state_set(&ep->com, CONNECTING);
+ ep->com.local_addr = cm_id->local_addr;
+ ep->com.remote_addr = cm_id->remote_addr;
+ err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
+ ep->com.thread);
+ if (!err)
+ goto out;
+fail3:
+ RTFREE(ep->dst);
+fail2:
+ put_ep(&ep->com);
+out:
+ return err;
+}
+
+int
+iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ int err = 0;
+ struct iwch_listen_ep *ep;
+
+ ep = alloc_ep(sizeof(*ep), M_NOWAIT);
+ if (!ep) {
+ printf("%s - cannot alloc ep.\n", __FUNCTION__);
+ err = ENOMEM;
+ goto out;
+ }
+ CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->backlog = backlog;
+ ep->com.local_addr = cm_id->local_addr;
+ ep->com.thread = curthread;
+ state_set(&ep->com, LISTEN);
+
+ ep->com.so = cm_id->so;
+ err = init_sock(&ep->com);
+ if (err)
+ goto fail;
+
+ err = solisten(ep->com.so, ep->backlog, ep->com.thread);
+ if (!err) {
+ cm_id->provider_data = ep;
+ goto out;
+ }
+ close_socket(&ep->com);
+fail:
+ cm_id->rem_ref(cm_id);
+ put_ep(&ep->com);
+out:
+ return err;
+}
+
+int
+iwch_destroy_listen(struct iw_cm_id *cm_id)
+{
+ struct iwch_listen_ep *ep = to_listen_ep(cm_id);
+
+ CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
+
+ state_set(&ep->com, DEAD);
+ close_socket(&ep->com);
+ cm_id->rem_ref(cm_id);
+ put_ep(&ep->com);
+ return 0;
+}
+
+int
+iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
+{
+ int close = 0;
+
+	PANIC_IF(!ep);
+	PANIC_IF(!ep->com.so);
+
+	mtx_lock(&ep->com.lock);
+
+ CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
+ ep->com.so, states[ep->com.state], abrupt);
+
+ if (ep->com.state == DEAD) {
+ CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep);
+ goto out;
+ }
+
+ if (abrupt) {
+ if (ep->com.state != ABORTING) {
+ ep->com.state = ABORTING;
+ close = 1;
+ }
+ goto out;
+ }
+
+ switch (ep->com.state) {
+ case MPA_REQ_WAIT:
+ case MPA_REQ_SENT:
+ case MPA_REQ_RCVD:
+ case MPA_REP_SENT:
+ case FPDU_MODE:
+ start_ep_timer(ep);
+ ep->com.state = CLOSING;
+ close = 1;
+ break;
+ case CLOSING:
+ ep->com.state = MORIBUND;
+ close = 1;
+ break;
+ case MORIBUND:
+ case ABORTING:
+ break;
+ default:
+ panic("unknown state: %d\n", ep->com.state);
+ break;
+ }
+out:
+ mtx_unlock(&ep->com.lock);
+ if (close) {
+ if (abrupt)
+ abort_connection(ep);
+ else
+ shutdown_socket(&ep->com);
+ }
+ return 0;
+}
+
+static void
+process_data(struct iwch_ep *ep)
+{
+ struct sockaddr_in *local, *remote;
+
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+
+ switch (state_read(&ep->com)) {
+ case MPA_REQ_SENT:
+ process_mpa_reply(ep);
+ break;
+ case MPA_REQ_WAIT:
+
+ /*
+ * XXX
+ * Set local and remote addrs here because when we
+ * dequeue the newly accepted socket, they aren't set
+ * yet in the pcb!
+ */
+ in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
+ in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
+ CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
+ inet_ntoa(local->sin_addr),
+ inet_ntoa(remote->sin_addr));
+ ep->com.local_addr = *local;
+ ep->com.remote_addr = *remote;
+ free(local, M_SONAME);
+ free(remote, M_SONAME);
+ process_mpa_request(ep);
+ break;
+ default:
+ if (ep->com.so->so_rcv.sb_cc)
+ printf("%s Unexpected streaming data."
+ " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
+ __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
+ ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb);
+ break;
+ }
+ return;
+}
+
+static void
+process_connected(struct iwch_ep *ep)
+{
+ CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
+ if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
+ send_mpa_req(ep);
+ } else {
+ connect_reply_upcall(ep, -ep->com.so->so_error);
+ close_socket(&ep->com);
+ state_set(&ep->com, DEAD);
+ put_ep(&ep->com);
+ }
+}
+
+static struct socket *
+dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
+{
+ struct socket *so;
+
+ ACCEPT_LOCK();
+ so = TAILQ_FIRST(&head->so_comp);
+ if (!so) {
+ ACCEPT_UNLOCK();
+ return NULL;
+ }
+ TAILQ_REMOVE(&head->so_comp, so, so_list);
+ head->so_qlen--;
+ SOCK_LOCK(so);
+ so->so_qstate &= ~SQ_COMP;
+ so->so_head = NULL;
+ soref(so);
+ so->so_rcv.sb_flags |= SB_UPCALL;
+ so->so_state |= SS_NBIO;
+ so->so_upcall = iwch_so_upcall;
+ so->so_upcallarg = child_ep;
+ PANIC_IF(!(so->so_state & SS_ISCONNECTED));
+ PANIC_IF(so->so_error);
+ SOCK_UNLOCK(so);
+ ACCEPT_UNLOCK();
+ soaccept(so, (struct sockaddr **)remote);
+ return so;
+}
+
+static void
+process_newconn(struct iwch_ep *parent_ep)
+{
+ struct socket *child_so;
+ struct iwch_ep *child_ep;
+ struct sockaddr_in *remote;
+
+ CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
+ child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
+ if (!child_ep) {
+ log(LOG_ERR, "%s - failed to allocate ep entry!\n",
+ __FUNCTION__);
+ return;
+ }
+ child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
+ if (!child_so) {
+ log(LOG_ERR, "%s - failed to dequeue child socket!\n",
+ __FUNCTION__);
+ __free_ep(&child_ep->com);
+ return;
+ }
+ CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
+ inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
+ child_ep->com.so = child_so;
+ child_ep->com.cm_id = NULL;
+ child_ep->com.thread = parent_ep->com.thread;
+ child_ep->parent_ep = parent_ep;
+ free(remote, M_SONAME);
+ get_ep(&parent_ep->com);
+ callout_init(&child_ep->timer, TRUE);
+ state_set(&child_ep->com, MPA_REQ_WAIT);
+ start_ep_timer(child_ep);
+
+ /* maybe the request has already been queued up on the socket... */
+ process_mpa_request(child_ep);
+}
+
+static void
+iwch_so_upcall(struct socket *so, void *arg, int waitflag)
+{
+ struct iwch_ep *ep = arg;
+
+ CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
+ mtx_lock(&req_lock);
+ if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
+ get_ep(&ep->com);
+ TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
+ taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
+ }
+ mtx_unlock(&req_lock);
+}
+
+static void
+process_socket_event(struct iwch_ep *ep)
+{
+ int state = state_read(&ep->com);
+ struct socket *so = ep->com.so;
+
+ CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
+ if (state == CONNECTING) {
+ process_connected(ep);
+ return;
+ }
+
+ if (state == LISTEN) {
+ process_newconn(ep);
+ return;
+ }
+
+ /* connection error */
+ if (so->so_error) {
+ process_conn_error(ep);
+ return;
+ }
+
+ /* peer close */
+ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
+ process_peer_close(ep);
+ return;
+ }
+
+ /* close complete */
+ if (so->so_state & (SS_ISDISCONNECTED)) {
+ process_close_complete(ep);
+ return;
+ }
+
+ /* rx data */
+ process_data(ep);
+ return;
+}
+
+static void
+process_req(void *ctx, int pending)
+{
+ struct iwch_ep_common *epc;
+
+ CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
+ mtx_lock(&req_lock);
+ while (!TAILQ_EMPTY(&req_list)) {
+ epc = TAILQ_FIRST(&req_list);
+ TAILQ_REMOVE(&req_list, epc, entry);
+ epc->entry.tqe_prev = NULL;
+ mtx_unlock(&req_lock);
+ if (epc->so)
+ process_socket_event((struct iwch_ep *)epc);
+ put_ep(epc);
+ mtx_lock(&req_lock);
+ }
+ mtx_unlock(&req_lock);
+}
+
+int
+iwch_cm_init(void)
+{
+ TAILQ_INIT(&req_list);
+ mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF);
+ iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT,
+ taskqueue_thread_enqueue, &iw_cxgb_taskq);
+ if (iw_cxgb_taskq == NULL) {
+ printf("failed to allocate iw_cxgb taskqueue\n");
+ return (ENOMEM);
+ }
+ taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
+ TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
+ t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
+ t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status);
+ return 0;
+}
+
+void
+iwch_cm_term(void)
+{
+ t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
+ t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL);
+ taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
+ taskqueue_free(iw_cxgb_taskq);
+}
+
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
new file mode 100644
index 0000000000000..4250be33300ad
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
@@ -0,0 +1,249 @@
+/**************************************************************************
+
+Copyright (c) 2007, 2008 Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+
+#ifndef _IWCH_CM_H_
+#define _IWCH_CM_H_
+#include <contrib/rdma/ib_verbs.h>
+#include <contrib/rdma/iw_cm.h>
+#include <sys/refcount.h>
+#include <sys/condvar.h>
+#include <sys/proc.h>
+
+
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+
+#define MPA_MAX_PRIVATE_DATA 256
+#define MPA_REV		0	/* XXX - amso1100 uses rev 0 ! */
+#define MPA_REJECT 0x20
+#define MPA_CRC 0x40
+#define MPA_MARKERS 0x80
+#define MPA_FLAGS_MASK 0xE0
+
+#define put_ep(ep) { \
+ CTR4(KTR_IW_CXGB, "put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \
+ ep, atomic_load_acq_int(&((ep)->refcount))); \
+ if (refcount_release(&((ep)->refcount))) \
+ __free_ep(ep); \
+}
+
+#define get_ep(ep) { \
+ CTR4(KTR_IW_CXGB, "get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \
+ ep, atomic_load_acq_int(&((ep)->refcount))); \
+ refcount_acquire(&((ep)->refcount)); \
+}
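+
+/*
+ * Usage sketch for the get_ep()/put_ep() pair: code that hands an endpoint
+ * to deferred work takes a reference first and releases it when the work is
+ * done (this is the pattern followed by iwch_so_upcall() and process_req()
+ * in iw_cxgb_cm.c):
+ *
+ *	get_ep(&ep->com);
+ *	TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
+ *	...
+ *	put_ep(&ep->com);	(calls __free_ep() once the count reaches 0)
+ */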
+
+struct mpa_message {
+ u8 key[16];
+ u8 flags;
+ u8 revision;
+ __be16 private_data_size;
+ u8 private_data[0];
+};
+
+struct terminate_message {
+ u8 layer_etype;
+ u8 ecode;
+ __be16 hdrct_rsvd;
+ u8 len_hdrs[0];
+};
+
+#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
+
+enum iwch_layers_types {
+ LAYER_RDMAP = 0x00,
+ LAYER_DDP = 0x10,
+ LAYER_MPA = 0x20,
+ RDMAP_LOCAL_CATA = 0x00,
+ RDMAP_REMOTE_PROT = 0x01,
+ RDMAP_REMOTE_OP = 0x02,
+ DDP_LOCAL_CATA = 0x00,
+ DDP_TAGGED_ERR = 0x01,
+ DDP_UNTAGGED_ERR = 0x02,
+ DDP_LLP = 0x03
+};
+
+enum iwch_rdma_ecodes {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_BASE_BOUNDS = 0x01,
+ RDMAP_ACC_VIOL = 0x02,
+ RDMAP_STAG_NOT_ASSOC = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_VERS = 0x05,
+ RDMAP_INV_OPCODE = 0x06,
+ RDMAP_STREAM_CATA = 0x07,
+ RDMAP_GLOBAL_CATA = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff
+};
+
+enum iwch_ddp_ecodes {
+ DDPT_INV_STAG = 0x00,
+ DDPT_BASE_BOUNDS = 0x01,
+ DDPT_STAG_NOT_ASSOC = 0x02,
+ DDPT_TO_WRAP = 0x03,
+ DDPT_INV_VERS = 0x04,
+ DDPU_INV_QN = 0x01,
+ DDPU_INV_MSN_NOBUF = 0x02,
+ DDPU_INV_MSN_RANGE = 0x03,
+ DDPU_INV_MO = 0x04,
+ DDPU_MSG_TOOBIG = 0x05,
+ DDPU_INV_VERS = 0x06
+};
+
+enum iwch_mpa_ecodes {
+ MPA_CRC_ERR = 0x02,
+ MPA_MARKER_ERR = 0x03
+};
+
+enum iwch_ep_state {
+ IDLE = 0,
+ LISTEN,
+ CONNECTING,
+ MPA_REQ_WAIT,
+ MPA_REQ_SENT,
+ MPA_REQ_RCVD,
+ MPA_REP_SENT,
+ FPDU_MODE,
+ ABORTING,
+ CLOSING,
+ MORIBUND,
+ DEAD,
+};
+
+enum iwch_ep_flags {
+ PEER_ABORT_IN_PROGRESS = (1 << 0),
+ ABORT_REQ_IN_PROGRESS = (1 << 1),
+};
+
+struct iwch_ep_common {
+ TAILQ_ENTRY(iwch_ep_common) entry;
+ struct iw_cm_id *cm_id;
+ struct iwch_qp *qp;
+ struct t3cdev *tdev;
+ enum iwch_ep_state state;
+ u_int refcount;
+ struct cv waitq;
+ struct mtx lock;
+ struct sockaddr_in local_addr;
+ struct sockaddr_in remote_addr;
+ int rpl_err;
+ int rpl_done;
+ struct thread *thread;
+ struct socket *so;
+};
+
+struct iwch_listen_ep {
+ struct iwch_ep_common com;
+ unsigned int stid;
+ int backlog;
+};
+
+struct iwch_ep {
+ struct iwch_ep_common com;
+ struct iwch_ep *parent_ep;
+ struct callout timer;
+ unsigned int atid;
+ u32 hwtid;
+ u32 snd_seq;
+ u32 rcv_seq;
+ struct l2t_entry *l2t;
+ struct rtentry *dst;
+ struct mbuf *mpa_mbuf;
+ struct iwch_mpa_attributes mpa_attr;
+ unsigned int mpa_pkt_len;
+ u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
+ u8 tos;
+ u16 emss;
+ u16 plen;
+ u32 ird;
+ u32 ord;
+ u32 flags;
+};
+
+static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline int compute_wscale(int win)
+{
+ int wscale = 0;
+
+ while (wscale < 14 && (65535<<wscale) < win)
+ wscale++;
+ return wscale;
+}
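+
+/*
+ * Worked example (illustrative): for a 256KB receive window (win = 262144),
+ * compute_wscale() returns 3, since 65535 << 2 = 262140 is still smaller
+ * than the window while 65535 << 3 = 524280 covers it.
+ */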
+
+static __inline void
+iwch_wait(struct cv *cv, struct mtx *lock, int *rpl_done)
+{
+ mtx_lock(lock);
+	if (!*rpl_done) {
+		CTR0(KTR_IW_CXGB, "sleeping for rpl_done\n");
+		cv_wait_unlock(cv, lock);
+	} else
+		mtx_unlock(lock);
+ CTR1(KTR_IW_CXGB, "*rpl_done=%d\n", *rpl_done);
+}
+
+static __inline void
+iwch_wakeup(struct cv *cv, struct mtx *lock, int *rpl_done)
+{
+ mtx_lock(lock);
+ *rpl_done=1;
+ CTR0(KTR_IW_CXGB, "wakeup for rpl_done\n");
+ cv_broadcast(cv);
+ mtx_unlock(lock);
+}
+
+/* CM prototypes */
+
+int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
+int iwch_destroy_listen(struct iw_cm_id *cm_id);
+int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags);
+int iwch_quiesce_tid(struct iwch_ep *ep);
+int iwch_resume_tid(struct iwch_ep *ep);
+void __free_ep(struct iwch_ep_common *ep);
+void iwch_rearp(struct iwch_ep *ep);
+int iwch_ep_redirect(void *ctx, struct rtentry *old, struct rtentry *new, struct l2t_entry *l2t);
+
+int iwch_cm_init(void);
+void iwch_cm_term(void);
+
+#endif /* _IWCH_CM_H_ */
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c
new file mode 100644
index 0000000000000..93a9e89fd39d2
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c
@@ -0,0 +1,276 @@
+
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/libkern.h>
+
+#include <netinet/in.h>
+#include <contrib/rdma/ib_verbs.h>
+#include <contrib/rdma/ib_umem.h>
+#include <contrib/rdma/ib_user_verbs.h>
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#include <ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <ulp/iw_cxgb/iw_cxgb_user.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h>
+#endif
+
+/*
+ * Get one cq entry from cxio and map it to openib.
+ *
+ * Returns:
+ * 0 cqe returned
+ * -ENOBUFS EMPTY;
+ * -EAGAIN caller must try again
+ * any other neg errno fatal error
+ */
+static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+ struct ib_wc *wc)
+{
+ struct iwch_qp *qhp = NULL;
+ struct t3_cqe cqe, *rd_cqe;
+ struct t3_wq *wq;
+ u32 credit = 0;
+ u8 cqe_flushed;
+ u64 cookie;
+ int ret = 1;
+
+ rd_cqe = cxio_next_cqe(&chp->cq);
+
+ if (!rd_cqe)
+ return 0;
+
+ qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
+ if (!qhp)
+ wq = NULL;
+ else {
+ mtx_lock(&qhp->lock);
+ wq = &(qhp->wq);
+ }
+ ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
+ &credit);
+ if (t3a_device(chp->rhp) && credit) {
+ CTR3(KTR_IW_CXGB, "%s updating %d cq credits on id %d", __FUNCTION__,
+ credit, chp->cq.cqid);
+ cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
+ }
+
+ if (ret) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ ret = 1;
+
+ wc->wr_id = cookie;
+ wc->qp = &qhp->ibqp;
+ wc->vendor_err = CQE_STATUS(cqe);
+
+ CTR4(KTR_IW_CXGB, "iwch_poll_cq_one qpid 0x%x type %d opcode %d status 0x%x",
+ CQE_QPID(cqe), CQE_TYPE(cqe),
+ CQE_OPCODE(cqe), CQE_STATUS(cqe));
+ CTR3(KTR_IW_CXGB, "wrid hi 0x%x lo 0x%x cookie 0x%llx",
+ CQE_WRID_HI(cqe), CQE_WRID_LOW(cqe), (unsigned long long) cookie);
+
+ if (CQE_TYPE(cqe) == 0) {
+ if (!CQE_STATUS(cqe))
+ wc->byte_len = CQE_LEN(cqe);
+ else
+ wc->byte_len = 0;
+ wc->opcode = IB_WC_RECV;
+ } else {
+ switch (CQE_OPCODE(cqe)) {
+ case T3_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case T3_READ_REQ:
+ wc->opcode = IB_WC_RDMA_READ;
+ wc->byte_len = CQE_LEN(cqe);
+ break;
+ case T3_SEND:
+ case T3_SEND_WITH_SE:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case T3_BIND_MW:
+ wc->opcode = IB_WC_BIND_MW;
+ break;
+
+ /* these aren't supported yet */
+ case T3_SEND_WITH_INV:
+ case T3_SEND_WITH_SE_INV:
+ case T3_LOCAL_INV:
+ case T3_FAST_REGISTER:
+ default:
+ log(LOG_ERR, "Unexpected opcode %d "
+ "in the CQE received for QPID=0x%0x\n",
+ CQE_OPCODE(cqe), CQE_QPID(cqe));
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (cqe_flushed)
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ else {
+
+ switch (CQE_STATUS(cqe)) {
+ case TPT_ERR_SUCCESS:
+ wc->status = IB_WC_SUCCESS;
+ break;
+ case TPT_ERR_STAG:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case TPT_ERR_PDID:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case TPT_ERR_QPID:
+ case TPT_ERR_ACCESS:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case TPT_ERR_WRAP:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ case TPT_ERR_BOUND:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case TPT_ERR_CRC:
+ case TPT_ERR_MARKER:
+ case TPT_ERR_PDU_LEN_ERR:
+ case TPT_ERR_OUT_OF_RQE:
+ case TPT_ERR_DDP_VERSION:
+ case TPT_ERR_RDMA_VERSION:
+ case TPT_ERR_DDP_QUEUE_NUM:
+ case TPT_ERR_MSN:
+ case TPT_ERR_TBIT:
+ case TPT_ERR_MO:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_IRD_OVERFLOW:
+ case TPT_ERR_OPCODE:
+ wc->status = IB_WC_FATAL_ERR;
+ break;
+ case TPT_ERR_SWFLUSH:
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ default:
+ log(LOG_ERR, "Unexpected cqe_status 0x%x for "
+ "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
+ ret = -EINVAL;
+ }
+ }
+out:
+ if (wq)
+ mtx_unlock(&qhp->lock);
+ return ret;
+}
+
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ int npolled;
+ int err = 0;
+
+ chp = to_iwch_cq(ibcq);
+ rhp = chp->rhp;
+
+ mtx_lock(&chp->lock);
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+#ifdef DEBUG
+ int i=0;
+#endif
+
+ /*
+ * Because T3 can post CQEs that are _not_ associated
+ * with a WR, we might have to poll again after removing
+ * one of these.
+ */
+ do {
+ err = iwch_poll_cq_one(rhp, chp, wc + npolled);
+#ifdef DEBUG
+ PANIC_IF(++i > 1000);
+#endif
+ } while (err == -EAGAIN);
+ if (err <= 0)
+ break;
+ }
+ mtx_unlock(&chp->lock);
+
+ if (err < 0) {
+ return err;
+ } else {
+ return npolled;
+ }
+}
+
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c
new file mode 100644
index 0000000000000..8b52119e306ec
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c
@@ -0,0 +1,255 @@
+
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/libkern.h>
+
+#include <netinet/in.h>
+
+#include <contrib/rdma/ib_verbs.h>
+#include <contrib/rdma/ib_umem.h>
+#include <contrib/rdma/ib_user_verbs.h>
+
+#ifdef DEBUG
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#include <ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <ulp/iw_cxgb/iw_cxgb_user.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h>
+#endif
+
+void cxio_dump_tpt(struct cxio_rdev *rdev, uint32_t stag)
+{
+ struct ch_mem_range *m;
+ u64 *data;
+ int rc;
+ int size = 32;
+
+ m = kmalloc(sizeof(*m) + size, M_NOWAIT);
+ if (!m) {
+ CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
+ m->len = size;
+ CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
+ free(m, M_DEVBUF);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ free(m, M_DEVBUF);
+}
+
+void cxio_dump_pbl(struct cxio_rdev *rdev, uint32_t pbl_addr, uint32_t len, u8 shift)
+{
+ struct ch_mem_range *m;
+ u64 *data;
+ int rc;
+ int size, npages;
+
+ shift += 12;
+ npages = (len + (1ULL << shift) - 1) >> shift;
+ size = npages * sizeof(u64);
+
+ m = kmalloc(sizeof(*m) + size, M_NOWAIT);
+ if (!m) {
+ CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = pbl_addr;
+ m->len = size;
+ CTR4(KTR_IW_CXGB, "%s PBL addr 0x%x len %d depth %d",
+ __FUNCTION__, m->addr, m->len, npages);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
+ free(m, M_DEVBUF);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ free(m, M_DEVBUF);
+}
+
+void cxio_dump_wqe(union t3_wr *wqe)
+{
+ uint64_t *data = (uint64_t *)wqe;
+ uint32_t size = (uint32_t)(be64toh(*data) & 0xff);
+
+ if (size == 0)
+ size = 8;
+ while (size > 0) {
+ CTR2(KTR_IW_CXGB, "WQE %p: %016llx", data,
+ (unsigned long long) be64toh(*data));
+ size--;
+ data++;
+ }
+}
+
+void cxio_dump_wce(struct t3_cqe *wce)
+{
+ uint64_t *data = (uint64_t *)wce;
+ int size = sizeof(*wce);
+
+ while (size > 0) {
+ CTR2(KTR_IW_CXGB, "WCE %p: %016llx", data,
+ (unsigned long long) be64toh(*data));
+ size -= 8;
+ data++;
+ }
+}
+
+void cxio_dump_rqt(struct cxio_rdev *rdev, uint32_t hwtid, int nents)
+{
+ struct ch_mem_range *m;
+ int size = nents * 64;
+ u64 *data;
+ int rc;
+
+ m = kmalloc(sizeof(*m) + size, M_NOWAIT);
+ if (!m) {
+ CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
+ return;
+ }
+ m->mem_id = MEM_PMRX;
+ m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
+ m->len = size;
+ CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
+ free(m, M_DEVBUF);
+ return;
+ }
+
+ data = (u64 *)m->buf;
+ while (size > 0) {
+ CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", m->addr, (unsigned long long) *data);
+ size -= 8;
+ data++;
+ m->addr += 8;
+ }
+ free(m, M_DEVBUF);
+}
+
+void cxio_dump_tcb(struct cxio_rdev *rdev, uint32_t hwtid)
+{
+ struct ch_mem_range *m;
+ int size = TCB_SIZE;
+ uint32_t *data;
+ int rc;
+
+ m = kmalloc(sizeof(*m) + size, M_NOWAIT);
+ if (!m) {
+ CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__);
+ return;
+ }
+ m->mem_id = MEM_CM;
+ m->addr = hwtid * size;
+ m->len = size;
+ CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m->addr, m->len);
+ rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
+ if (rc) {
+ CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc);
+ free(m, M_DEVBUF);
+ return;
+ }
+
+ data = (uint32_t *)m->buf;
+ while (size > 0) {
+ printf("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ m->addr,
+ *(data+2), *(data+3), *(data),*(data+1),
+ *(data+6), *(data+7), *(data+4), *(data+5));
+ size -= 32;
+ data += 8;
+ m->addr += 32;
+ }
+ free(m, M_DEVBUF);
+}
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c
new file mode 100644
index 0000000000000..2e8154731133d
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c
@@ -0,0 +1,265 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/libkern.h>
+
+#include <netinet/in.h>
+
+#include <contrib/rdma/ib_verbs.h>
+#include <contrib/rdma/ib_umem.h>
+#include <contrib/rdma/ib_user_verbs.h>
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#include <ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <ulp/iw_cxgb/iw_cxgb_user.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h>
+#endif
+
+static void
+post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp,
+ struct respQ_msg_t *rsp_msg,
+ enum ib_event_type ib_event,
+ int send_term)
+{
+ struct ib_event event;
+ struct iwch_qp_attributes attrs;
+
+ if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
+ (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
+ CTR4(KTR_IW_CXGB, "%s AE received after RTS - "
+ "qp state %d qpid 0x%x status 0x%x", __FUNCTION__,
+ qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
+ return;
+ }
+
+ log(LOG_ERR, "%s - AE qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+ CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+ CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+
+
+ event.event = ib_event;
+ event.device = chp->ibcq.device;
+ if (ib_event == IB_EVENT_CQ_ERR)
+ event.element.cq = &chp->ibcq;
+ else
+ event.element.qp = &qhp->ibqp;
+
+ if (qhp->ibqp.event_handler)
+ (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
+
+ if (qhp->attr.state == IWCH_QP_STATE_RTS) {
+ attrs.next_state = IWCH_QP_STATE_TERMINATE;
+ iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (send_term)
+ iwch_post_terminate(qhp, rsp_msg);
+ }
+}
+
+void
+iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m)
+{
+ struct iwch_dev *rnicp;
+ struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
+ struct iwch_cq *chp;
+ struct iwch_qp *qhp;
+ u32 cqid = RSPQ_CQID(rsp_msg);
+
+ rnicp = (struct iwch_dev *) rdev_p->ulp;
+ mtx_lock(&rnicp->lock);
+ chp = get_chp(rnicp, cqid);
+ qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
+ if (!chp || !qhp) {
+ log(LOG_ERR,"BAD AE cqid 0x%x qpid 0x%x opcode %d "
+ "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
+ cqid, CQE_QPID(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
+ CQE_WRID_LOW(rsp_msg->cqe));
+ mtx_unlock(&rnicp->lock);
+ goto out;
+ }
+ iwch_qp_add_ref(&qhp->ibqp);
+ mtx_lock(&chp->lock);
+ ++chp->refcnt;
+ mtx_unlock(&chp->lock);
+ mtx_unlock(&rnicp->lock);
+
+ /*
+ * 1) completion of our sending a TERMINATE.
+ * 2) incoming TERMINATE message.
+ */
+ if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
+ (CQE_STATUS(rsp_msg->cqe) == 0)) {
+ if (SQ_TYPE(rsp_msg->cqe)) {
+ CTR3(KTR_IW_CXGB, "%s QPID 0x%x ep %p disconnecting",
+ __FUNCTION__, qhp->wq.qpid, qhp->ep);
+ iwch_ep_disconnect(qhp->ep, 0, M_NOWAIT);
+ } else {
+ CTR2(KTR_IW_CXGB, "%s post REQ_ERR AE QPID 0x%x", __FUNCTION__,
+ qhp->wq.qpid);
+ post_qp_event(rnicp, qhp, chp, rsp_msg,
+ IB_EVENT_QP_REQ_ERR, 0);
+ iwch_ep_disconnect(qhp->ep, 0, M_NOWAIT);
+ }
+ goto done;
+ }
+
+ /* Bad incoming Read request */
+ if (SQ_TYPE(rsp_msg->cqe) &&
+ (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
+ post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
+ goto done;
+ }
+
+ /* Bad incoming write */
+ if (RQ_TYPE(rsp_msg->cqe) &&
+ (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
+ post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
+ goto done;
+ }
+
+ switch (CQE_STATUS(rsp_msg->cqe)) {
+
+ /* Completion Events */
+ case TPT_ERR_SUCCESS:
+#if 0
+ /*
+ * Confirm the destination entry if this is a RECV completion.
+ */
+ if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
+ dst_confirm(qhp->ep->dst);
+#endif
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ break;
+
+ case TPT_ERR_STAG:
+ case TPT_ERR_PDID:
+ case TPT_ERR_QPID:
+ case TPT_ERR_ACCESS:
+ case TPT_ERR_WRAP:
+ case TPT_ERR_BOUND:
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ log(LOG_ERR, "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
+ CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
+ CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
+ CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
+ break;
+
+ /* Device Fatal Errors */
+ case TPT_ERR_ECC:
+ case TPT_ERR_ECC_PSTAG:
+ case TPT_ERR_INTERNAL_ERR:
+ post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
+ break;
+
+ /* QP Fatal Errors */
+ case TPT_ERR_OUT_OF_RQE:
+ case TPT_ERR_PBL_ADDR_BOUND:
+ case TPT_ERR_CRC:
+ case TPT_ERR_MARKER:
+ case TPT_ERR_PDU_LEN_ERR:
+ case TPT_ERR_DDP_VERSION:
+ case TPT_ERR_RDMA_VERSION:
+ case TPT_ERR_OPCODE:
+ case TPT_ERR_DDP_QUEUE_NUM:
+ case TPT_ERR_MSN:
+ case TPT_ERR_TBIT:
+ case TPT_ERR_MO:
+ case TPT_ERR_MSN_GAP:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_RQE_ADDR_BOUND:
+ case TPT_ERR_IRD_OVERFLOW:
+ post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
+ break;
+
+ default:
+ log(LOG_ERR,"Unknown T3 status 0x%x QPID 0x%x\n",
+ CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
+ post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
+ break;
+ }
+done:
+ mtx_lock(&chp->lock);
+ if (--chp->refcnt == 0)
+ wakeup(chp);
+ mtx_unlock(&chp->lock);
+ iwch_qp_rem_ref(&qhp->ibqp);
+out:
+ m_free(m);
+}
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c
new file mode 100644
index 0000000000000..0309b53ba3c03
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c
@@ -0,0 +1,1418 @@
+
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <netinet/in.h>
+
+#include <contrib/rdma/ib_verbs.h>
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#include <ulp/iw_cxgb/iw_cxgb_resource.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
+#endif
+
+static TAILQ_HEAD( ,cxio_rdev) rdev_list;
+static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
+
+static struct cxio_rdev *
+cxio_hal_find_rdev_by_name(char *dev_name)
+{
+ struct cxio_rdev *rdev;
+
+ TAILQ_FOREACH(rdev, &rdev_list, entry)
+ if (!strcmp(rdev->dev_name, dev_name))
+ return rdev;
+ return NULL;
+}
+
+struct cxio_rdev *
+cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
+{
+ struct cxio_rdev *rdev;
+
+ TAILQ_FOREACH(rdev, &rdev_list, entry)
+ if (rdev->t3cdev_p == tdev)
+ return rdev;
+ return NULL;
+}
+
+int
+cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
+ enum t3_cq_opcode op, u32 credit)
+{
+ int ret;
+ struct t3_cqe *cqe;
+ u32 rptr;
+
+ struct rdma_cq_op setup;
+ setup.id = cq->cqid;
+ setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
+ setup.op = op;
+ ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
+
+ if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
+ return (ret);
+
+ /*
+ * If the rearm returned an index other than our current index,
+ * then there might be CQE's in flight (being DMA'd). We must wait
+ * here for them to complete or the consumer can miss a notification.
+ */
+ if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
+ int i=0;
+
+ rptr = cq->rptr;
+
+ /*
+ * Keep the generation correct by bumping rptr until it
+ * matches the index returned by the rearm - 1.
+ */
+ while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
+ rptr++;
+
+ /*
+ * Now rptr is the index for the (last) cqe that was
+ * in-flight at the time the HW rearmed the CQ. We
+ * spin until that CQE is valid.
+ */
+ cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
+ while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
+ DELAY(1);
+ if (i++ > 1000000) {
+ PANIC_IF(1);
+ log(LOG_ERR, "%s: stalled rnic\n",
+ rdev_p->dev_name);
+ return (-EIO);
+ }
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
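+
+/*
+ * Note on the rearm handling above (illustrative sketch): cq->rptr is a
+ * free-running counter and Q_PTR2IDX() is assumed to reduce it to an index
+ * into the CQE array, roughly
+ *
+ *	idx = ptr & ((1U << size_log2) - 1);
+ *
+ * so the loop can bump a copy of rptr until it sits one entry behind the
+ * index returned by the rearm, then spin on that entry's valid/generation
+ * bit.
+ */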
+
+static int
+cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
+{
+ struct rdma_cq_setup setup;
+ setup.id = cqid;
+ setup.base_addr = 0; /* NULL address */
+	setup.size = 0;		/* disable the CQ */
+ setup.credits = 0;
+ setup.credit_thres = 0;
+ setup.ovfl_mode = 0;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static int
+cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
+{
+ u64 sge_cmd;
+ struct t3_modify_qp_wr *wqe;
+ struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT);
+ if (m == NULL) {
+ CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
+ return (-ENOMEM);
+ }
+ wqe = mtod(m, struct t3_modify_qp_wr *);
+ m->m_len = m->m_pkthdr.len = sizeof(*wqe);
+ memset(wqe, 0, sizeof(*wqe));
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
+ wqe->flags = htobe32(MODQP_WRITE_EC);
+ sge_cmd = qpid << 8 | 3;
+ wqe->sge_cmd = htobe64(sge_cmd);
+ m_set_priority(m, CPL_PRIORITY_CONTROL);
+ m_set_sgl(m, NULL);
+ m_set_sgllen(m, 0);
+ return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
+}
+
+int
+cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ struct rdma_cq_setup setup;
+ int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
+
+ cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
+ if (!cq->cqid)
+ return (-ENOMEM);
+ cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!cq->sw_queue)
+ return (-ENOMEM);
+#if 0
+ cq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev,
+ (1UL << (cq->size_log2)) *
+ sizeof(struct t3_cqe),
+ &(cq->dma_addr), M_NOWAIT);
+#else
+ cq->queue = contigmalloc((1UL << (cq->size_log2))*sizeof(struct t3_cqe),
+ M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
+ if (cq->queue)
+ cq->dma_addr = vtophys(cq->queue);
+ else {
+ free(cq->sw_queue, M_DEVBUF);
+ return (-ENOMEM);
+ }
+#endif
+
+#ifdef notyet
+ pci_unmap_addr_set(cq, mapping, cq->dma_addr);
+#endif
+ memset(cq->queue, 0, size);
+ setup.id = cq->cqid;
+ setup.base_addr = (u64) (cq->dma_addr);
+ setup.size = 1UL << cq->size_log2;
+ setup.credits = 65535;
+ setup.credit_thres = 1;
+ if (rdev_p->t3cdev_p->type != T3A)
+ setup.ovfl_mode = 0;
+ else
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+int
+cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ struct rdma_cq_setup setup;
+ setup.id = cq->cqid;
+ setup.base_addr = (u64) (cq->dma_addr);
+ setup.size = 1UL << cq->size_log2;
+ setup.credits = setup.size;
+ setup.credit_thres = setup.size; /* TBD: overflow recovery */
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static u32
+get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ struct cxio_qpid *entry;
+ u32 qpid;
+ int i;
+
+ mtx_lock(&uctx->lock);
+ if (!TAILQ_EMPTY(&uctx->qpids)) {
+
+ entry = TAILQ_FIRST(&uctx->qpids);
+ TAILQ_REMOVE(&uctx->qpids, entry, entry);
+ qpid = entry->qpid;
+ free(entry, M_DEVBUF);
+ } else {
+ qpid = cxio_hal_get_qpid(rdev_p->rscp);
+ if (!qpid)
+ goto out;
+ for (i = qpid+1; i & rdev_p->qpmask; i++) {
+ entry = malloc(sizeof *entry, M_DEVBUF, M_NOWAIT);
+ if (!entry)
+ break;
+ entry->qpid = i;
+ TAILQ_INSERT_TAIL(&uctx->qpids, entry, entry);
+ }
+ }
+out:
+ mtx_unlock(&uctx->lock);
+ CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid);
+ return qpid;
+}
+
+static void
+put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
+ struct cxio_ucontext *uctx)
+{
+ struct cxio_qpid *entry;
+
+	entry = malloc(sizeof *entry, M_DEVBUF, M_NOWAIT);
+	if (!entry)
+		return;
+	CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid);
+	entry->qpid = qpid;
+ mtx_lock(&uctx->lock);
+ TAILQ_INSERT_TAIL(&uctx->qpids, entry, entry);
+ mtx_unlock(&uctx->lock);
+}
+
+void
+cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ struct cxio_qpid *pos, *tmp;
+
+ mtx_lock(&uctx->lock);
+ TAILQ_FOREACH_SAFE(pos, &uctx->qpids, entry, tmp) {
+ TAILQ_REMOVE(&uctx->qpids, pos, entry);
+ if (!(pos->qpid & rdev_p->qpmask))
+ cxio_hal_put_qpid(rdev_p->rscp, pos->qpid);
+ free(pos, M_DEVBUF);
+ }
+ mtx_unlock(&uctx->lock);
+}
+
+void
+cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
+{
+ TAILQ_INIT(&uctx->qpids);
+ mtx_init(&uctx->lock, "cxio uctx", NULL, MTX_DEF|MTX_DUPOK);
+}
+
+int
+cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
+ struct t3_wq *wq, struct cxio_ucontext *uctx)
+{
+ int depth = 1UL << wq->size_log2;
+ int rqsize = 1UL << wq->rq_size_log2;
+
+ wq->qpid = get_qpid(rdev_p, uctx);
+ if (!wq->qpid)
+ return (-ENOMEM);
+
+ wq->rq = malloc(depth * sizeof(u64), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!wq->rq)
+ goto err1;
+
+ wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
+ if (!wq->rq_addr)
+ goto err2;
+
+ wq->sq = malloc(depth * sizeof(struct t3_swsq), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!wq->sq)
+ goto err3;
+#if 0
+ wq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev,
+ depth * sizeof(union t3_wr),
+ &(wq->dma_addr), M_NOWAIT);
+#else
+ wq->queue = contigmalloc(depth *sizeof(union t3_wr),
+ M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
+ if (wq->queue)
+ wq->dma_addr = vtophys(wq->queue);
+
+#endif
+ if (!wq->queue)
+ goto err4;
+
+ memset(wq->queue, 0, depth * sizeof(union t3_wr));
+#ifdef notyet
+ pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+#endif
+ wq->doorbell = rdev_p->rnic_info.kdb_addr;
+ if (!kernel_domain)
+ wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
+ (wq->qpid << rdev_p->qpshift);
+ CTR4(KTR_IW_CXGB, "%s qpid 0x%x doorbell 0x%p udb 0x%llx", __FUNCTION__,
+ wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
+ return 0;
+err4:
+ free(wq->sq, M_DEVBUF);
+err3:
+ cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
+err2:
+ free(wq->rq, M_DEVBUF);
+err1:
+ put_qpid(rdev_p, wq->qpid, uctx);
+ return (-ENOMEM);
+}
+
+int
+cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+{
+ int err;
+ err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
+ free(cq->sw_queue, M_DEVBUF);
+#if 0
+ dma_free_coherent(&(rdev_p->rnic_info.pdev),
+ (1UL << (cq->size_log2))
+ * sizeof(struct t3_cqe), cq->queue,
+ /* pci_unmap_addr(cq, mapping)*/ 0);
+#else
+ contigfree(cq->queue,(1UL << (cq->size_log2))
+ * sizeof(struct t3_cqe), M_DEVBUF);
+#endif
+ cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
+ return err;
+}
+
+int
+cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
+ struct cxio_ucontext *uctx)
+{
+
+#if 0
+ dma_free_coherent(&(rdev_p->rnic_info.pdev),
+ (1UL << (wq->size_log2))
+ * sizeof(union t3_wr), wq->queue,
+ /* pci_unmap_addr(wq, mapping)*/ 0);
+#else
+ contigfree(wq->queue, (1UL << (wq->size_log2))
+ * sizeof(union t3_wr), M_DEVBUF);
+#endif
+ free(wq->sq, M_DEVBUF);
+ cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
+ free(wq->rq, M_DEVBUF);
+ put_qpid(rdev_p, wq->qpid, uctx);
+ return 0;
+}
+
+static void
+insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
+{
+ struct t3_cqe cqe;
+
+ CTR5(KTR_IW_CXGB, "%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x", __FUNCTION__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
+ memset(&cqe, 0, sizeof(cqe));
+ cqe.header = htobe32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
+ V_CQE_OPCODE(T3_SEND) |
+ V_CQE_TYPE(0) |
+ V_CQE_SWCQE(1) |
+ V_CQE_QPID(wq->qpid) |
+ V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
+ cq->size_log2)));
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+ cq->sw_wptr++;
+}
+
+void
+cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+{
+ u32 ptr;
+
+ CTR3(KTR_IW_CXGB, "%s wq %p cq %p", __FUNCTION__, wq, cq);
+
+ /* flush RQ */
+ CTR4(KTR_IW_CXGB, "%s rq_rptr %u rq_wptr %u skip count %u", __FUNCTION__,
+ wq->rq_rptr, wq->rq_wptr, count);
+ ptr = wq->rq_rptr + count;
+ while (ptr++ != wq->rq_wptr)
+ insert_recv_cqe(wq, cq);
+}
+
+static void
+insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
+ struct t3_swsq *sqp)
+{
+ struct t3_cqe cqe;
+
+ CTR5(KTR_IW_CXGB, "%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x", __FUNCTION__,
+ wq, cq, cq->sw_rptr, cq->sw_wptr);
+ memset(&cqe, 0, sizeof(cqe));
+ cqe.header = htobe32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
+ V_CQE_OPCODE(sqp->opcode) |
+ V_CQE_TYPE(1) |
+ V_CQE_SWCQE(1) |
+ V_CQE_QPID(wq->qpid) |
+ V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
+ cq->size_log2)));
+ cqe.u.scqe.wrid_hi = sqp->sq_wptr;
+
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
+ cq->sw_wptr++;
+}
+
+void
+cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+{
+ __u32 ptr;
+ struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
+
+ ptr = wq->sq_rptr + count;
+ sqp += count;
+ while (ptr != wq->sq_wptr) {
+ insert_sq_cqe(wq, cq, sqp);
+ sqp++;
+ ptr++;
+ }
+}
+
+/*
+ * Move all CQEs from the HWCQ into the SWCQ.
+ */
+void
+cxio_flush_hw_cq(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe, *swcqe;
+
+ CTR3(KTR_IW_CXGB, "%s cq %p cqid 0x%x", __FUNCTION__, cq, cq->cqid);
+ cqe = cxio_next_hw_cqe(cq);
+ while (cqe) {
+ CTR3(KTR_IW_CXGB, "%s flushing hwcq rptr 0x%x to swcq wptr 0x%x",
+ __FUNCTION__, cq->rptr, cq->sw_wptr);
+ swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
+ *swcqe = *cqe;
+ swcqe->header |= htobe32(V_CQE_SWCQE(1));
+ cq->sw_wptr++;
+ cq->rptr++;
+ cqe = cxio_next_hw_cqe(cq);
+ }
+}
+
+static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
+{
+ if (CQE_OPCODE(*cqe) == T3_TERMINATE)
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
+ return 0;
+
+ if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
+ Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
+ return 0;
+
+ return 1;
+}
+
+void
+cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
+{
+ struct t3_cqe *cqe;
+ u32 ptr;
+
+ *count = 0;
+ ptr = cq->sw_rptr;
+ while (!Q_EMPTY(ptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
+ if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
+ (CQE_QPID(*cqe) == wq->qpid))
+ (*count)++;
+ ptr++;
+ }
+ CTR3(KTR_IW_CXGB, "%s cq %p count %d", __FUNCTION__, cq, *count);
+}
+
+void
+cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
+{
+ struct t3_cqe *cqe;
+ u32 ptr;
+
+ *count = 0;
+ CTR2(KTR_IW_CXGB, "%s count zero %d", __FUNCTION__, *count);
+ ptr = cq->sw_rptr;
+ while (!Q_EMPTY(ptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
+ if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
+ (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
+ (*count)++;
+ ptr++;
+ }
+ CTR3(KTR_IW_CXGB, "%s cq %p count %d", __FUNCTION__, cq, *count);
+}
+
+static int
+cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
+{
+ struct rdma_cq_setup setup;
+ setup.id = 0;
+ setup.base_addr = 0; /* NULL address */
+ setup.size = 1; /* enable the CQ */
+ setup.credits = 0;
+
+ /* force SGE to redirect to RspQ and interrupt */
+ setup.credit_thres = 0;
+ setup.ovfl_mode = 1;
+ return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
+}
+
+static int
+cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+ int err;
+ u64 sge_cmd, ctx0, ctx1;
+ u64 base_addr;
+ struct t3_modify_qp_wr *wqe;
+ struct mbuf *m;
+
+ m = m_gethdr(MT_DATA, M_NOWAIT);
+ if (m == NULL) {
+ CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
+ return (-ENOMEM);
+ }
+ err = cxio_hal_init_ctrl_cq(rdev_p);
+ if (err) {
+ CTR2(KTR_IW_CXGB, "%s err %d initializing ctrl_cq", __FUNCTION__, err);
+ goto err;
+ }
+#if 0
+ rdev_p->ctrl_qp.workq = dma_alloc_coherent(
+ rdev_p->rnic_info.pdev,
+ (1 << T3_CTRL_QP_SIZE_LOG2) *
+ sizeof(union t3_wr),
+ &(rdev_p->ctrl_qp.dma_addr),
+ M_NOWAIT);
+#else
+ rdev_p->ctrl_qp.workq = contigmalloc((1 << T3_CTRL_QP_SIZE_LOG2)
+ *sizeof(union t3_wr), M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
+ if (rdev_p->ctrl_qp.workq)
+ rdev_p->ctrl_qp.dma_addr = vtophys(rdev_p->ctrl_qp.workq);
+
+#endif
+
+ if (!rdev_p->ctrl_qp.workq) {
+ CTR1(KTR_IW_CXGB, "%s dma_alloc_coherent failed", __FUNCTION__);
+ err = -ENOMEM;
+ goto err;
+ }
+#if 0
+ pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
+ rdev_p->ctrl_qp.dma_addr);
+#endif
+ rdev_p->ctrl_qp.doorbell = (void /*__iomem */ *)rdev_p->rnic_info.kdb_addr;
+ memset(rdev_p->ctrl_qp.workq, 0,
+ (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
+
+ mtx_init(&rdev_p->ctrl_qp.lock, "ctl-qp lock", NULL, MTX_DEF|MTX_DUPOK);
+
+ /* update HW Ctrl QP context */
+ base_addr = rdev_p->ctrl_qp.dma_addr;
+ base_addr >>= 12;
+ ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
+ V_EC_BASE_LO((u32) base_addr & 0xffff));
+ ctx0 <<= 32;
+ ctx0 |= V_EC_CREDITS(FW_WR_NUM);
+ base_addr >>= 16;
+ ctx1 = (u32) base_addr;
+ base_addr >>= 32;
+ ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
+ V_EC_TYPE(0) | V_EC_GEN(1) |
+ V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
+ wqe = mtod(m, struct t3_modify_qp_wr *);
+ m->m_len = m->m_pkthdr.len = sizeof(*wqe);
+ memset(wqe, 0, sizeof(*wqe));
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0,
+ T3_CTL_QP_TID, 7);
+ wqe->flags = htobe32(MODQP_WRITE_EC);
+ sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
+ wqe->sge_cmd = htobe64(sge_cmd);
+ wqe->ctx1 = htobe64(ctx1);
+ wqe->ctx0 = htobe64(ctx0);
+ CTR3(KTR_IW_CXGB, "CtrlQP dma_addr 0x%llx workq %p size %d",
+ (unsigned long long) rdev_p->ctrl_qp.dma_addr,
+ rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
+ m_set_priority(m, CPL_PRIORITY_CONTROL);
+ m_set_sgl(m, NULL);
+ m_set_sgllen(m, 0);
+ return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
+err:
+ m_free(m);
+ return err;
+}
+
+static int
+cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
+{
+#if 0
+
+ dma_free_coherent(&(rdev_p->rnic_info.pdev),
+ (1UL << T3_CTRL_QP_SIZE_LOG2)
+ * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
+ /* pci_unmap_addr(&rdev_p->ctrl_qp, mapping)*/ 0);
+#else
+ contigfree(rdev_p->ctrl_qp.workq,(1UL << T3_CTRL_QP_SIZE_LOG2)
+ * sizeof(union t3_wr), M_DEVBUF);
+#endif
+ return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
+}
+
+/* write len bytes of data into addr (32B aligned address)
+ * If data is NULL, clear len bytes of memory to zero.
+ * caller acquires the ctrl_qp lock before the call
+ */
+static int
+cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
+ u32 len, void *data, int completion)
+{
+ u32 i, nr_wqe, copy_len;
+ u8 *copy_data;
+	u8 wr_len, utx_len;	/* length in 8-byte flits */
+ enum t3_wr_flags flag;
+ __be64 *wqe;
+ u64 utx_cmd;
+ addr &= 0x7FFFFFF;
+ nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
+ CTR6(KTR_IW_CXGB, "cxio_hal_ctrl_qp_write_mem wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x",
+ rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
+ nr_wqe, data, addr);
+ utx_len = 3; /* in 32B unit */
+ for (i = 0; i < nr_wqe; i++) {
+ if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2)) {
+			CTR4(KTR_IW_CXGB, "%s ctrl_qp full wptr 0x%0x rptr 0x%0x, "
+ "wait for more space i %d", __FUNCTION__,
+ rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
+ if (cxio_wait(&rdev_p->ctrl_qp,
+ &rdev_p->ctrl_qp.lock,
+ !Q_FULL(rdev_p->ctrl_qp.rptr,
+ rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2))) {
+ CTR1(KTR_IW_CXGB, "%s ctrl_qp workq interrupted",
+ __FUNCTION__);
+ return (-ERESTART);
+ }
+ CTR2(KTR_IW_CXGB, "%s ctrl_qp wakeup, continue posting work request "
+ "i %d", __FUNCTION__, i);
+ }
+ wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+ (1 << T3_CTRL_QP_SIZE_LOG2)));
+ flag = 0;
+ if (i == (nr_wqe - 1)) {
+ /* last WQE */
+ flag = completion ? T3_COMPLETION_FLAG : 0;
+ if (len % 32)
+ utx_len = len / 32 + 1;
+ else
+ utx_len = len / 32;
+ }
+
+ /*
+ * Force a CQE to return the credit to the workq in case
+ * we posted more than half the max QP size of WRs
+ */
+ if ((i != 0) &&
+ (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
+ flag = T3_COMPLETION_FLAG;
+ CTR2(KTR_IW_CXGB, "%s force completion at i %d", __FUNCTION__, i);
+ }
+
+ /* build the utx mem command */
+ wqe += (sizeof(struct t3_bypass_wr) >> 3);
+ utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
+ utx_cmd <<= 32;
+ utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
+ *wqe = htobe64(utx_cmd);
+ wqe++;
+ copy_data = (u8 *) data + i * 96;
+ copy_len = len > 96 ? 96 : len;
+
+ /* clear memory content if data is NULL */
+ if (data)
+ memcpy(wqe, copy_data, copy_len);
+ else
+ memset(wqe, 0, copy_len);
+ if (copy_len % 32)
+ memset(((u8 *) wqe) + copy_len, 0,
+ 32 - (copy_len % 32));
+ wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
+ (utx_len << 2);
+ wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
+ (1 << T3_CTRL_QP_SIZE_LOG2)));
+
+ /* wptr in the WRID[31:0] */
+ ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
+
+ /*
+ * This must be the last write with a memory barrier
+ * for the genbit
+ */
+ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
+ Q_GENBIT(rdev_p->ctrl_qp.wptr,
+ T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
+ wr_len);
+ if (flag == T3_COMPLETION_FLAG)
+ ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
+
+ len -= 96;
+ rdev_p->ctrl_qp.wptr++;
+ }
+ return 0;
+}
+
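The loop above splits each transfer into WQEs carrying at most 96 bytes of payload and sizes the final UTX command in 32-byte units. As a worked illustration of that arithmetic only, here is a standalone, hypothetical helper (not part of the driver) that reproduces the nr_wqe / last-WQE utx_len computation:

/*
 * Illustration only: mirrors the chunking math used by
 * cxio_hal_ctrl_qp_write_mem() for a given payload length.
 */
#include <stdio.h>

static void
ctrl_qp_chunks(unsigned int len)
{
	unsigned int nr_wqe = len % 96 ? len / 96 + 1 : len / 96;
	unsigned int rem = len % 96;
	/* A full 96B chunk needs 3 x 32B UTX units; a partial chunk rounds up. */
	unsigned int last_utx = rem ? (rem % 32 ? rem / 32 + 1 : rem / 32) : 3;

	printf("len %u -> %u WQE(s), last WQE utx_len %u (32B units)\n",
	    len, nr_wqe, last_utx);
}

int
main(void)
{
	ctrl_qp_chunks(32);	/* 1 WQE, utx_len 1 */
	ctrl_qp_chunks(100);	/* 2 WQEs, last utx_len 1 */
	ctrl_qp_chunks(192);	/* 2 WQEs, last utx_len 3 */
	return (0);
}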
+/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
+ * OUT: stag index, actual pbl_size, pbl_addr allocated.
+ * TBD: shared memory region support
+ */
+static int
+__cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
+ u32 *stag, u8 stag_state, u32 pdid,
+ enum tpt_mem_type type, enum tpt_mem_perm perm,
+ u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
+ u32 *pbl_size, u32 *pbl_addr)
+{
+ int err;
+ struct tpt_entry tpt;
+ u32 stag_idx;
+ u32 wptr;
+ int rereg = (*stag != T3_STAG_UNSET);
+
+ stag_state = stag_state > 0;
+ stag_idx = (*stag) >> 8;
+
+	if (!reset_tpt_entry && (*stag == T3_STAG_UNSET)) {
+ stag_idx = cxio_hal_get_stag(rdev_p->rscp);
+ if (!stag_idx)
+ return (-ENOMEM);
+ *stag = (stag_idx << 8) | ((*stag) & 0xFF);
+ }
+ CTR5(KTR_IW_CXGB, "%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x",
+ __FUNCTION__, stag_state, type, pdid, stag_idx);
+
+ if (reset_tpt_entry)
+ cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
+ else if (!rereg) {
+ *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
+ if (!*pbl_addr) {
+ return (-ENOMEM);
+ }
+ }
+
+ mtx_lock(&rdev_p->ctrl_qp.lock);
+
+	/* write the PBL first, if any - update the PBL only if a PBL list exists */
+ if (pbl) {
+
+ CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d",
+ __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base,
+ *pbl_size);
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+ (*pbl_addr >> 5),
+ (*pbl_size << 3), pbl, 0);
+ if (err)
+ goto ret;
+ }
+
+ /* write TPT entry */
+ if (reset_tpt_entry)
+ memset(&tpt, 0, sizeof(tpt));
+ else {
+ tpt.valid_stag_pdid = htobe32(F_TPT_VALID |
+ V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
+ V_TPT_STAG_STATE(stag_state) |
+ V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
+ PANIC_IF(page_size >= 28);
+ tpt.flags_pagesize_qpid = htobe32(V_TPT_PERM(perm) |
+ F_TPT_MW_BIND_ENABLE |
+ V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
+ V_TPT_PAGE_SIZE(page_size));
+ tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
+ htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
+ tpt.len = htobe32(len);
+ tpt.va_hi = htobe32((u32) (to >> 32));
+ tpt.va_low_or_fbo = htobe32((u32) (to & 0xFFFFFFFFULL));
+ tpt.rsvd_bind_cnt_or_pstag = 0;
+ tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
+ htobe32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
+ }
+ err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+ stag_idx +
+ (rdev_p->rnic_info.tpt_base >> 5),
+ sizeof(tpt), &tpt, 1);
+
+ /* release the stag index to free pool */
+ if (reset_tpt_entry)
+ cxio_hal_put_stag(rdev_p->rscp, stag_idx);
+ret:
+ wptr = rdev_p->ctrl_qp.wptr;
+ mtx_unlock(&rdev_p->ctrl_qp.lock);
+ if (!err)
+ if (cxio_wait(&rdev_p->ctrl_qp,
+ &rdev_p->ctrl_qp.lock,
+ SEQ32_GE(rdev_p->ctrl_qp.rptr, wptr)))
+ return (-ERESTART);
+ return err;
+}
+
+int
+cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, __be64 *pbl, u32 *pbl_size,
+ u32 *pbl_addr)
+{
+ *stag = T3_STAG_UNSET;
+ return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+ zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+}
+
+int
+cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, __be64 *pbl, u32 *pbl_size,
+ u32 *pbl_addr)
+{
+ return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+ zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
+}
+
+int
+cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
+ u32 pbl_addr)
+{
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+ &pbl_size, &pbl_addr);
+}
+
+int
+cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
+{
+ u32 pbl_size = 0;
+ *stag = T3_STAG_UNSET;
+ return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
+ NULL, &pbl_size, NULL);
+}
+
+int
+cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
+{
+ return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+ NULL, NULL);
+}
+
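For orientation, a hedged sketch of how a caller could drive these TPT wrappers end to end. The TPT_LOCAL_* permission flags are assumed to come from the ported WR header (iw_cxgb_wr.h), and the PD id, physical address, and 4KB page size are placeholders, not values taken from this commit:

/*
 * Illustration only: register a 64KB physically contiguous buffer
 * through the TPT, then deregister it.  'rdev' and 'pdid' are assumed
 * to come from an already-opened cxio_rdev and an allocated PD.
 */
static int
example_register_buf(struct cxio_rdev *rdev, u32 pdid, u64 paddr)
{
	u32 stag, pbl_size, pbl_addr;
	__be64 pbl[16];			/* 64KB / 4KB pages */
	int i, err;

	for (i = 0; i < 16; i++)
		pbl[i] = htobe64(paddr + ((u64)i << PAGE_SHIFT));
	pbl_size = 16;

	/* page_size is encoded as (shift - 12), so 0 means 4KB pages */
	err = cxio_register_phys_mem(rdev, &stag, pdid,
	    TPT_LOCAL_READ | TPT_LOCAL_WRITE, 0 /* zbva */, paddr,
	    16 * PAGE_SIZE, 0, pbl, &pbl_size, &pbl_addr);
	if (err)
		return (err);

	/* ... hand 'stag' to the verbs layer / hardware here ... */

	return (cxio_dereg_mem(rdev, stag, pbl_size, pbl_addr));
}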
+int
+cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
+{
+ struct t3_rdma_init_wr *wqe;
+ struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT);
+ if (m == NULL)
+ return (-ENOMEM);
+ CTR2(KTR_IW_CXGB, "%s rdev_p %p", __FUNCTION__, rdev_p);
+ wqe = mtod(m, struct t3_rdma_init_wr *);
+ m->m_len = m->m_pkthdr.len = sizeof(*wqe);
+ wqe->wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_INIT));
+ wqe->wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(attr->tid) |
+ V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
+ wqe->wrid.id1 = 0;
+ wqe->qpid = htobe32(attr->qpid);
+ wqe->pdid = htobe32(attr->pdid);
+ wqe->scqid = htobe32(attr->scqid);
+ wqe->rcqid = htobe32(attr->rcqid);
+ wqe->rq_addr = htobe32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
+ wqe->rq_size = htobe32(attr->rq_size);
+ wqe->mpaattrs = attr->mpaattrs;
+ wqe->qpcaps = attr->qpcaps;
+ wqe->ulpdu_size = htobe16(attr->tcp_emss);
+ wqe->flags = htobe32(attr->flags);
+ wqe->ord = htobe32(attr->ord);
+ wqe->ird = htobe32(attr->ird);
+ wqe->qp_dma_addr = htobe64(attr->qp_dma_addr);
+ wqe->qp_dma_size = htobe32(attr->qp_dma_size);
+ wqe->irs = htobe32(attr->irs);
+ m_set_priority(m, 0); /* 0=>ToeQ; 1=>CtrlQ */
+ m_set_sgl(m, NULL);
+ m_set_sgllen(m, 0);
+ return (cxgb_ofld_send(rdev_p->t3cdev_p, m));
+}
+
+void
+cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+ cxio_ev_cb = ev_cb;
+}
+
+void
+cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
+{
+ cxio_ev_cb = NULL;
+}
+
+static int
+cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct mbuf *m)
+{
+ static int cnt;
+ struct cxio_rdev *rdev_p = NULL;
+ struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
+
+ CTR6(KTR_IW_CXGB, "%s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x",
+ __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
+ RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg));
+ CTR4(KTR_IW_CXGB, "se %0x notify %0x cqbranch %0x creditth %0x",
+ RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
+ RSPQ_CREDIT_THRESH(rsp_msg));
+ CTR4(KTR_IW_CXGB, "CQE: QPID 0x%0x type 0x%0x status 0x%0x opcode %d",
+ CQE_QPID(rsp_msg->cqe),
+ CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
+ CQE_OPCODE(rsp_msg->cqe));
+ CTR3(KTR_IW_CXGB, "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
+ CQE_LEN(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
+ rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
+ if (!rdev_p) {
+ CTR2(KTR_IW_CXGB, "%s called by t3cdev %p with null ulp", __FUNCTION__,
+ t3cdev_p);
+ return 0;
+ }
+ if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
+ mtx_lock(&rdev_p->ctrl_qp.lock);
+ rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
+ wakeup(&rdev_p->ctrl_qp);
+ mtx_unlock(&rdev_p->ctrl_qp.lock);
+ m_free(m);
+ } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
+ m_free(m);
+ else if (cxio_ev_cb)
+ (*cxio_ev_cb) (rdev_p, m);
+ else
+ m_free(m);
+ cnt++;
+ return 0;
+}
+
+/* Caller takes care of locking if needed */
+int
+cxio_rdev_open(struct cxio_rdev *rdev_p)
+{
+ struct ifnet *ifp;
+ int err = 0;
+
+ if (strlen(rdev_p->dev_name)) {
+ if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
+ return (-EBUSY);
+ }
+ ifp = rdev_p->ifp;
+ if (ifp == NULL)
+ return (-EINVAL);
+ if_free(ifp);
+ } else if (rdev_p->t3cdev_p) {
+ if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p))
+ return (-EBUSY);
+ ifp = rdev_p->t3cdev_p->lldev;
+ strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
+ T3_MAX_DEV_NAME_LEN);
+ } else {
+ CTR1(KTR_IW_CXGB, "%s t3cdev_p or dev_name must be set", __FUNCTION__);
+ return (-EINVAL);
+ }
+
+ TAILQ_INSERT_TAIL(&rdev_list, rdev_p, entry);
+
+ CTR2(KTR_IW_CXGB, "%s opening rnic dev %s", __FUNCTION__, rdev_p->dev_name);
+ memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
+ if (!rdev_p->t3cdev_p)
+ rdev_p->t3cdev_p = T3CDEV(ifp);
+ rdev_p->t3cdev_p->ulp = (void *) rdev_p;
+ err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
+ &(rdev_p->rnic_info));
+ if (err) {
+ log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n",
+ __FUNCTION__, rdev_p->t3cdev_p, err);
+ goto err1;
+ }
+ err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
+ &(rdev_p->port_info));
+ if (err) {
+ log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n",
+ __FUNCTION__, rdev_p->t3cdev_p, err);
+ goto err1;
+ }
+
+ /*
+ * qpshift is the number of bits to shift the qpid left in order
+ * to get the correct address of the doorbell for that qp.
+ */
+ cxio_init_ucontext(rdev_p, &rdev_p->uctx);
+ rdev_p->qpshift = PAGE_SHIFT -
+ ilog2(65536 >>
+ ilog2(rdev_p->rnic_info.udbell_len >>
+ PAGE_SHIFT));
+ rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
+ rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
+ CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d",
+ rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
+ rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p));
+ CTR4(KTR_IW_CXGB, "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x",
+ rdev_p->rnic_info.pbl_base,
+ rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
+ rdev_p->rnic_info.rqt_top);
+ CTR6(KTR_IW_CXGB, "udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
+ "qpnr %d qpmask 0x%x",
+ rdev_p->rnic_info.udbell_len,
+ rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
+ rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
+
+ err = cxio_hal_init_ctrl_qp(rdev_p);
+ if (err) {
+ log(LOG_ERR, "%s error %d initializing ctrl_qp.\n",
+ __FUNCTION__, err);
+ goto err1;
+ }
+ err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
+ 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
+ T3_MAX_NUM_PD);
+ if (err) {
+ log(LOG_ERR, "%s error %d initializing hal resources.\n",
+ __FUNCTION__, err);
+ goto err2;
+ }
+ err = cxio_hal_pblpool_create(rdev_p);
+ if (err) {
+ log(LOG_ERR, "%s error %d initializing pbl mem pool.\n",
+ __FUNCTION__, err);
+ goto err3;
+ }
+ err = cxio_hal_rqtpool_create(rdev_p);
+ if (err) {
+ log(LOG_ERR, "%s error %d initializing rqt mem pool.\n",
+ __FUNCTION__, err);
+ goto err4;
+ }
+ return 0;
+err4:
+ cxio_hal_pblpool_destroy(rdev_p);
+err3:
+ cxio_hal_destroy_resource(rdev_p->rscp);
+err2:
+ cxio_hal_destroy_ctrl_qp(rdev_p);
+err1:
+ TAILQ_REMOVE(&rdev_list, rdev_p, entry);
+ return err;
+}
+
+void
+cxio_rdev_close(struct cxio_rdev *rdev_p)
+{
+ if (rdev_p) {
+ cxio_hal_pblpool_destroy(rdev_p);
+ cxio_hal_rqtpool_destroy(rdev_p);
+ TAILQ_REMOVE(&rdev_list, rdev_p, entry);
+ rdev_p->t3cdev_p->ulp = NULL;
+ cxio_hal_destroy_ctrl_qp(rdev_p);
+ cxio_hal_destroy_resource(rdev_p->rscp);
+ }
+}
+
+int
+cxio_hal_init(void)
+{
+ TAILQ_INIT(&rdev_list);
+#ifdef needed
+ if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
+ return (-ENOMEM);
+#endif
+ t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
+ return 0;
+}
+
+void
+cxio_hal_exit(void)
+{
+ struct cxio_rdev *rdev, *tmp;
+
+ t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
+ TAILQ_FOREACH_SAFE(rdev, &rdev_list, entry, tmp)
+ cxio_rdev_close(rdev);
+#ifdef needed
+ cxio_hal_destroy_rhdl_resource();
+#endif
+}
+
+static void
+flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
+{
+ struct t3_swsq *sqp;
+ __u32 ptr = wq->sq_rptr;
+ int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
+
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ while (count--)
+ if (!sqp->signaled) {
+ ptr++;
+ sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
+ } else if (sqp->complete) {
+
+ /*
+ * Insert this completed cqe into the swcq.
+ */
+ CTR3(KTR_IW_CXGB, "%s moving cqe into swcq sq idx %ld cq idx %ld",
+ __FUNCTION__, Q_PTR2IDX(ptr, wq->sq_size_log2),
+ Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
+ sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
+ *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
+ = sqp->cqe;
+ cq->sw_wptr++;
+ sqp->signaled = 0;
+ break;
+ } else
+ break;
+}
+
+static void
+create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
+ struct t3_cqe *read_cqe)
+{
+ read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
+ read_cqe->len = wq->oldest_read->read_len;
+ read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
+ V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
+ V_CQE_OPCODE(T3_READ_REQ) |
+ V_CQE_TYPE(1));
+}
+
+/*
+ * Advance wq->oldest_read to the next read WR in the SWSQ, or set it to NULL.
+ */
+static void
+advance_oldest_read(struct t3_wq *wq)
+{
+
+ u32 rptr = wq->oldest_read - wq->sq + 1;
+ u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
+
+ while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
+ wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
+
+ if (wq->oldest_read->opcode == T3_READ_REQ)
+ return;
+ rptr++;
+ }
+ wq->oldest_read = NULL;
+}
+
+/*
+ * cxio_poll_cq
+ *
+ * Caller must:
+ * check the validity of the first CQE,
+ *	supply the wq associated with the qpid.
+ *
+ * credit: cq credit to return to sge.
+ * cqe_flushed: 1 iff the CQE is flushed.
+ * cqe: copy of the polled CQE.
+ *
+ * return value:
+ * 0 CQE returned,
+ * -1 CQE skipped, try again.
+ */
+int
+cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
+ u8 *cqe_flushed, u64 *cookie, u32 *credit)
+{
+ int ret = 0;
+ struct t3_cqe *hw_cqe, read_cqe;
+
+ *cqe_flushed = 0;
+ *credit = 0;
+ hw_cqe = cxio_next_cqe(cq);
+
+ CTR5(KTR_IW_CXGB, "cxio_poll_cq CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x",
+ CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
+ CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe));
+ CTR4(KTR_IW_CXGB, "opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
+ CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
+ CQE_WRID_LOW(*hw_cqe));
+
+ /*
+	 * Skip CQEs not affiliated with a QP.
+ */
+ if (wq == NULL) {
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /*
+ * Gotta tweak READ completions:
+ * 1) the cqe doesn't contain the sq_wptr from the wr.
+ * 2) opcode not reflected from the wr.
+ * 3) read_len not reflected from the wr.
+ * 4) cq_type is RQ_TYPE not SQ_TYPE.
+ */
+ if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
+
+ /*
+ * Don't write to the HWCQ, so create a new read req CQE
+ * in local memory.
+ */
+ create_read_req_cqe(wq, hw_cqe, &read_cqe);
+ hw_cqe = &read_cqe;
+ advance_oldest_read(wq);
+ }
+
+ /*
+ * T3A: Discard TERMINATE CQEs.
+ */
+ if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
+ ret = -1;
+ wq->error = 1;
+ goto skip_cqe;
+ }
+
+ if (CQE_STATUS(*hw_cqe) || wq->error) {
+ *cqe_flushed = wq->error;
+ wq->error = 1;
+
+ /*
+ * T3A inserts errors into the CQE. We cannot return
+ * these as work completions.
+ */
+ /* incoming write failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
+ && RQ_TYPE(*hw_cqe)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+ /* incoming read request failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+
+ /* incoming SEND with no receive posted failures */
+ if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
+ Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+ ret = -1;
+ goto skip_cqe;
+ }
+ goto proc_cqe;
+ }
+
+ /*
+ * RECV completion.
+ */
+ if (RQ_TYPE(*hw_cqe)) {
+
+ /*
+ * HW only validates 4 bits of MSN. So we must validate that
+	 * the MSN in the SEND is the next expected MSN. If it is not,
+ * then we complete this with TPT_ERR_MSN and mark the wq in
+ * error.
+ */
+ if (__predict_false((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
+ wq->error = 1;
+ hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
+ goto proc_cqe;
+ }
+ goto proc_cqe;
+ }
+
+ /*
+	 * If we get here, it is a send completion.
+ *
+ * Handle out of order completion. These get stuffed
+ * in the SW SQ. Then the SW SQ is walked to move any
+ * now in-order completions into the SW CQ. This handles
+ * 2 cases:
+ * 1) reaping unsignaled WRs when the first subsequent
+ * signaled WR is completed.
+ * 2) out of order read completions.
+ */
+ if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
+ struct t3_swsq *sqp;
+
+ CTR2(KTR_IW_CXGB, "%s out of order completion going in swsq at idx %ld",
+ __FUNCTION__,
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
+ sqp = wq->sq +
+ Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
+ sqp->cqe = *hw_cqe;
+ sqp->complete = 1;
+ ret = -1;
+ goto flush_wq;
+ }
+
+proc_cqe:
+ *cqe = *hw_cqe;
+
+ /*
+ * Reap the associated WR(s) that are freed up with this
+ * completion.
+ */
+ if (SQ_TYPE(*hw_cqe)) {
+ wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
+ CTR2(KTR_IW_CXGB, "%s completing sq idx %ld", __FUNCTION__,
+ Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
+ *cookie = (wq->sq +
+ Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
+ wq->sq_rptr++;
+ } else {
+ CTR2(KTR_IW_CXGB, "%s completing rq idx %ld", __FUNCTION__,
+ Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
+ wq->rq_rptr++;
+ }
+
+flush_wq:
+ /*
+ * Flush any completed cqes that are now in-order.
+ */
+ flush_completed_wrs(wq, cq);
+
+skip_cqe:
+ if (SW_CQE(*hw_cqe)) {
+ CTR4(KTR_IW_CXGB, "%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x",
+ __FUNCTION__, cq, cq->cqid, cq->sw_rptr);
+ ++cq->sw_rptr;
+ } else {
+ CTR4(KTR_IW_CXGB, "%s cq %p cqid 0x%x skip hw cqe rptr 0x%x",
+ __FUNCTION__, cq, cq->cqid, cq->rptr);
+ ++cq->rptr;
+
+ /*
+ * T3A: compute credits.
+ */
+ if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
+ || ((cq->rptr - cq->wptr) >= 128)) {
+ *credit = cq->rptr - cq->wptr;
+ cq->wptr = cq->rptr;
+ }
+ }
+ return ret;
+}
+
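The contract documented above implies a simple caller loop: retry while CQEs are skipped (-1), consume on 0, and hand any accumulated credit back to the SGE. A minimal sketch, assuming a valid CQE has already been confirmed as the rules above require (error handling elided):

/*
 * Illustration only: drain one CQE from 'cq' for the queue pair whose
 * work queue is 'wq'.
 */
static int
example_poll_one(struct cxio_rdev *rdev, struct t3_wq *wq, struct t3_cq *cq)
{
	struct t3_cqe cqe;
	u8 flushed;
	u64 cookie;
	u32 credit;
	int ret;

	do {
		ret = cxio_poll_cq(wq, cq, &cqe, &flushed, &cookie, &credit);
		if (credit)	/* return accumulated credit (T3A) to the SGE */
			cxio_hal_cq_op(rdev, cq, CQ_CREDIT_UPDATE, credit);
	} while (ret == -1);	/* -1: CQE skipped, try again */

	/* 'cookie' now holds the wr_id of the completed work request. */
	return (ret);
}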
+
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h
new file mode 100644
index 0000000000000..6a401e09322d7
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h
@@ -0,0 +1,330 @@
+/**************************************************************************
+
+Copyright (c) 2007, 2008 Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef __CXIO_HAL_H__
+#define __CXIO_HAL_H__
+#include <sys/condvar.h>
+#include <sys/ktr.h>
+
+#define T3_CTRL_QP_ID FW_RI_SGEEC_START
+#define T3_CTL_QP_TID FW_RI_TID_START
+#define T3_CTRL_QP_SIZE_LOG2 8
+#define T3_CTRL_CQ_ID 0
+
+/* TBD */
+#define T3_MAX_NUM_RI (1<<15)
+#define T3_MAX_NUM_QP (1<<15)
+#define T3_MAX_NUM_CQ (1<<15)
+#define T3_MAX_NUM_PD (1<<15)
+#define T3_MAX_PBL_SIZE 256
+#define T3_MAX_RQ_SIZE 1024
+#define T3_MAX_NUM_STAG (1<<15)
+
+#define T3_STAG_UNSET 0xffffffff
+
+#define T3_MAX_DEV_NAME_LEN 32
+
+struct cxio_hal_ctrl_qp {
+ u32 wptr;
+ u32 rptr;
+	struct mtx lock;	/* for the wptr, can sleep */
+#ifdef notyet
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+#endif
+ union t3_wr *workq; /* the work request queue */
+ bus_addr_t dma_addr; /* pci bus address of the workq */
+ void /* __iomem */ *doorbell;
+};
+
+struct cxio_hal_resource {
+ struct buf_ring *tpt_fifo;
+ struct mtx tpt_fifo_lock;
+ struct buf_ring *qpid_fifo;
+ struct mtx qpid_fifo_lock;
+ struct buf_ring *cqid_fifo;
+ struct mtx cqid_fifo_lock;
+ struct buf_ring *pdid_fifo;
+ struct mtx pdid_fifo_lock;
+};
+
+struct cxio_qpid {
+ TAILQ_ENTRY(cxio_qpid) entry;
+ u32 qpid;
+};
+
+struct cxio_ucontext {
+ TAILQ_HEAD(, cxio_qpid) qpids;
+ struct mtx lock;
+};
+
+struct cxio_rdev {
+ char dev_name[T3_MAX_DEV_NAME_LEN];
+ struct t3cdev *t3cdev_p;
+ struct rdma_info rnic_info;
+ struct adap_ports port_info;
+ struct cxio_hal_resource *rscp;
+ struct cxio_hal_ctrl_qp ctrl_qp;
+ void *ulp;
+ unsigned long qpshift;
+ u32 qpnr;
+ u32 qpmask;
+ struct cxio_ucontext uctx;
+ struct gen_pool *pbl_pool;
+ struct gen_pool *rqt_pool;
+ struct ifnet *ifp;
+ TAILQ_ENTRY(cxio_rdev) entry;
+};
+
+static __inline int
+cxio_num_stags(struct cxio_rdev *rdev_p)
+{
+ return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
+}
+
+typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
+ struct mbuf * m);
+
+#define RSPQ_CQID(rsp) (be32toh(rsp->cq_ptrid) & 0xffff)
+#define RSPQ_CQPTR(rsp) ((be32toh(rsp->cq_ptrid) >> 16) & 0xffff)
+#define RSPQ_GENBIT(rsp) ((be32toh(rsp->flags) >> 16) & 1)
+#define RSPQ_OVERFLOW(rsp) ((be32toh(rsp->flags) >> 17) & 1)
+#define RSPQ_AN(rsp) ((be32toh(rsp->flags) >> 18) & 1)
+#define RSPQ_SE(rsp) ((be32toh(rsp->flags) >> 19) & 1)
+#define RSPQ_NOTIFY(rsp) ((be32toh(rsp->flags) >> 20) & 1)
+#define RSPQ_CQBRANCH(rsp) ((be32toh(rsp->flags) >> 21) & 1)
+#define RSPQ_CREDIT_THRESH(rsp) ((be32toh(rsp->flags) >> 22) & 1)
+
+struct respQ_msg_t {
+ __be32 flags; /* flit 0 */
+ __be32 cq_ptrid;
+ __be64 rsvd; /* flit 1 */
+ struct t3_cqe cqe; /* flits 2-3 */
+};
+
+enum t3_cq_opcode {
+ CQ_ARM_AN = 0x2,
+ CQ_ARM_SE = 0x6,
+ CQ_FORCE_AN = 0x3,
+ CQ_CREDIT_UPDATE = 0x7
+};
+
+int cxio_rdev_open(struct cxio_rdev *rdev);
+void cxio_rdev_close(struct cxio_rdev *rdev);
+int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
+ enum t3_cq_opcode op, u32 credit);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
+void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
+int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
+ struct cxio_ucontext *uctx);
+int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
+ struct cxio_ucontext *uctx);
+int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
+int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, __be64 *pbl, u32 *pbl_size,
+ u32 *pbl_addr);
+int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+ u8 page_size, __be64 *pbl, u32 *pbl_size,
+ u32 *pbl_addr);
+int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
+ u32 pbl_addr);
+int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
+int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
+int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
+void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
+int cxio_hal_init(void);
+void cxio_hal_exit(void);
+void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+void cxio_flush_hw_cq(struct t3_cq *cq);
+int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
+ u8 *cqe_flushed, u64 *cookie, u32 *credit);
+
+#define MOD "iw_cxgb: "
+
+#ifdef DEBUG
+void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
+void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint32_t len, u8 shift);
+void cxio_dump_wqe(union t3_wr *wqe);
+void cxio_dump_wce(struct t3_cqe *wce);
+void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
+void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
+#endif
+
+
+ static unsigned char hiBitSetTab[] = {
+ 0, 1, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7
+
+};
+
+
+static __inline
+int ilog2(unsigned long val)
+{
+ unsigned long tmp;
+
+ tmp = val >> 24;
+ if (tmp) {
+ return hiBitSetTab[tmp] + 23;
+ }
+ tmp = (val >> 16) & 0xff;
+ if (tmp) {
+ return hiBitSetTab[tmp] + 15;
+ }
+ tmp = (val >> 8) & 0xff;
+ if (tmp) {
+ return hiBitSetTab[tmp] + 7;
+
+ }
+ return hiBitSetTab[val & 0xff] - 1;
+}
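A few spot checks of the lookup-table helper above (illustrative only): it returns the index of the highest set bit, i.e. floor(log2(val)) for val >= 1. Note that, as declared here, hiBitSetTab covers indices 0-127, so the helper relies on the byte it selects staying below 128.

/*
 * Illustration only:
 *   ilog2(1)     == 0
 *   ilog2(2)     == 1
 *   ilog2(64)    == 6
 *   ilog2(256)   == 8
 *   ilog2(4096)  == 12
 *   ilog2(65536) == 16
 * The callers in this commit pass values >= 1 (queue sizes already
 * rounded up with roundup_pow_of_two()), so the val == 0 case never hits.
 */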
+
+#define cxfree(a) free((a), M_DEVBUF)
+#define kmalloc(a, b) malloc((a), M_DEVBUF, (b))
+#define kzalloc(a, b) malloc((a), M_DEVBUF, (b)|M_ZERO)
+
+static __inline __attribute__((const))
+unsigned long roundup_pow_of_two(unsigned long n)
+{
+ return 1UL << flsl(n - 1);
+}
+
+#define PAGE_ALIGN(x) roundup2((x), PAGE_SIZE)
+
+#include <sys/blist.h>
+struct gen_pool {
+ blist_t gen_list;
+ daddr_t gen_base;
+ int gen_chunk_shift;
+ struct mtx gen_lock;
+};
+
+static __inline struct gen_pool *
+gen_pool_create(daddr_t base, u_int chunk_shift, u_int len)
+{
+ struct gen_pool *gp;
+
+ gp = malloc(sizeof(struct gen_pool), M_DEVBUF, M_NOWAIT);
+ if (gp == NULL)
+ return (NULL);
+
+ gp->gen_list = blist_create(len >> chunk_shift, M_NOWAIT);
+ if (gp->gen_list == NULL) {
+ free(gp, M_DEVBUF);
+ return (NULL);
+ }
+ blist_free(gp->gen_list, 0, len >> chunk_shift);
+ gp->gen_base = base;
+ gp->gen_chunk_shift = chunk_shift;
+ mtx_init(&gp->gen_lock, "genpool", NULL, MTX_DUPOK|MTX_DEF);
+
+ return (gp);
+}
+
+static __inline unsigned long
+gen_pool_alloc(struct gen_pool *gp, int size)
+{
+ int chunks;
+ daddr_t blkno;
+
+ chunks = (size + (1<<gp->gen_chunk_shift) - 1) >> gp->gen_chunk_shift;
+ mtx_lock(&gp->gen_lock);
+ blkno = blist_alloc(gp->gen_list, chunks);
+ mtx_unlock(&gp->gen_lock);
+
+ if (blkno == SWAPBLK_NONE)
+ return (0);
+
+ return (gp->gen_base + ((1 << gp->gen_chunk_shift) * blkno));
+}
+
+static __inline void
+gen_pool_free(struct gen_pool *gp, daddr_t address, int size)
+{
+ int chunks;
+ daddr_t blkno;
+
+ chunks = (size + (1<<gp->gen_chunk_shift) - 1) >> gp->gen_chunk_shift;
+ blkno = (address - gp->gen_base) / (1 << gp->gen_chunk_shift);
+ mtx_lock(&gp->gen_lock);
+ blist_free(gp->gen_list, blkno, chunks);
+ mtx_unlock(&gp->gen_lock);
+}
+
+static __inline void
+gen_pool_destroy(struct gen_pool *gp)
+{
+ blist_destroy(gp->gen_list);
+ free(gp, M_DEVBUF);
+}
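The blist-backed pool above stands in for the Linux genalloc API that the PBL and RQT pools are built on. A hedged usage sketch; the base offset, chunk shift, and length below are placeholder values, not ones used by this driver:

/*
 * Illustration only: carve 8-byte chunks out of a 64KB window starting
 * at a hypothetical adapter offset, then return and destroy them.
 */
static void
example_gen_pool(void)
{
	struct gen_pool *gp;
	unsigned long addr;

	/* chunk_shift 3 => allocation granularity of 1 << 3 = 8 bytes */
	gp = gen_pool_create(0x10000 /* base */, 3, 0x10000 /* len */);
	if (gp == NULL)
		return;

	addr = gen_pool_alloc(gp, 256);		/* rounds up to 32 chunks */
	if (addr != 0)
		gen_pool_free(gp, addr, 256);

	gen_pool_destroy(gp);
}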
+
+#define cxio_wait(ctx, lockp, cond) \
+({ \
+ int __ret = 0; \
+ mtx_lock(lockp); \
+	while (!(cond)) { \
+ msleep(ctx, lockp, 0, "cxio_wait", hz); \
+ if (SIGPENDING(curthread)) { \
+ __ret = ERESTART; \
+ break; \
+ } \
+ } \
+ mtx_unlock(lockp); \
+ __ret; \
+})
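A minimal usage sketch for the wait macro above: the condition is re-evaluated under the lock each time the channel is woken, and a pending signal turns into ERESTART. The structure and field names below are hypothetical, not part of this driver:

/*
 * Illustration only: block until 'done' becomes non-zero; some other
 * context sets it under the same lock and calls wakeup(w).
 */
struct my_waiter {
	struct mtx	lock;
	int		done;
};

static int
example_wait(struct my_waiter *w)
{
	/* Returns 0 once w->done is set, or ERESTART on a pending signal. */
	return (cxio_wait(w, &w->lock, w->done != 0));
}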
+extern struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev);
+
+#define KTR_IW_CXGB KTR_SPARE4
+
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c
new file mode 100644
index 0000000000000..df06f87a9805c
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c
@@ -0,0 +1,219 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/libkern.h>
+
+#include <netinet/in.h>
+
+#include <contrib/rdma/ib_verbs.h>
+#include <contrib/rdma/ib_umem.h>
+#include <contrib/rdma/ib_user_verbs.h>
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#include <ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <ulp/iw_cxgb/iw_cxgb_user.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h>
+#endif
+
+
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ __be64 *page_list)
+{
+ u32 stag;
+ u32 mmid;
+
+
+ if (cxio_register_phys_mem(&rhp->rdev,
+ &stag, mhp->attr.pdid,
+ mhp->attr.perms,
+ mhp->attr.zbva,
+ mhp->attr.va_fbo,
+ mhp->attr.len,
+ shift-12,
+ page_list,
+ &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ return (-ENOMEM);
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp);
+ return 0;
+}
+
+int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ __be64 *page_list,
+ int npages)
+{
+ u32 stag;
+ u32 mmid;
+
+
+ /* We could support this... */
+ if (npages > mhp->attr.pbl_size)
+ return (-ENOMEM);
+
+ stag = mhp->attr.stag;
+ if (cxio_reregister_phys_mem(&rhp->rdev,
+ &stag, mhp->attr.pdid,
+ mhp->attr.perms,
+ mhp->attr.zbva,
+ mhp->attr.va_fbo,
+ mhp->attr.len,
+ shift-12,
+ page_list,
+ &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
+ return (-ENOMEM);
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp);
+ return 0;
+}
+
+int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ u64 *iova_start,
+ u64 *total_size,
+ int *npages,
+ int *shift,
+ __be64 **page_list)
+{
+ u64 mask;
+ int i, j, n;
+
+ mask = 0;
+ *total_size = 0;
+ for (i = 0; i < num_phys_buf; ++i) {
+ if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
+ return (-EINVAL);
+ if (i != 0 && i != num_phys_buf - 1 &&
+ (buffer_list[i].size & ~PAGE_MASK))
+ return (-EINVAL);
+ *total_size += buffer_list[i].size;
+ if (i > 0)
+ mask |= buffer_list[i].addr;
+ else
+ mask |= buffer_list[i].addr & PAGE_MASK;
+ if (i != num_phys_buf - 1)
+ mask |= buffer_list[i].addr + buffer_list[i].size;
+ else
+ mask |= (buffer_list[i].addr + buffer_list[i].size +
+ PAGE_SIZE - 1) & PAGE_MASK;
+ }
+
+ if (*total_size > 0xFFFFFFFFULL)
+ return (-ENOMEM);
+
+ /* Find largest page shift we can use to cover buffers */
+ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
+ if ((1ULL << *shift) & mask)
+ break;
+
+ buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
+ buffer_list[0].addr &= ~0ull << *shift;
+
+ *npages = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ *npages += (buffer_list[i].size +
+ (1ULL << *shift) - 1) >> *shift;
+
+ if (!*npages)
+ return (-EINVAL);
+
+ *page_list = kmalloc(sizeof(u64) * *npages, M_NOWAIT);
+ if (!*page_list)
+ return (-ENOMEM);
+
+ n = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ for (j = 0;
+ j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
+ ++j)
+ (*page_list)[n++] = htobe64(buffer_list[i].addr +
+ ((u64) j << *shift));
+
+ CTR6(KTR_IW_CXGB, "%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d",
+ __FUNCTION__, (unsigned long long) *iova_start,
+ (unsigned long long) mask, *shift, (unsigned long long) *total_size,
+ *npages);
+
+ return 0;
+
+}
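For orientation, a hedged worked example of the shift selection above, assuming PAGE_MASK clears the low page-offset bits as in the Linux code this is ported from, and using hypothetical page-aligned buffers:

/*
 * Illustration only (hypothetical addresses, PAGE_SIZE = 4KB):
 *
 *   buffer 0: addr 0x200000, size 0x10000
 *   buffer 1: addr 0x210000, size 0x10000
 *
 *   mask = 0x200000            (buffer 0 start)
 *        | 0x210000            (buffer 0 end = buffer 1 start)
 *        | 0x220000            (buffer 1 end, rounded up to a page)
 *        = 0x230000
 *
 * The lowest set bit of the mask is bit 16, so the loop stops at
 * *shift == 16 (64KB pages) and *npages == 2: one page per buffer.
 */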
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
new file mode 100644
index 0000000000000..4ef7dc5f8c979
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
@@ -0,0 +1,1295 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+
+#include <netinet/in.h>
+
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <contrib/rdma/ib_verbs.h>
+#include <contrib/rdma/ib_umem.h>
+#include <contrib/rdma/ib_user_verbs.h>
+
+
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#include <ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <ulp/iw_cxgb/iw_cxgb_user.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h>
+#endif
+
+
+static int
+iwch_modify_port(struct ib_device *ibdev,
+ u8 port, int port_modify_mask,
+ struct ib_port_modify *props)
+{
+ return (-ENOSYS);
+}
+
+static struct ib_ah *
+iwch_ah_create(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static int
+iwch_ah_destroy(struct ib_ah *ah)
+{
+ return (-ENOSYS);
+}
+
+static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return (-ENOSYS);
+}
+
+static int
+iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return (-ENOSYS);
+}
+
+static int
+iwch_process_mad(struct ib_device *ibdev,
+ int mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ return (-ENOSYS);
+}
+
+static int
+iwch_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct iwch_dev *rhp = to_iwch_dev(context->device);
+ struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
+ struct iwch_mm_entry *mm, *tmp;
+
+ CTR2(KTR_IW_CXGB, "%s context %p", __FUNCTION__, context);
+ TAILQ_FOREACH_SAFE(mm, &ucontext->mmaps, entry, tmp) {
+ TAILQ_REMOVE(&ucontext->mmaps, mm, entry);
+ cxfree(mm);
+ }
+ cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
+ cxfree(ucontext);
+ return 0;
+}
+
+static struct ib_ucontext *
+iwch_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata)
+{
+ struct iwch_ucontext *context;
+ struct iwch_dev *rhp = to_iwch_dev(ibdev);
+
+ CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
+ context = malloc(sizeof(*context), M_DEVBUF, M_ZERO|M_NOWAIT);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+ cxio_init_ucontext(&rhp->rdev, &context->uctx);
+ TAILQ_INIT(&context->mmaps);
+ mtx_init(&context->mmap_lock, "ucontext mmap", NULL, MTX_DEF);
+ return &context->ibucontext;
+}
+
+static int
+iwch_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct iwch_cq *chp;
+
+ CTR2(KTR_IW_CXGB, "%s ib_cq %p", __FUNCTION__, ib_cq);
+ chp = to_iwch_cq(ib_cq);
+
+ remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
+ mtx_lock(&chp->lock);
+ if (--chp->refcnt)
+ msleep(chp, &chp->lock, 0, "iwch_destroy_cq", 0);
+ mtx_unlock(&chp->lock);
+
+ cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+ cxfree(chp);
+ return 0;
+}
+
+static struct ib_cq *
+iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
+ struct ib_ucontext *ib_context,
+ struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ struct iwch_create_cq_resp uresp;
+ struct iwch_create_cq_req ureq;
+ struct iwch_ucontext *ucontext = NULL;
+
+ CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries);
+ rhp = to_iwch_dev(ibdev);
+ chp = malloc(sizeof(*chp), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!chp) {
+ return ERR_PTR(-ENOMEM);
+ }
+ if (ib_context) {
+ ucontext = to_iwch_ucontext(ib_context);
+ if (!t3a_device(rhp)) {
+ if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) {
+ cxfree(chp);
+ return ERR_PTR(-EFAULT);
+ }
+ chp->user_rptr_addr = (u32 /*__user */*)(unsigned long)ureq.user_rptr_addr;
+ }
+ }
+
+ if (t3a_device(rhp)) {
+
+ /*
+ * T3A: Add some fluff to handle extra CQEs inserted
+ * for various errors.
+ * Additional CQE possibilities:
+ * TERMINATE,
+		 *	incoming RDMA WRITE failures,
+		 *	incoming RDMA READ REQUEST failures.
+ * NOTE: We cannot ensure the CQ won't overflow.
+ */
+ entries += 16;
+ }
+ entries = roundup_pow_of_two(entries);
+ chp->cq.size_log2 = ilog2(entries);
+
+ if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
+ cxfree(chp);
+ return ERR_PTR(-ENOMEM);
+ }
+ chp->rhp = rhp;
+ chp->ibcq.cqe = 1 << chp->cq.size_log2;
+ mtx_init(&chp->lock, "cxgb cq", NULL, MTX_DEF|MTX_DUPOK);
+ chp->refcnt = 1;
+ insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+
+ if (ucontext) {
+ struct iwch_mm_entry *mm;
+
+ mm = kmalloc(sizeof *mm, M_NOWAIT);
+ if (!mm) {
+ iwch_destroy_cq(&chp->ibcq);
+ return ERR_PTR(-ENOMEM);
+ }
+ uresp.cqid = chp->cq.cqid;
+ uresp.size_log2 = chp->cq.size_log2;
+ mtx_lock(&ucontext->mmap_lock);
+ uresp.key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ mtx_unlock(&ucontext->mmap_lock);
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ cxfree(mm);
+ iwch_destroy_cq(&chp->ibcq);
+ return ERR_PTR(-EFAULT);
+ }
+ mm->key = uresp.key;
+ mm->addr = vtophys(chp->cq.queue);
+ mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
+ sizeof (struct t3_cqe));
+ insert_mmap(ucontext, mm);
+ }
+ CTR4(KTR_IW_CXGB, "created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx",
+ chp->cq.cqid, chp, (1 << chp->cq.size_log2),
+ (unsigned long long) chp->cq.dma_addr);
+ return &chp->ibcq;
+}
+
+static int
+iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+#ifdef notyet
+ struct iwch_cq *chp = to_iwch_cq(cq);
+ struct t3_cq oldcq, newcq;
+ int ret;
+
+ CTR3(KTR_IW_CXGB, "%s ib_cq %p cqe %d", __FUNCTION__, cq, cqe);
+
+ /* We don't downsize... */
+ if (cqe <= cq->cqe)
+ return 0;
+
+ /* create new t3_cq with new size */
+ cqe = roundup_pow_of_two(cqe+1);
+ newcq.size_log2 = ilog2(cqe);
+
+	/* Don't allow resizing to less than the current WCE count */
+ if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
+ return (-ENOMEM);
+ }
+
+ /* Quiesce all QPs using this CQ */
+ ret = iwch_quiesce_qps(chp);
+ if (ret) {
+ return (ret);
+ }
+
+ ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
+ if (ret) {
+ return (ret);
+ }
+
+ /* copy CQEs */
+ memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
+ sizeof(struct t3_cqe));
+
+ /* old iwch_qp gets new t3_cq but keeps old cqid */
+ oldcq = chp->cq;
+ chp->cq = newcq;
+ chp->cq.cqid = oldcq.cqid;
+
+ /* resize new t3_cq to update the HW context */
+ ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
+ if (ret) {
+ chp->cq = oldcq;
+ return ret;
+ }
+ chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
+
+ /* destroy old t3_cq */
+ oldcq.cqid = newcq.cqid;
+ ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
+ if (ret) {
+ log(LOG_ERR, "%s - cxio_destroy_cq failed %d\n",
+ __FUNCTION__, ret);
+ }
+
+ /* add user hooks here */
+
+ /* resume qps */
+ ret = iwch_resume_qps(chp);
+ return ret;
+#else
+ return (-ENOSYS);
+#endif
+}
+
+static int
+iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct iwch_dev *rhp;
+ struct iwch_cq *chp;
+ enum t3_cq_opcode cq_op;
+ int err;
+ u32 rptr;
+
+ chp = to_iwch_cq(ibcq);
+ rhp = chp->rhp;
+ if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
+ cq_op = CQ_ARM_SE;
+ else
+ cq_op = CQ_ARM_AN;
+ if (chp->user_rptr_addr) {
+ if (copyin(&rptr, chp->user_rptr_addr, 4))
+ return (-EFAULT);
+ mtx_lock(&chp->lock);
+ chp->cq.rptr = rptr;
+ } else
+ mtx_lock(&chp->lock);
+ CTR2(KTR_IW_CXGB, "%s rptr 0x%x", __FUNCTION__, chp->cq.rptr);
+ err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
+ mtx_unlock(&chp->lock);
+ if (err < 0)
+ log(LOG_ERR, "Error %d rearming CQID 0x%x\n", err,
+ chp->cq.cqid);
+ if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
+ err = 0;
+ return err;
+}
+
+#ifdef notyet
+static int
+iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+#ifdef notyet
+ int len = vma->vm_end - vma->vm_start;
+ u32 key = vma->vm_pgoff << PAGE_SHIFT;
+ struct cxio_rdev *rdev_p;
+ int ret = 0;
+ struct iwch_mm_entry *mm;
+ struct iwch_ucontext *ucontext;
+ u64 addr;
+
+ CTR4(KTR_IW_CXGB, "%s pgoff 0x%lx key 0x%x len %d", __FUNCTION__, vma->vm_pgoff,
+ key, len);
+
+ if (vma->vm_start & (PAGE_SIZE-1)) {
+ return (-EINVAL);
+ }
+
+ rdev_p = &(to_iwch_dev(context->device)->rdev);
+ ucontext = to_iwch_ucontext(context);
+
+ mm = remove_mmap(ucontext, key, len);
+ if (!mm)
+ return (-EINVAL);
+ addr = mm->addr;
+ cxfree(mm);
+
+ if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
+ (addr < (rdev_p->rnic_info.udbell_physbase +
+ rdev_p->rnic_info.udbell_len))) {
+
+ /*
+ * Map T3 DB register.
+ */
+ if (vma->vm_flags & VM_READ) {
+ return (-EPERM);
+ }
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+ vma->vm_flags &= ~VM_MAYREAD;
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ } else {
+
+ /*
+ * Map WQ or CQ contig dma memory...
+ */
+ ret = remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+
+ return ret;
+#endif
+ return (0);
+}
+#endif
+
+static int iwch_deallocate_pd(struct ib_pd *pd)
+{
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ CTR3(KTR_IW_CXGB, "%s ibpd %p pdid 0x%x", __FUNCTION__, pd, php->pdid);
+ cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
+ cxfree(php);
+ return 0;
+}
+
+static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct iwch_pd *php;
+ u32 pdid;
+ struct iwch_dev *rhp;
+
+ CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
+ rhp = (struct iwch_dev *) ibdev;
+ pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
+ if (!pdid)
+ return ERR_PTR(-EINVAL);
+ php = malloc(sizeof(*php), M_DEVBUF, M_ZERO|M_NOWAIT);
+ if (!php) {
+ cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
+ return ERR_PTR(-ENOMEM);
+ }
+ php->pdid = pdid;
+ php->rhp = rhp;
+ if (context) {
+ if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) {
+ iwch_deallocate_pd(&php->ibpd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+ CTR3(KTR_IW_CXGB, "%s pdid 0x%0x ptr 0x%p", __FUNCTION__, pdid, php);
+ return &php->ibpd;
+}
+
+static int iwch_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mr *mhp;
+ u32 mmid;
+
+ CTR2(KTR_IW_CXGB, "%s ib_mr %p", __FUNCTION__, ib_mr);
+ /* There can be no memory windows */
+ if (atomic_load_acq_int(&ib_mr->usecnt))
+ return (-EINVAL);
+
+ mhp = to_iwch_mr(ib_mr);
+ rhp = mhp->rhp;
+ mmid = mhp->attr.stag >> 8;
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ if (mhp->kva)
+ cxfree((void *) (unsigned long) mhp->kva);
+ if (mhp->umem)
+ ib_umem_release(mhp->umem);
+ CTR3(KTR_IW_CXGB, "%s mmid 0x%x ptr %p", __FUNCTION__, mmid, mhp);
+ cxfree(mhp);
+ return 0;
+}
+
+static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc,
+ u64 *iova_start)
+{
+ __be64 *page_list;
+ int shift;
+ u64 total_size;
+ int npages;
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mr *mhp;
+ int ret;
+
+ CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd);
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+
+ mhp = malloc(sizeof(*mhp), M_DEVBUF, M_ZERO|M_NOWAIT);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ /* First check that we have enough alignment */
+ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (num_phys_buf > 1 &&
+ ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
+ &total_size, &npages, &shift, &page_list);
+ if (ret)
+ goto err;
+
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
+ cxfree(page_list);
+ if (ret) {
+ goto err;
+ }
+ return &mhp->ibmr;
+err:
+ cxfree(mhp);
+ return ERR_PTR(-ret);
+
+}
+
+static int iwch_reregister_phys_mem(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc, u64 * iova_start)
+{
+
+ struct iwch_mr mh, *mhp;
+ struct iwch_pd *php;
+ struct iwch_dev *rhp;
+ __be64 *page_list = NULL;
+ int shift = 0;
+ u64 total_size;
+ int npages;
+ int ret;
+
+ CTR3(KTR_IW_CXGB, "%s ib_mr %p ib_pd %p", __FUNCTION__, mr, pd);
+
+ /* There can be no memory windows */
+ if (atomic_load_acq_int(&mr->usecnt))
+ return (-EINVAL);
+
+ mhp = to_iwch_mr(mr);
+ rhp = mhp->rhp;
+ php = to_iwch_pd(mr->pd);
+
+ /* make sure we are on the same adapter */
+ if (rhp != php->rhp)
+ return (-EINVAL);
+
+ memcpy(&mh, mhp, sizeof *mhp);
+
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ php = to_iwch_pd(pd);
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ mh.attr.perms = iwch_ib_to_tpt_access(acc);
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ ret = build_phys_page_list(buffer_list, num_phys_buf,
+ iova_start,
+ &total_size, &npages,
+ &shift, &page_list);
+ if (ret)
+ return ret;
+ }
+
+ ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
+ cxfree(page_list);
+ if (ret) {
+ return ret;
+ }
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ mhp->attr.pdid = php->pdid;
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ mhp->attr.zbva = 0;
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ }
+
+ return 0;
+}
+
+
+static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
+{
+ __be64 *pages;
+ int shift, i, n;
+ int err = 0;
+ struct ib_umem_chunk *chunk;
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mr *mhp;
+ struct iwch_reg_user_mr_resp uresp;
+#ifdef notyet
+ int j, k, len;
+#endif
+
+ CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd);
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ mhp = malloc(sizeof(*mhp), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
+ if (IS_ERR(mhp->umem)) {
+ err = PTR_ERR(mhp->umem);
+ cxfree(mhp);
+ return ERR_PTR(-err);
+ }
+
+ shift = ffs(mhp->umem->page_size) - 1;
+
+ n = 0;
+ TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
+ n += chunk->nents;
+
+ pages = kmalloc(n * sizeof(u64), M_NOWAIT);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ i = n = 0;
+
+#if 0
+ TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = htobe64(sg_dma_address(
+ &chunk->page_list[j]) +
+ mhp->umem->page_size * k);
+ }
+ }
+#endif
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+ mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+ mhp->attr.va_fbo = virt;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = (u32) length;
+ mhp->attr.pbl_size = i;
+ err = iwch_register_mem(rhp, php, mhp, shift, pages);
+ cxfree(pages);
+ if (err)
+ goto err;
+
+ if (udata && !t3a_device(rhp)) {
+ uresp.pbl_addr = (mhp->attr.pbl_addr -
+ rhp->rdev.rnic_info.pbl_base) >> 3;
+ CTR2(KTR_IW_CXGB, "%s user resp pbl_addr 0x%x", __FUNCTION__,
+ uresp.pbl_addr);
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ iwch_dereg_mr(&mhp->ibmr);
+ err = EFAULT;
+ goto err;
+ }
+ }
+
+ return &mhp->ibmr;
+
+err:
+ ib_umem_release(mhp->umem);
+ cxfree(mhp);
+ return ERR_PTR(-err);
+}
+
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct ib_phys_buf bl;
+ u64 kva;
+ struct ib_mr *ibmr;
+
+ CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd);
+
+ /*
+ * T3 only supports 32 bits of size.
+ */
+ bl.size = 0xffffffff;
+ bl.addr = 0;
+ kva = 0;
+ ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
+ return ibmr;
+}
+
+static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
+{
+ struct iwch_dev *rhp;
+ struct iwch_pd *php;
+ struct iwch_mw *mhp;
+ u32 mmid;
+ u32 stag = 0;
+ int ret;
+
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ mhp = malloc(sizeof(*mhp), M_DEVBUF, M_ZERO|M_NOWAIT);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+ ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
+ if (ret) {
+ cxfree(mhp);
+ return ERR_PTR(-ret);
+ }
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.type = TPT_MW;
+ mhp->attr.stag = stag;
+ mmid = (stag) >> 8;
+ insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ CTR4(KTR_IW_CXGB, "%s mmid 0x%x mhp %p stag 0x%x", __FUNCTION__, mmid, mhp, stag);
+ return &(mhp->ibmw);
+}
+
+static int iwch_dealloc_mw(struct ib_mw *mw)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mw *mhp;
+ u32 mmid;
+
+ mhp = to_iwch_mw(mw);
+ rhp = mhp->rhp;
+ mmid = (mw->rkey) >> 8;
+ cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ cxfree(mhp);
+ CTR4(KTR_IW_CXGB, "%s ib_mw %p mmid 0x%x ptr %p", __FUNCTION__, mw, mmid, mhp);
+ return 0;
+}
+
+static int iwch_destroy_qp(struct ib_qp *ib_qp)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ struct iwch_qp_attributes attrs;
+ struct iwch_ucontext *ucontext;
+
+ qhp = to_iwch_qp(ib_qp);
+ rhp = qhp->rhp;
+
+ attrs.next_state = IWCH_QP_STATE_ERROR;
+ iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
+ mtx_lock(&qhp->lock);
+ if (qhp->ep)
+ msleep(qhp, &qhp->lock, 0, "iwch_destroy_qp1", 0);
+ mtx_unlock(&qhp->lock);
+
+ remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
+
+ mtx_lock(&qhp->lock);
+ if (--qhp->refcnt)
+ msleep(qhp, &qhp->lock, 0, "iwch_destroy_qp2", 0);
+ mtx_unlock(&qhp->lock);
+
+ ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context)
+ : NULL;
+ cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+
+ CTR4(KTR_IW_CXGB, "%s ib_qp %p qpid 0x%0x qhp %p", __FUNCTION__,
+ ib_qp, qhp->wq.qpid, qhp);
+ cxfree(qhp);
+ return 0;
+}
+
+static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ struct iwch_pd *php;
+ struct iwch_cq *schp;
+ struct iwch_cq *rchp;
+ struct iwch_create_qp_resp uresp;
+ int wqsize, sqsize, rqsize;
+ struct iwch_ucontext *ucontext;
+
+ CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd);
+ if (attrs->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+ php = to_iwch_pd(pd);
+ rhp = php->rhp;
+ schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
+ rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
+ if (!schp || !rchp)
+ return ERR_PTR(-EINVAL);
+
+ /* The RQT size must be # of entries + 1 rounded up to a power of two */
+ rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
+ if (rqsize == attrs->cap.max_recv_wr)
+ rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
+
+ /* T3 doesn't support RQT depth < 16 */
+ if (rqsize < 16)
+ rqsize = 16;
+
+ if (rqsize > T3_MAX_RQ_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ if (attrs->cap.max_inline_data > T3_MAX_INLINE)
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * NOTE: The SQ and total WQ sizes don't need to be
+ * a power of two. However, all the code assumes
+	 * they are, e.g., Q_FREECNT() and friends.
+ */
+ sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
+ wqsize = roundup_pow_of_two(rqsize + sqsize);
+ CTR4(KTR_IW_CXGB, "%s wqsize %d sqsize %d rqsize %d", __FUNCTION__,
+ wqsize, sqsize, rqsize);
+ qhp = malloc(sizeof(*qhp), M_DEVBUF, M_ZERO|M_NOWAIT);
+ if (!qhp)
+ return ERR_PTR(-ENOMEM);
+ qhp->wq.size_log2 = ilog2(wqsize);
+ qhp->wq.rq_size_log2 = ilog2(rqsize);
+ qhp->wq.sq_size_log2 = ilog2(sqsize);
+ ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
+ if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
+ cxfree(qhp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ attrs->cap.max_recv_wr = rqsize - 1;
+ attrs->cap.max_send_wr = sqsize;
+ attrs->cap.max_inline_data = T3_MAX_INLINE;
+
+ qhp->rhp = rhp;
+ qhp->attr.pd = php->pdid;
+ qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
+ qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
+ qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
+ qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+ qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
+ qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
+ qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ qhp->attr.next_state = IWCH_QP_STATE_IDLE;
+
+ /*
+ * XXX - These don't get passed in from the openib user
+ * at create time. The CM sets them via a QP modify.
+ * Need to fix... I think the CM should
+ */
+ qhp->attr.enable_rdma_read = 1;
+ qhp->attr.enable_rdma_write = 1;
+ qhp->attr.enable_bind = 1;
+ qhp->attr.max_ord = 1;
+ qhp->attr.max_ird = 1;
+
+ mtx_init(&qhp->lock, "cxgb qp", NULL, MTX_DEF|MTX_DUPOK);
+ qhp->refcnt = 1;
+ insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+ if (udata) {
+
+ struct iwch_mm_entry *mm1, *mm2;
+
+ mm1 = kmalloc(sizeof *mm1, M_NOWAIT);
+ if (!mm1) {
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mm2 = kmalloc(sizeof *mm2, M_NOWAIT);
+ if (!mm2) {
+ cxfree(mm1);
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ uresp.qpid = qhp->wq.qpid;
+ uresp.size_log2 = qhp->wq.size_log2;
+ uresp.sq_size_log2 = qhp->wq.sq_size_log2;
+ uresp.rq_size_log2 = qhp->wq.rq_size_log2;
+ mtx_lock(&ucontext->mmap_lock);
+ uresp.key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ uresp.db_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ mtx_unlock(&ucontext->mmap_lock);
+ if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ cxfree(mm1);
+ cxfree(mm2);
+ iwch_destroy_qp(&qhp->ibqp);
+ return ERR_PTR(-EFAULT);
+ }
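+ /*
+  * mm1 covers the work queue memory and mm2 the doorbell page; the
+  * keys handed back in uresp are what user space later presents to
+  * look these mappings up again (see insert_mmap/remove_mmap in
+  * iw_cxgb_provider.h).
+  */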
+ mm1->key = uresp.key;
+ mm1->addr = vtophys(qhp->wq.queue);
+ mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr));
+ insert_mmap(ucontext, mm1);
+ mm2->key = uresp.db_key;
+ mm2->addr = qhp->wq.udb & PAGE_MASK;
+ mm2->len = PAGE_SIZE;
+ insert_mmap(ucontext, mm2);
+ }
+ qhp->ibqp.qp_num = qhp->wq.qpid;
+ callout_init(&(qhp->timer), TRUE);
+ CTR6(KTR_IW_CXGB, "sq_num_entries %d, rq_num_entries %d "
+ "qpid 0x%0x qhp %p dma_addr 0x%llx size %d",
+ qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+ qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
+ 1 << qhp->wq.size_log2);
+ return &qhp->ibqp;
+}
+
+static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct iwch_dev *rhp;
+ struct iwch_qp *qhp;
+ enum iwch_qp_attr_mask mask = 0;
+ struct iwch_qp_attributes attrs;
+
+ CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, ibqp);
+
+ /* iwarp does not support the RTR state */
+ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
+ attr_mask &= ~IB_QP_STATE;
+
+ /* Make sure we still have something left to do */
+ if (!attr_mask)
+ return 0;
+
+ memset(&attrs, 0, sizeof attrs);
+ qhp = to_iwch_qp(ibqp);
+ rhp = qhp->rhp;
+
+ attrs.next_state = iwch_convert_state(attr->qp_state);
+ attrs.enable_rdma_read = (attr->qp_access_flags &
+ IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ attrs.enable_rdma_write = (attr->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+
+ mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
+ mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
+ (IWCH_QP_ATTR_ENABLE_RDMA_READ |
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
+ IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
+
+ return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
+}
+
+void iwch_qp_add_ref(struct ib_qp *qp)
+{
+ CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, qp);
+ mtx_lock(&to_iwch_qp(qp)->lock);
+ to_iwch_qp(qp)->refcnt++;
+ mtx_unlock(&to_iwch_qp(qp)->lock);
+}
+
+void iwch_qp_rem_ref(struct ib_qp *qp)
+{
+ CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, qp);
+ mtx_lock(&to_iwch_qp(qp)->lock);
+ if (--to_iwch_qp(qp)->refcnt == 0)
+ wakeup(to_iwch_qp(qp));
+ mtx_unlock(&to_iwch_qp(qp)->lock);
+}
+
+static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
+{
+ CTR3(KTR_IW_CXGB, "%s ib_dev %p qpn 0x%x", __FUNCTION__, dev, qpn);
+ return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
+}
+
+
+static int iwch_query_pkey(struct ib_device *ibdev,
+ u8 port, u16 index, u16 * pkey)
+{
+ CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
+ *pkey = 0;
+ return 0;
+}
+
+static int iwch_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct iwch_dev *dev;
+ struct port_info *pi;
+
+ CTR5(KTR_IW_CXGB, "%s ibdev %p, port %d, index %d, gid %p",
+ __FUNCTION__, ibdev, port, index, gid);
+ dev = to_iwch_dev(ibdev);
+ PANIC_IF(port == 0 || port > 2);
+ pi = ((struct port_info *)dev->rdev.port_info.lldevs[port-1]->if_softc);
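+ /* The returned GID is simply the port MAC address zero-padded to 16 bytes. */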
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ memcpy(&(gid->raw[0]), pi->hw_addr, 6);
+ return 0;
+}
+
+static int iwch_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+
+ struct iwch_dev *dev;
+ CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
+
+ dev = to_iwch_dev(ibdev);
+ memset(props, 0, sizeof *props);
+#ifdef notyet
+ memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->if_addr.ifa_addr, 6);
+#endif
+ props->device_cap_flags = dev->device_cap_flags;
+#ifdef notyet
+ props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
+ props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
+#endif
+ props->max_mr_size = ~0ull;
+ props->max_qp = dev->attr.max_qps;
+ props->max_qp_wr = dev->attr.max_wrs;
+ props->max_sge = dev->attr.max_sge_per_wr;
+ props->max_sge_rd = 1;
+ props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
+ props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
+ props->max_cq = dev->attr.max_cqs;
+ props->max_cqe = dev->attr.max_cqes_per_cq;
+ props->max_mr = dev->attr.max_mem_regs;
+ props->max_pd = dev->attr.max_pds;
+ props->local_ca_ack_delay = 0;
+
+ return 0;
+}
+
+static int iwch_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev);
+ props->max_mtu = IB_MTU_4096;
+ props->lid = 0;
+ props->lmc = 0;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 0;
+ props->port_cap_flags =
+ IB_PORT_CM_SUP |
+ IB_PORT_SNMP_TUNNEL_SUP |
+ IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->qkey_viol_cntr = 0;
+ props->active_width = 2;
+ props->active_speed = 2;
+ props->max_msg_sz = -1;
+
+ return 0;
+}
+
+#ifdef notyet
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+ struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+ ibdev.class_dev);
+ CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
+ return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+ struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+ ibdev.class_dev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
+
+ CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ return sprintf(buf, "%s\n", info.fw_version);
+}
+
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+ struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+ ibdev.class_dev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
+
+ CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ return sprintf(buf, "%s\n", info.driver);
+}
+
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+ struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
+ ibdev.class_dev);
+ CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, dev);
+#ifdef notyet
+ return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor,
+ dev->rdev.rnic_info.pdev->device);
+#else
+ return sprintf(buf, "%x.%x\n", 0xdead, 0xbeef); /* XXX */
+#endif
+}
+
+static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct class_device_attribute *iwch_class_attributes[] = {
+ &class_device_attr_hw_rev,
+ &class_device_attr_fw_ver,
+ &class_device_attr_hca_type,
+ &class_device_attr_board_id
+};
+#endif
+
+int iwch_register_device(struct iwch_dev *dev)
+{
+ int ret;
+#ifdef notyet
+ int i;
+#endif
+ CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev);
+ strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
+ memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+#ifdef notyet
+ memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
+#endif
+ dev->device_cap_flags =
+ (IB_DEVICE_ZERO_STAG |
+ IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
+
+ dev->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV);
+ dev->ibdev.node_type = RDMA_NODE_RNIC;
+ memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
+ dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
+ dev->ibdev.num_comp_vectors = 1;
+ dev->ibdev.dma_device = dev->rdev.rnic_info.pdev;
+ dev->ibdev.query_device = iwch_query_device;
+ dev->ibdev.query_port = iwch_query_port;
+ dev->ibdev.modify_port = iwch_modify_port;
+ dev->ibdev.query_pkey = iwch_query_pkey;
+ dev->ibdev.query_gid = iwch_query_gid;
+ dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
+ dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
+#ifdef notyet
+ dev->ibdev.mmap = iwch_mmap;
+#endif
+ dev->ibdev.alloc_pd = iwch_allocate_pd;
+ dev->ibdev.dealloc_pd = iwch_deallocate_pd;
+ dev->ibdev.create_ah = iwch_ah_create;
+ dev->ibdev.destroy_ah = iwch_ah_destroy;
+ dev->ibdev.create_qp = iwch_create_qp;
+ dev->ibdev.modify_qp = iwch_ib_modify_qp;
+ dev->ibdev.destroy_qp = iwch_destroy_qp;
+ dev->ibdev.create_cq = iwch_create_cq;
+ dev->ibdev.destroy_cq = iwch_destroy_cq;
+ dev->ibdev.resize_cq = iwch_resize_cq;
+ dev->ibdev.poll_cq = iwch_poll_cq;
+ dev->ibdev.get_dma_mr = iwch_get_dma_mr;
+ dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
+ dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
+ dev->ibdev.reg_user_mr = iwch_reg_user_mr;
+ dev->ibdev.dereg_mr = iwch_dereg_mr;
+ dev->ibdev.alloc_mw = iwch_alloc_mw;
+ dev->ibdev.bind_mw = iwch_bind_mw;
+ dev->ibdev.dealloc_mw = iwch_dealloc_mw;
+
+ dev->ibdev.attach_mcast = iwch_multicast_attach;
+ dev->ibdev.detach_mcast = iwch_multicast_detach;
+ dev->ibdev.process_mad = iwch_process_mad;
+
+ dev->ibdev.req_notify_cq = iwch_arm_cq;
+ dev->ibdev.post_send = iwch_post_send;
+ dev->ibdev.post_recv = iwch_post_receive;
+
+
+ dev->ibdev.iwcm =
+ (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
+ M_NOWAIT);
+ if (dev->ibdev.iwcm == NULL)
+ return (-ENOMEM);
+ dev->ibdev.iwcm->connect = iwch_connect;
+ dev->ibdev.iwcm->accept = iwch_accept_cr;
+ dev->ibdev.iwcm->reject = iwch_reject_cr;
+ dev->ibdev.iwcm->create_listen = iwch_create_listen;
+ dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
+ dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
+ dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
+ dev->ibdev.iwcm->get_qp = iwch_get_qp;
+
+ ret = ib_register_device(&dev->ibdev);
+ if (ret)
+ goto bail1;
+#ifdef notyet
+ for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
+ ret = class_device_create_file(&dev->ibdev.class_dev,
+ iwch_class_attributes[i]);
+ if (ret) {
+ goto bail2;
+ }
+ }
+#endif
+ return 0;
+#ifdef notyet
+bail2:
+#endif
+ ib_unregister_device(&dev->ibdev);
+bail1:
+ return ret;
+}
+
+void iwch_unregister_device(struct iwch_dev *dev)
+{
+#ifdef notyet
+ int i;
+
+ CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev);
+
+ for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
+ class_device_remove_file(&dev->ibdev.class_dev,
+ iwch_class_attributes[i]);
+#endif
+ ib_unregister_device(&dev->ibdev);
+ return;
+}
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h
new file mode 100644
index 0000000000000..c857ce8e5b8f0
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h
@@ -0,0 +1,362 @@
+/**************************************************************************
+
+Copyright (c) 2007, 2008 Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef __IWCH_PROVIDER_H__
+#define __IWCH_PROVIDER_H__
+
+#include <contrib/rdma/ib_verbs.h>
+
+struct iwch_pd {
+ struct ib_pd ibpd;
+ u32 pdid;
+ struct iwch_dev *rhp;
+};
+
+#ifndef container_of
+#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field)))
+#endif
+static __inline struct iwch_pd *
+to_iwch_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct iwch_pd, ibpd);
+}
+
+struct tpt_attributes {
+ u32 stag;
+ u32 state:1;
+ u32 type:2;
+ u32 rsvd:1;
+ enum tpt_mem_perm perms;
+ u32 remote_invaliate_disable:1;
+ u32 zbva:1;
+ u32 mw_bind_enable:1;
+ u32 page_size:5;
+
+ u32 pdid;
+ u32 qpid;
+ u32 pbl_addr;
+ u32 len;
+ u64 va_fbo;
+ u32 pbl_size;
+};
+
+struct iwch_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct iwch_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+typedef struct iwch_mw iwch_mw_handle;
+
+static __inline struct iwch_mr *
+to_iwch_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct iwch_mr, ibmr);
+}
+
+struct iwch_mw {
+ struct ib_mw ibmw;
+ struct iwch_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+static __inline struct iwch_mw *
+to_iwch_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct iwch_mw, ibmw);
+}
+
+struct iwch_cq {
+ struct ib_cq ibcq;
+ struct iwch_dev *rhp;
+ struct t3_cq cq;
+ struct mtx lock;
+ int refcnt;
+ u32 /* __user */ *user_rptr_addr;
+};
+
+static __inline struct iwch_cq *
+to_iwch_cq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct iwch_cq, ibcq);
+}
+
+enum IWCH_QP_FLAGS {
+ QP_QUIESCED = 0x01
+};
+
+struct iwch_mpa_attributes {
+ u8 recv_marker_enabled;
+ u8 xmit_marker_enabled; /* iWARP: MPA markers on transmit */
+ u8 crc_enabled;
+ u8 version; /* 0 or 1 */
+};
+
+struct iwch_qp_attributes {
+ u32 scq;
+ u32 rcq;
+ u32 sq_num_entries;
+ u32 rq_num_entries;
+ u32 sq_max_sges;
+ u32 sq_max_sges_rdma_write;
+ u32 rq_max_sges;
+ u32 state;
+ u8 enable_rdma_read;
+ u8 enable_rdma_write; /* enable inbound Read Resp. */
+ u8 enable_bind;
+ u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */
+ /*
+ * Next QP state (next_state below). If the current state is
+ * specified, only the QP attributes are modified.
+ */
+ u32 max_ord;
+ u32 max_ird;
+ u32 pd; /* IN */
+ u32 next_state;
+ char terminate_buffer[52];
+ u32 terminate_msg_len;
+ u8 is_terminate_local;
+ struct iwch_mpa_attributes mpa_attr; /* IN-OUT */
+ struct iwch_ep *llp_stream_handle;
+ char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */
+ u32 stream_msg_buf_len; /* Only on Idle -> RTS */
+};
+
+struct iwch_qp {
+ struct ib_qp ibqp;
+ struct iwch_dev *rhp;
+ struct iwch_ep *ep;
+ struct iwch_qp_attributes attr;
+ struct t3_wq wq;
+ struct mtx lock;
+ int refcnt;
+ enum IWCH_QP_FLAGS flags;
+ struct callout timer;
+};
+
+static __inline int
+qp_quiesced(struct iwch_qp *qhp)
+{
+ return qhp->flags & QP_QUIESCED;
+}
+
+static __inline struct iwch_qp *
+to_iwch_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct iwch_qp, ibqp);
+}
+
+void iwch_qp_add_ref(struct ib_qp *qp);
+void iwch_qp_rem_ref(struct ib_qp *qp);
+
+struct iwch_ucontext {
+ struct ib_ucontext ibucontext;
+ struct cxio_ucontext uctx;
+ u32 key;
+ struct mtx mmap_lock;
+ TAILQ_HEAD( ,iwch_mm_entry) mmaps;
+};
+
+static __inline struct iwch_ucontext *
+to_iwch_ucontext(struct ib_ucontext *c)
+{
+ return container_of(c, struct iwch_ucontext, ibucontext);
+}
+
+struct iwch_mm_entry {
+ TAILQ_ENTRY(iwch_mm_entry) entry;
+ u64 addr;
+ u32 key;
+ unsigned len;
+};
+
+static __inline struct iwch_mm_entry *
+remove_mmap(struct iwch_ucontext *ucontext,
+ u32 key, unsigned len)
+{
+ struct iwch_mm_entry *tmp, *mm;
+
+ mtx_lock(&ucontext->mmap_lock);
+ TAILQ_FOREACH_SAFE(mm, &ucontext->mmaps, entry, tmp) {
+ if (mm->key == key && mm->len == len) {
+ TAILQ_REMOVE(&ucontext->mmaps, mm, entry);
+ mtx_unlock(&ucontext->mmap_lock);
+ CTR4(KTR_IW_CXGB, "%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__,
+ key, (unsigned long long) mm->addr, mm->len);
+ return mm;
+ }
+ }
+ mtx_unlock(&ucontext->mmap_lock);
+
+ return NULL;
+}
+
+static __inline void
+insert_mmap(struct iwch_ucontext *ucontext,
+ struct iwch_mm_entry *mm)
+{
+ mtx_lock(&ucontext->mmap_lock);
+ CTR4(KTR_IW_CXGB, "%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__,
+ mm->key, (unsigned long long) mm->addr, mm->len);
+ TAILQ_INSERT_TAIL(&ucontext->mmaps, mm, entry);
+ mtx_unlock(&ucontext->mmap_lock);
+}
+
+enum iwch_qp_attr_mask {
+ IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
+ IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
+ IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
+ IWCH_QP_ATTR_MAX_ORD = 1 << 11,
+ IWCH_QP_ATTR_MAX_IRD = 1 << 12,
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
+ IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
+ IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
+ IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
+ IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
+ IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
+ IWCH_QP_ATTR_MAX_ORD |
+ IWCH_QP_ATTR_MAX_IRD |
+ IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+ IWCH_QP_ATTR_STREAM_MSG_BUFFER |
+ IWCH_QP_ATTR_MPA_ATTR |
+ IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
+};
+
+int iwch_modify_qp(struct iwch_dev *rhp,
+ struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs,
+ int internal);
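+
+/*
+ * Illustrative use of the mask/attrs pair (a sketch only, not a quote
+ * from the CM code): to drive a QP to RTS the connection manager fills
+ * in attrs.next_state = IWCH_QP_STATE_RTS plus the MPA attributes and
+ * LLP stream handle, then calls
+ *
+ *   iwch_modify_qp(rhp, qhp,
+ *       IWCH_QP_ATTR_NEXT_STATE | IWCH_QP_ATTR_LLP_STREAM_HANDLE |
+ *       IWCH_QP_ATTR_MPA_ATTR | IWCH_QP_ATTR_STREAM_MSG_BUFFER,
+ *       &attrs, 1);
+ *
+ * The IDLE->RTS transition in iw_cxgb_qp.c requires at least the
+ * LLP_STREAM_HANDLE and MPA_ATTR bits in the mask.
+ */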
+
+enum iwch_qp_state {
+ IWCH_QP_STATE_IDLE,
+ IWCH_QP_STATE_RTS,
+ IWCH_QP_STATE_ERROR,
+ IWCH_QP_STATE_TERMINATE,
+ IWCH_QP_STATE_CLOSING,
+ IWCH_QP_STATE_TOT
+};
+
+static __inline int
+iwch_convert_state(enum ib_qp_state ib_state)
+{
+ switch (ib_state) {
+ case IB_QPS_RESET:
+ case IB_QPS_INIT:
+ return IWCH_QP_STATE_IDLE;
+ case IB_QPS_RTS:
+ return IWCH_QP_STATE_RTS;
+ case IB_QPS_SQD:
+ return IWCH_QP_STATE_CLOSING;
+ case IB_QPS_SQE:
+ return IWCH_QP_STATE_TERMINATE;
+ case IB_QPS_ERR:
+ return IWCH_QP_STATE_ERROR;
+ default:
+ return -1;
+ }
+}
+
+static __inline u32
+iwch_ib_to_tpt_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
+ TPT_LOCAL_READ;
+}
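+
+/*
+ * Example of the mapping above: acc = IB_ACCESS_LOCAL_WRITE |
+ * IB_ACCESS_REMOTE_READ yields TPT_LOCAL_WRITE | TPT_REMOTE_READ |
+ * TPT_LOCAL_READ; local read permission is always granted.
+ */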
+
+static __inline u32
+iwch_ib_to_mwbind_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) |
+ T3_MEM_ACCESS_LOCAL_READ;
+}
+
+enum iwch_mmid_state {
+ IWCH_STAG_STATE_VALID,
+ IWCH_STAG_STATE_INVALID
+};
+
+enum iwch_qp_query_flags {
+ IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */
+ IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */
+ IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */
+
+ /*
+ * Quiesce QP context; Consumer
+ * will NOT replay outstanding WR
+ */
+ IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
+ IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
+ IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
+};
+
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int iwch_bind_mw(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_register_device(struct iwch_dev *dev);
+void iwch_unregister_device(struct iwch_dev *dev);
+int iwch_quiesce_qps(struct iwch_cq *chp);
+int iwch_resume_qps(struct iwch_cq *chp);
+void stop_read_rep_timer(struct iwch_qp *qhp);
+int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ __be64 *page_list);
+int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ struct iwch_mr *mhp,
+ int shift,
+ __be64 *page_list,
+ int npages);
+int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ u64 *iova_start,
+ u64 *total_size,
+ int *npages,
+ int *shift,
+ __be64 **page_list);
+
+
+#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
+
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
new file mode 100644
index 0000000000000..3c203f1b43375
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c
@@ -0,0 +1,1052 @@
+
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+
+#include <netinet/in.h>
+
+#include <contrib/rdma/ib_verbs.h>
+#include <contrib/rdma/ib_umem.h>
+#include <contrib/rdma/ib_user_verbs.h>
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#include <ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <ulp/iw_cxgb/iw_cxgb_user.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h>
+#endif
+
+
+#define NO_SUPPORT -1
+
+static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 * flit_cnt)
+{
+ int i;
+ u32 plen;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->send.rdmaop = T3_SEND_WITH_SE;
+ else
+ wqe->send.rdmaop = T3_SEND;
+ wqe->send.rem_stag = 0;
+ break;
+#if 0 /* Not currently supported */
+ case TYPE_SEND_INVALIDATE:
+ case TYPE_SEND_INVALIDATE_IMMEDIATE:
+ wqe->send.rdmaop = T3_SEND_WITH_INV;
+ wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
+ break;
+ case TYPE_SEND_SE_INVALIDATE:
+ wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
+ wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey);
+ break;
+#endif
+ default:
+ break;
+ }
+ if (wr->num_sge > T3_MAX_SGE)
+ return (-EINVAL);
+ wqe->send.reserved[0] = 0;
+ wqe->send.reserved[1] = 0;
+ wqe->send.reserved[2] = 0;
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ plen = 4;
+ wqe->send.sgl[0].stag = wr->imm_data;
+ wqe->send.sgl[0].len = 0;
+ wqe->send.num_sgle = 0;
+ *flit_cnt = 5;
+ } else {
+ plen = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) < plen) {
+ return (-EMSGSIZE);
+ }
+ plen += wr->sg_list[i].length;
+ wqe->send.sgl[i].stag =
+ htobe32(wr->sg_list[i].lkey);
+ wqe->send.sgl[i].len =
+ htobe32(wr->sg_list[i].length);
+ wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr);
+ }
+ wqe->send.num_sgle = htobe32(wr->num_sge);
+ *flit_cnt = 4 + ((wr->num_sge) << 1);
+ }
+ wqe->send.plen = htobe32(plen);
+ return 0;
+}
+
+static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
+{
+ int i;
+ u32 plen;
+
+ if (wr->num_sge > T3_MAX_SGE)
+ return (-EINVAL);
+ wqe->write.rdmaop = T3_RDMA_WRITE;
+ wqe->write.reserved[0] = 0;
+ wqe->write.reserved[1] = 0;
+ wqe->write.reserved[2] = 0;
+ wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
+ wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);
+
+ if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+ plen = 4;
+ wqe->write.sgl[0].stag = wr->imm_data;
+ wqe->write.sgl[0].len = 0;
+ wqe->write.num_sgle = 0;
+ *flit_cnt = 6;
+ } else {
+ plen = 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) < plen) {
+ return (-EMSGSIZE);
+ }
+ plen += wr->sg_list[i].length;
+ wqe->write.sgl[i].stag =
+ htobe32(wr->sg_list[i].lkey);
+ wqe->write.sgl[i].len =
+ htobe32(wr->sg_list[i].length);
+ wqe->write.sgl[i].to =
+ htobe64(wr->sg_list[i].addr);
+ }
+ wqe->write.num_sgle = htobe32(wr->num_sge);
+ *flit_cnt = 5 + ((wr->num_sge) << 1);
+ }
+ wqe->write.plen = htobe32(plen);
+ return 0;
+}
+
+static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
+ u8 *flit_cnt)
+{
+ if (wr->num_sge > 1)
+ return (-EINVAL);
+ wqe->read.rdmaop = T3_READ_REQ;
+ wqe->read.reserved[0] = 0;
+ wqe->read.reserved[1] = 0;
+ wqe->read.reserved[2] = 0;
+ wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey);
+ wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr);
+ wqe->read.local_stag = htobe32(wr->sg_list[0].lkey);
+ wqe->read.local_len = htobe32(wr->sg_list[0].length);
+ wqe->read.local_to = htobe64(wr->sg_list[0].addr);
+ *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+ return 0;
+}
+
+/*
+ * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
+ */
+static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
+ u32 num_sgle, u32 * pbl_addr, u8 * page_size)
+{
+ int i;
+ struct iwch_mr *mhp;
+ u32 offset;
+ for (i = 0; i < num_sgle; i++) {
+
+ mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
+ if (!mhp) {
+ CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
+ return (-EIO);
+ }
+ if (!mhp->attr.state) {
+ CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
+ return (-EIO);
+ }
+ if (mhp->attr.zbva) {
+ CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
+ return (-EIO);
+ }
+
+ if (sg_list[i].addr < mhp->attr.va_fbo) {
+ CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
+ return (-EINVAL);
+ }
+ if (sg_list[i].addr + ((u64) sg_list[i].length) <
+ sg_list[i].addr) {
+ CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
+ return (-EINVAL);
+ }
+ if (sg_list[i].addr + ((u64) sg_list[i].length) >
+ mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
+ CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
+ return (-EINVAL);
+ }
+ offset = sg_list[i].addr - mhp->attr.va_fbo;
+ offset += ((u32) mhp->attr.va_fbo) %
+ (1UL << (12 + mhp->attr.page_size));
+ pbl_addr[i] = ((mhp->attr.pbl_addr -
+ rhp->rdev.rnic_info.pbl_base) >> 3) +
+ (offset >> (12 + mhp->attr.page_size));
+ page_size[i] = mhp->attr.page_size;
+ }
+ return 0;
+}
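+
+/*
+ * Worked example of the translation above (hypothetical values): with
+ * 4KB pages (attr.page_size = 0), attr.va_fbo = 0x1000 and
+ * sg_list[i].addr = 0x3000, the offset comes out to 0x2000, so
+ * pbl_addr[i] is the MR's first PBL index plus 2 (offset >> 12), i.e.
+ * the third page of the region.
+ */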
+
+static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
+ struct ib_recv_wr *wr)
+{
+ int i;
+ if (wr->num_sge > T3_MAX_SGE)
+ return (-EINVAL);
+ wqe->recv.num_sgle = htobe32(wr->num_sge);
+ for (i = 0; i < wr->num_sge; i++) {
+ wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey);
+ wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length);
+ wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr);
+ }
+ for (; i < T3_MAX_SGE; i++) {
+ wqe->recv.sgl[i].stag = 0;
+ wqe->recv.sgl[i].len = 0;
+ wqe->recv.sgl[i].to = 0;
+ }
+ return 0;
+}
+
+int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ int err = 0;
+ u8 t3_wr_flit_cnt = 0;
+ enum t3_wr_opcode t3_wr_opcode = 0;
+ enum t3_wr_flags t3_wr_flags;
+ struct iwch_qp *qhp;
+ u32 idx;
+ union t3_wr *wqe;
+ u32 num_wrs;
+ struct t3_swsq *sqp;
+
+ qhp = to_iwch_qp(ibqp);
+ mtx_lock(&qhp->lock);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ mtx_unlock(&qhp->lock);
+ return (-EINVAL);
+ }
+ num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
+ qhp->wq.sq_size_log2);
+ if (num_wrs <= 0) {
+ mtx_unlock(&qhp->lock);
+ return (-ENOMEM);
+ }
+ while (wr) {
+ if (num_wrs == 0) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+ t3_wr_flags = 0;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
+ if (wr->send_flags & IB_SEND_FENCE)
+ t3_wr_flags |= T3_READ_FENCE_FLAG;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ t3_wr_flags |= T3_COMPLETION_FLAG;
+ sqp = qhp->wq.sq +
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ t3_wr_opcode = T3_WR_SEND;
+ err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
+ break;
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ t3_wr_opcode = T3_WR_WRITE;
+ err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
+ break;
+ case IB_WR_RDMA_READ:
+ t3_wr_opcode = T3_WR_READ;
+ t3_wr_flags = 0; /* T3 reads are always signaled */
+ err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
+ if (err)
+ break;
+ sqp->read_len = wqe->read.local_len;
+ if (!qhp->wq.oldest_read)
+ qhp->wq.oldest_read = sqp;
+ break;
+ default:
+ CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__,
+ wr->opcode);
+ err = -EINVAL;
+ }
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+ wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+ sqp->wr_id = wr->wr_id;
+ sqp->opcode = wr2opcode(t3_wr_opcode);
+ sqp->sq_wptr = qhp->wq.sq_wptr;
+ sqp->complete = 0;
+ sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+
+ build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+ 0, t3_wr_flit_cnt);
+ CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d",
+ __FUNCTION__, (unsigned long long) wr->wr_id, idx,
+ Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
+ sqp->opcode);
+ wr = wr->next;
+ num_wrs--;
+ ++(qhp->wq.wptr);
+ ++(qhp->wq.sq_wptr);
+ }
+ mtx_unlock(&qhp->lock);
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ return err;
+}
+
+int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int err = 0;
+ struct iwch_qp *qhp;
+ u32 idx;
+ union t3_wr *wqe;
+ u32 num_wrs;
+
+ qhp = to_iwch_qp(ibqp);
+ mtx_lock(&qhp->lock);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ mtx_unlock(&qhp->lock);
+ return (-EINVAL);
+ }
+ num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
+ qhp->wq.rq_size_log2) - 1;
+ if (!wr) {
+ mtx_unlock(&qhp->lock);
+ return (-EINVAL);
+ }
+ while (wr) {
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+ if (num_wrs)
+ err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
+ else
+ err = -ENOMEM;
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+ qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
+ wr->wr_id;
+ build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
+ 0, sizeof(struct t3_receive_wr) >> 3);
+ CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
+ "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id,
+ idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
+ ++(qhp->wq.rq_wptr);
+ ++(qhp->wq.wptr);
+ wr = wr->next;
+ num_wrs--;
+ }
+ mtx_unlock(&qhp->lock);
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ return err;
+}
+
+int iwch_bind_mw(struct ib_qp *qp,
+ struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind)
+{
+ struct iwch_dev *rhp;
+ struct iwch_mw *mhp;
+ struct iwch_qp *qhp;
+ union t3_wr *wqe;
+ u32 pbl_addr;
+ u8 page_size;
+ u32 num_wrs;
+ struct ib_sge sgl;
+ int err=0;
+ enum t3_wr_flags t3_wr_flags;
+ u32 idx;
+ struct t3_swsq *sqp;
+
+ qhp = to_iwch_qp(qp);
+ mhp = to_iwch_mw(mw);
+ rhp = qhp->rhp;
+
+ mtx_lock(&qhp->lock);
+ if (qhp->attr.state > IWCH_QP_STATE_RTS) {
+ mtx_unlock(&qhp->lock);
+ return (-EINVAL);
+ }
+ num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
+ qhp->wq.sq_size_log2);
+ if ((num_wrs) <= 0) {
+ mtx_unlock(&qhp->lock);
+ return (-ENOMEM);
+ }
+ idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
+ CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx,
+ mw, mw_bind);
+ wqe = (union t3_wr *) (qhp->wq.queue + idx);
+
+ t3_wr_flags = 0;
+ if (mw_bind->send_flags & IB_SEND_SIGNALED)
+ t3_wr_flags = T3_COMPLETION_FLAG;
+
+ sgl.addr = mw_bind->addr;
+ sgl.lkey = mw_bind->mr->lkey;
+ sgl.length = mw_bind->length;
+ wqe->bind.reserved = 0;
+ wqe->bind.type = T3_VA_BASED_TO;
+
+ /* TBD: check perms */
+ wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
+ wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey);
+ wqe->bind.mw_stag = htobe32(mw->rkey);
+ wqe->bind.mw_len = htobe32(mw_bind->length);
+ wqe->bind.mw_va = htobe64(mw_bind->addr);
+ err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
+ if (err) {
+ mtx_unlock(&qhp->lock);
+ return (err);
+ }
+ wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
+ sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
+ sqp->wr_id = mw_bind->wr_id;
+ sqp->opcode = T3_BIND_MW;
+ sqp->sq_wptr = qhp->wq.sq_wptr;
+ sqp->complete = 0;
+ sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
+ wqe->bind.mr_pbl_addr = htobe32(pbl_addr);
+ wqe->bind.mr_pagesz = page_size;
+ wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
+ build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
+ Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
+ sizeof(struct t3_bind_mw_wr) >> 3);
+ ++(qhp->wq.wptr);
+ ++(qhp->wq.sq_wptr);
+ mtx_unlock(&qhp->lock);
+
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+ return err;
+}
+
+static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
+ u8 *layer_type, u8 *ecode)
+{
+ int status = TPT_ERR_INTERNAL_ERR;
+ int tagged = 0;
+ int opcode = -1;
+ int rqtype = 0;
+ int send_inv = 0;
+
+ if (rsp_msg) {
+ status = CQE_STATUS(rsp_msg->cqe);
+ opcode = CQE_OPCODE(rsp_msg->cqe);
+ rqtype = RQ_TYPE(rsp_msg->cqe);
+ send_inv = (opcode == T3_SEND_WITH_INV) ||
+ (opcode == T3_SEND_WITH_SE_INV);
+ tagged = (opcode == T3_RDMA_WRITE) ||
+ (rqtype && (opcode == T3_READ_RESP));
+ }
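+ /*
+  * Map the adapter's CQE status onto the TERMINATE layer/etype and
+  * error code defined by the RDMAP, DDP and MPA layers of the iWARP
+  * specs.
+  */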
+
+ switch (status) {
+ case TPT_ERR_STAG:
+ if (send_inv) {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_CANT_INV_STAG;
+ } else {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_INV_STAG;
+ }
+ break;
+ case TPT_ERR_PDID:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ if ((opcode == T3_SEND_WITH_INV) ||
+ (opcode == T3_SEND_WITH_SE_INV))
+ *ecode = RDMAP_CANT_INV_STAG;
+ else
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ break;
+ case TPT_ERR_QPID:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ break;
+ case TPT_ERR_ACCESS:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_ACC_VIOL;
+ break;
+ case TPT_ERR_WRAP:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_TO_WRAP;
+ break;
+ case TPT_ERR_BOUND:
+ if (tagged) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_BASE_BOUNDS;
+ } else {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_BASE_BOUNDS;
+ }
+ break;
+ case TPT_ERR_INVALIDATE_SHARED_MR:
+ case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_CANT_INV_STAG;
+ break;
+ case TPT_ERR_ECC:
+ case TPT_ERR_ECC_PSTAG:
+ case TPT_ERR_INTERNAL_ERR:
+ *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ case TPT_ERR_OUT_OF_RQE:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MSN_NOBUF;
+ break;
+ case TPT_ERR_PBL_ADDR_BOUND:
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_BASE_BOUNDS;
+ break;
+ case TPT_ERR_CRC:
+ *layer_type = LAYER_MPA|DDP_LLP;
+ *ecode = MPA_CRC_ERR;
+ break;
+ case TPT_ERR_MARKER:
+ *layer_type = LAYER_MPA|DDP_LLP;
+ *ecode = MPA_MARKER_ERR;
+ break;
+ case TPT_ERR_PDU_LEN_ERR:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_MSG_TOOBIG;
+ break;
+ case TPT_ERR_DDP_VERSION:
+ if (tagged) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_INV_VERS;
+ } else {
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_VERS;
+ }
+ break;
+ case TPT_ERR_RDMA_VERSION:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_INV_VERS;
+ break;
+ case TPT_ERR_OPCODE:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_INV_OPCODE;
+ break;
+ case TPT_ERR_DDP_QUEUE_NUM:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_QN;
+ break;
+ case TPT_ERR_MSN:
+ case TPT_ERR_MSN_GAP:
+ case TPT_ERR_MSN_RANGE:
+ case TPT_ERR_IRD_OVERFLOW:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MSN_RANGE;
+ break;
+ case TPT_ERR_TBIT:
+ *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ case TPT_ERR_MO:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MO;
+ break;
+ default:
+ *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ }
+}
+
+/*
+ * This posts a TERMINATE with layer=RDMA, type=catastrophic.
+ */
+int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
+{
+ union t3_wr *wqe;
+ struct terminate_message *term;
+ struct mbuf *m;
+
+ CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__);
+ m = m_gethdr(MT_DATA, M_NOWAIT);
+ if (!m) {
+ log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__);
+ return (-ENOMEM);
+ }
+ wqe = mtod(m, union t3_wr *);
+ m->m_len = m->m_pkthdr.len = 40;
+ memset(wqe, 0, 40);
+ wqe->send.rdmaop = T3_TERMINATE;
+
+ /* immediate data length */
+ wqe->send.plen = htonl(4);
+
+ /* immediate data starts here. */
+ term = (struct terminate_message *)wqe->send.sgl;
+ build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
+ wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) |
+ V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
+ wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid));
+
+ m_set_priority(m, CPL_PRIORITY_DATA);
+ m_set_sgl(m, NULL);
+ m_set_sgllen(m, 0);
+ return cxgb_ofld_send(qhp->rhp->rdev.t3cdev_p, m);
+}
+
+/*
+ * Assumes qhp lock is held.
+ */
+static void __flush_qp(struct iwch_qp *qhp)
+{
+ struct iwch_cq *rchp, *schp;
+ int count;
+
+ rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+ schp = get_chp(qhp->rhp, qhp->attr.scq);
+
+ CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp);
+ /* take a ref on the qhp since we must release the lock */
+ qhp->refcnt++;
+ mtx_unlock(&qhp->lock);
+
+ /* locking hierarchy: cq lock first, then qp lock. */
+ mtx_lock(&rchp->lock);
+ mtx_lock(&qhp->lock);
+ cxio_flush_hw_cq(&rchp->cq);
+ cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
+ cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+ mtx_unlock(&qhp->lock);
+ mtx_unlock(&rchp->lock);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+
+ /* locking hierarchy: cq lock first, then qp lock. */
+ mtx_lock(&schp->lock);
+ mtx_lock(&qhp->lock);
+ cxio_flush_hw_cq(&schp->cq);
+ cxio_count_scqes(&schp->cq, &qhp->wq, &count);
+ cxio_flush_sq(&qhp->wq, &schp->cq, count);
+ mtx_unlock(&qhp->lock);
+ mtx_unlock(&schp->lock);
+ (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+
+ /* deref */
+ mtx_lock(&qhp->lock);
+ if (--qhp->refcnt == 0)
+ wakeup(qhp);
+}
+
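+/*
+ * QPs created on behalf of user space (ibqp.uobject set) are only
+ * marked in error here; kernel QPs are flushed to their CQs
+ * immediately.
+ */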
+static void flush_qp(struct iwch_qp *qhp)
+{
+ if (qhp->ibqp.uobject)
+ cxio_set_wq_in_error(&qhp->wq);
+ else
+ __flush_qp(qhp);
+}
+
+
+/*
+ * Return non-zero if at least one RECV was pre-posted.
+ */
+static int rqes_posted(struct iwch_qp *qhp)
+{
+ return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+}
+
+static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs)
+{
+ struct t3_rdma_init_attr init_attr;
+ int ret;
+
+ init_attr.tid = qhp->ep->hwtid;
+ init_attr.qpid = qhp->wq.qpid;
+ init_attr.pdid = qhp->attr.pd;
+ init_attr.scqid = qhp->attr.scq;
+ init_attr.rcqid = qhp->attr.rcq;
+ init_attr.rq_addr = qhp->wq.rq_addr;
+ init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
+ init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
+ qhp->attr.mpa_attr.recv_marker_enabled |
+ (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
+ (qhp->attr.mpa_attr.crc_enabled << 2);
+
+ /*
+ * XXX - The IWCM doesn't quite handle getting these
+ * attrs set before going into RTS. For now, just turn
+ * them on always...
+ */
+#if 0
+ init_attr.qpcaps = qhp->attr.enableRdmaRead |
+ (qhp->attr.enableRdmaWrite << 1) |
+ (qhp->attr.enableBind << 2) |
+ (qhp->attr.enable_stag0_fastreg << 3) |
+ (qhp->attr.enable_stag0_fastreg << 4);
+#else
+ init_attr.qpcaps = 0x1f;
+#endif
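+ /* 0x1f turns on all five capability bits listed in the disabled block above. */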
+ init_attr.tcp_emss = qhp->ep->emss;
+ init_attr.ord = qhp->attr.max_ord;
+ init_attr.ird = qhp->attr.max_ird;
+ init_attr.qp_dma_addr = qhp->wq.dma_addr;
+ init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
+ init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+ init_attr.irs = qhp->ep->rcv_seq;
+ CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
+ "flags 0x%x qpcaps 0x%x", __FUNCTION__,
+ init_attr.rq_addr, init_attr.rq_size,
+ init_attr.flags, init_attr.qpcaps);
+ ret = cxio_rdma_init(&rhp->rdev, &init_attr);
+ CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret);
+ return ret;
+}
+
+int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ enum iwch_qp_attr_mask mask,
+ struct iwch_qp_attributes *attrs,
+ int internal)
+{
+ int ret = 0;
+ struct iwch_qp_attributes newattr = qhp->attr;
+ int disconnect = 0;
+ int terminate = 0;
+ int abort = 0;
+ int free = 0;
+ struct iwch_ep *ep = NULL;
+
+ CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__,
+ qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
+ (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+
+ mtx_lock(&qhp->lock);
+
+ /* Process attr changes if in IDLE */
+ if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
+ if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
+ ret = -EIO;
+ goto out;
+ }
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
+ newattr.enable_rdma_read = attrs->enable_rdma_read;
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
+ newattr.enable_rdma_write = attrs->enable_rdma_write;
+ if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
+ newattr.enable_bind = attrs->enable_bind;
+ if (mask & IWCH_QP_ATTR_MAX_ORD) {
+ if (attrs->max_ord >
+ rhp->attr.max_rdma_read_qp_depth) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ord = attrs->max_ord;
+ }
+ if (mask & IWCH_QP_ATTR_MAX_IRD) {
+ if (attrs->max_ird >
+ rhp->attr.max_rdma_reads_per_qp) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ird = attrs->max_ird;
+ }
+ qhp->attr = newattr;
+ }
+
+ if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
+ goto out;
+ if (qhp->attr.state == attrs->next_state)
+ goto out;
+
+ switch (qhp->attr.state) {
+ case IWCH_QP_STATE_IDLE:
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_RTS:
+ if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qhp->attr.mpa_attr = attrs->mpa_attr;
+ qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
+ qhp->ep = qhp->attr.llp_stream_handle;
+ qhp->attr.state = IWCH_QP_STATE_RTS;
+
+ /*
+ * Ref the endpoint here and deref when we
+ * disassociate the endpoint from the QP. This
+ * happens in CLOSING->IDLE transition or *->ERROR
+ * transition.
+ */
+ get_ep(&qhp->ep->com);
+ mtx_unlock(&qhp->lock);
+ ret = rdma_init(rhp, qhp, mask, attrs);
+ mtx_lock(&qhp->lock);
+ if (ret)
+ goto err;
+ break;
+ case IWCH_QP_STATE_ERROR:
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ flush_qp(qhp);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case IWCH_QP_STATE_RTS:
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_CLOSING:
+ PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2);
+ qhp->attr.state = IWCH_QP_STATE_CLOSING;
+ if (!internal) {
+ abort=0;
+ disconnect = 1;
+ ep = qhp->ep;
+ }
+ flush_qp(qhp);
+ break;
+ case IWCH_QP_STATE_TERMINATE:
+ qhp->attr.state = IWCH_QP_STATE_TERMINATE;
+ if (qhp->ibqp.uobject)
+ cxio_set_wq_in_error(&qhp->wq);
+ if (!internal)
+ terminate = 1;
+ break;
+ case IWCH_QP_STATE_ERROR:
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ if (!internal) {
+ abort=1;
+ disconnect = 1;
+ ep = qhp->ep;
+ }
+ goto err;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case IWCH_QP_STATE_CLOSING:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ switch (attrs->next_state) {
+ case IWCH_QP_STATE_IDLE:
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ qhp->attr.llp_stream_handle = NULL;
+ put_ep(&qhp->ep->com);
+ qhp->ep = NULL;
+ wakeup(qhp);
+ break;
+ case IWCH_QP_STATE_ERROR:
+ disconnect=1;
+ goto err;
+ default:
+ ret = -EINVAL;
+ goto err;
+ }
+ break;
+ case IWCH_QP_STATE_ERROR:
+ if (attrs->next_state != IWCH_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
+ !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qhp->attr.state = IWCH_QP_STATE_IDLE;
+ memset(&qhp->attr, 0, sizeof(qhp->attr));
+ break;
+ case IWCH_QP_STATE_TERMINATE:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto err;
+ break;
+ default:
+ log(LOG_ERR, "%s in a bad state %d\n",
+ __FUNCTION__, qhp->attr.state);
+ ret = -EINVAL;
+ goto err;
+ break;
+ }
+ goto out;
+err:
+ CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep,
+ qhp->wq.qpid);
+
+ /* disassociate the LLP connection */
+ qhp->attr.llp_stream_handle = NULL;
+ ep = qhp->ep;
+ qhp->ep = NULL;
+ qhp->attr.state = IWCH_QP_STATE_ERROR;
+ free=1;
+ wakeup(qhp);
+ PANIC_IF(!ep);
+ flush_qp(qhp);
+out:
+ mtx_unlock(&qhp->lock);
+
+ if (terminate)
+ iwch_post_terminate(qhp, NULL);
+
+ /*
+ * If disconnect is 1, then we need to initiate a disconnect
+ * on the EP. This can be a normal close (RTS->CLOSING) or
+ * an abnormal close (RTS/CLOSING->ERROR).
+ */
+ if (disconnect)
+ iwch_ep_disconnect(ep, abort, M_NOWAIT);
+
+ /*
+ * If free is 1, then we've disassociated the EP from the QP
+ * and we need to dereference the EP.
+ */
+ if (free)
+ put_ep(&ep->com);
+
+ CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state);
+ return ret;
+}
+
+static int quiesce_qp(struct iwch_qp *qhp)
+{
+ mtx_lock(&qhp->lock);
+ iwch_quiesce_tid(qhp->ep);
+ qhp->flags |= QP_QUIESCED;
+ mtx_unlock(&qhp->lock);
+ return 0;
+}
+
+static int resume_qp(struct iwch_qp *qhp)
+{
+ mtx_lock(&qhp->lock);
+ iwch_resume_tid(qhp->ep);
+ qhp->flags &= ~QP_QUIESCED;
+ mtx_unlock(&qhp->lock);
+ return 0;
+}
+
+int iwch_quiesce_qps(struct iwch_cq *chp)
+{
+ int i;
+ struct iwch_qp *qhp;
+
+ for (i=0; i < T3_MAX_NUM_QP; i++) {
+ qhp = get_qhp(chp->rhp, i);
+ if (!qhp)
+ continue;
+ if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
+ quiesce_qp(qhp);
+ continue;
+ }
+ if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
+ quiesce_qp(qhp);
+ }
+ return 0;
+}
+
+int iwch_resume_qps(struct iwch_cq *chp)
+{
+ int i;
+ struct iwch_qp *qhp;
+
+ for (i=0; i < T3_MAX_NUM_QP; i++) {
+ qhp = get_qhp(chp->rhp, i);
+ if (!qhp)
+ continue;
+ if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
+ resume_qp(qhp);
+ continue;
+ }
+ if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
+ resume_qp(qhp);
+ }
+ return 0;
+}
+
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
new file mode 100644
index 0000000000000..9d3618fbd57f0
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
@@ -0,0 +1,382 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/pciio.h>
+#include <sys/conf.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/bus_dma.h>
+#include <sys/rman.h>
+#include <sys/ioccom.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/linker.h>
+#include <sys/firmware.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/libkern.h>
+
+#include <netinet/in.h>
+
+#include <contrib/rdma/ib_verbs.h>
+#include <contrib/rdma/ib_umem.h>
+#include <contrib/rdma/ib_user_verbs.h>
+
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#include <ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <ulp/iw_cxgb/iw_cxgb.h>
+#include <ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <ulp/iw_cxgb/iw_cxgb_user.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h>
+#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h>
+#endif
+
+#ifdef needed
+static struct buf_ring *rhdl_fifo;
+static struct mtx rhdl_fifo_lock;
+#endif
+
+#define RANDOM_SIZE 16
+
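+/*
+ * Seed *fifo with the IDs in [skip_low, nr - skip_high).  When randomize
+ * is set, the IDs are pushed through a small RANDOM_SIZE-entry window
+ * shuffled with random() so that identifiers (STAGs, for instance) are
+ * not handed out in strictly increasing order.
+ */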
+static int __cxio_init_resource_fifo(struct buf_ring **fifo,
+ struct mtx *fifo_lock,
+ u32 nr, u32 skip_low,
+ u32 skip_high,
+ int randomize)
+{
+ u32 i, j, idx;
+ u32 random_bytes;
+ u32 rarray[16];
+ mtx_init(fifo_lock, "cxio fifo", NULL, MTX_DEF|MTX_DUPOK);
+
+ *fifo = buf_ring_alloc(nr, M_NOWAIT);
+ if (*fifo == NULL)
+ return (-ENOMEM);
+#if 0
+ for (i = 0; i < skip_low + skip_high; i++) {
+ u32 entry = 0;
+
+ buf_ring_enqueue(*fifo, (uintptr_t) entry);
+ }
+#endif
+ if (randomize) {
+ j = 0;
+ random_bytes = random();
+ for (i = 0; i < RANDOM_SIZE; i++)
+ rarray[i] = i + skip_low;
+ for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
+ if (j >= RANDOM_SIZE) {
+ j = 0;
+ random_bytes = random();
+ }
+ idx = (random_bytes >> (j * 2)) & 0xF;
+ buf_ring_enqueue(*fifo, (void *)(uintptr_t)rarray[idx]);
+ rarray[idx] = i;
+ j++;
+ }
+ for (i = 0; i < RANDOM_SIZE; i++)
+ buf_ring_enqueue(*fifo, (void *) (uintptr_t)rarray[i]);
+ } else
+ for (i = skip_low; i < nr - skip_high; i++)
+ buf_ring_enqueue(*fifo, (void *) (uintptr_t)i);
+#if 0
+ for (i = 0; i < skip_low + skip_high; i++)
+ buf_ring_dequeue(*fifo);
+#endif
+ return 0;
+}
+
+static int cxio_init_resource_fifo(struct buf_ring **fifo, struct mtx * fifo_lock,
+ u32 nr, u32 skip_low, u32 skip_high)
+{
+ return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
+ skip_high, 0));
+}
+
+static int cxio_init_resource_fifo_random(struct buf_ring **fifo,
+ struct mtx * fifo_lock,
+ u32 nr, u32 skip_low, u32 skip_high)
+{
+
+ return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
+ skip_high, 1));
+}
+
+static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
+{
+ u32 i;
+
+ mtx_init(&rdev_p->rscp->qpid_fifo_lock, "qpid fifo", NULL, MTX_DEF);
+
+ rdev_p->rscp->qpid_fifo = buf_ring_alloc(T3_MAX_NUM_QP, M_NOWAIT);
+ if (rdev_p->rscp->qpid_fifo == NULL)
+ return (-ENOMEM);
+
+ for (i = 16; i < T3_MAX_NUM_QP; i++)
+ if (!(i & rdev_p->qpmask))
+ buf_ring_enqueue(rdev_p->rscp->qpid_fifo, (void *) (uintptr_t)i);
+ return 0;
+}
+
+#ifdef needed
+int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
+{
+ return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
+ 0);
+}
+
+void cxio_hal_destroy_rhdl_resource(void)
+{
+ buf_ring_free(rhdl_fifo);
+}
+#endif
+
+/* nr_* must be power of 2 */
+int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
+ u32 nr_tpt, u32 nr_pbl,
+ u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
+{
+ int err = 0;
+ struct cxio_hal_resource *rscp;
+
+ rscp = malloc(sizeof(*rscp), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (!rscp)
+ return (-ENOMEM);
+ rdev_p->rscp = rscp;
+ err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
+ &rscp->tpt_fifo_lock,
+ nr_tpt, 1, 0);
+ if (err)
+ goto tpt_err;
+ err = cxio_init_qpid_fifo(rdev_p);
+ if (err)
+ goto qpid_err;
+ err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
+ nr_cqid, 1, 0);
+ if (err)
+ goto cqid_err;
+ err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
+ nr_pdid, 1, 0);
+ if (err)
+ goto pdid_err;
+ return 0;
+pdid_err:
+ buf_ring_free(rscp->cqid_fifo);
+cqid_err:
+ buf_ring_free(rscp->qpid_fifo);
+qpid_err:
+ buf_ring_free(rscp->tpt_fifo);
+tpt_err:
+ return (-ENOMEM);
+}
+
+/*
+ * returns 0 if no resource available
+ */
+static u32 cxio_hal_get_resource(struct buf_ring *fifo, struct mtx *lock)
+{
+ u32 entry;
+
+ mtx_lock(lock);
+ entry = (u32)(uintptr_t)buf_ring_dequeue(fifo);
+ mtx_unlock(lock);
+ return entry;
+}
+
+static void cxio_hal_put_resource(struct buf_ring *fifo, u32 entry, struct mtx *lock)
+{
+ mtx_lock(lock);
+ buf_ring_enqueue(fifo, (void *) (uintptr_t)entry);
+ mtx_unlock(lock);
+}
+
+u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->tpt_fifo, &rscp->tpt_fifo_lock);
+}
+
+void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
+{
+ cxio_hal_put_resource(rscp->tpt_fifo, stag, &rscp->tpt_fifo_lock);
+}
+
+u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
+{
+ u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo, &rscp->qpid_fifo_lock);
+ CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid);
+ return qpid;
+}
+
+void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
+{
+ CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid);
+ cxio_hal_put_resource(rscp->qpid_fifo, qpid, &rscp->qpid_fifo_lock);
+}
+
+u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->cqid_fifo, &rscp->cqid_fifo_lock);
+}
+
+void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
+{
+ cxio_hal_put_resource(rscp->cqid_fifo, cqid, &rscp->cqid_fifo_lock);
+}
+
+u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
+{
+ return cxio_hal_get_resource(rscp->pdid_fifo, &rscp->pdid_fifo_lock);
+}
+
+void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
+{
+ cxio_hal_put_resource(rscp->pdid_fifo, pdid, &rscp->pdid_fifo_lock);
+}
+
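cxio_hal_get_resource() above returns 0 when the ring is empty, and the ID FIFOs are deliberately populated starting above 0 (skip_low of 1 for the stag/CQID/PDID rings, QP IDs from 16), so callers can treat a 0 return as allocation failure. A hedged sketch of that caller convention (illustrative fragment only, not part of this change):

	u32 stag = cxio_hal_get_stag(rscp);

	if (stag == 0)
		return (-ENOMEM);	/* FIFO empty: 0 is never handed out as an ID */
	/* ... use the stag, e.g. to program a TPT entry ... */
	cxio_hal_put_stag(rscp, stag);	/* return the ID when the object is destroyed */
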
+void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
+{
+ buf_ring_free(rscp->tpt_fifo);
+ buf_ring_free(rscp->cqid_fifo);
+ buf_ring_free(rscp->qpid_fifo);
+ buf_ring_free(rscp->pdid_fifo);
+ free(rscp, M_DEVBUF);
+}
+
+/*
+ * PBL Memory Manager. Uses a Linux-style generic allocator (gen_pool).
+ */
+
+#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
+#define PBL_CHUNK (2 * 1024 * 1024)
+
+u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
+ CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, (u32)addr, size);
+ return (u32)addr;
+}
+
+void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+{
+ CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, addr, size);
+ gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
+}
+
+int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
+{
+
+ rdev_p->pbl_pool = gen_pool_create(rdev_p->rnic_info.pbl_base, MIN_PBL_SHIFT,
+ rdev_p->rnic_info.pbl_top - rdev_p->rnic_info.pbl_base);
+#if 0
+ if (rdev_p->pbl_pool) {
+
+ unsigned long i;
+ for (i = rdev_p->rnic_info.pbl_base;
+ i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
+ i += PBL_CHUNK)
+ gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
+ }
+#endif
+ return rdev_p->pbl_pool ? 0 : (-ENOMEM);
+}
+
+void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
+{
+ gen_pool_destroy(rdev_p->pbl_pool);
+}
+
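The PBL pool above manages the adapter's pbl_base..pbl_top window at MIN_PBL_SHIFT (256-byte) granularity, i.e. the smallest allocation holds 32 eight-byte page-list entries, as the comment notes. Callers pass a byte count and get back an adapter address; the PBL_OFF() macro in the header further below converts that address back into an offset from pbl_base. A hedged usage fragment (sizes are illustrative, and a 0 return is assumed to mean the pool is exhausted, mirroring the Linux gen_pool convention this code follows):

	/* room for 64 page-list entries of 8 bytes each */
	u32 pbl_addr = cxio_hal_pblpool_alloc(rdev_p, 64 * 8);

	if (pbl_addr == 0)
		return (-ENOMEM);	/* assumed failure convention, see note above */
	/* ... write the page list into adapter memory at pbl_addr ... */
	cxio_hal_pblpool_free(rdev_p, pbl_addr, 64 * 8);
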
+/*
+ * RQT Memory Manager. Uses a Linux-style generic allocator (gen_pool).
+ */
+
+#define MIN_RQT_SHIFT 10 /* 1KB == min RQT size (16 entries) */
+#define RQT_CHUNK (2 * 1024 * 1024)
+
+u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
+ CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, (u32)addr, size << 6);
+ return (u32)addr;
+}
+
+void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+{
+ CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, addr, size << 6);
+ gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
+}
+
+int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
+{
+
+ rdev_p->rqt_pool = gen_pool_create(rdev_p->rnic_info.rqt_base,
+ MIN_RQT_SHIFT, rdev_p->rnic_info.rqt_top - rdev_p->rnic_info.rqt_base);
+#if 0
+ if (rdev_p->rqt_pool) {
+ unsigned long i;
+
+ for (i = rdev_p->rnic_info.rqt_base;
+ i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
+ i += RQT_CHUNK)
+ gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
+ }
+#endif
+ return rdev_p->rqt_pool ? 0 : (-ENOMEM);
+}
+
+void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
+{
+ gen_pool_destroy(rdev_p->rqt_pool);
+}
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h
new file mode 100644
index 0000000000000..e0282a3453028
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+
+Copyright (c) 2007, 2008 Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef __CXIO_RESOURCE_H__
+#define __CXIO_RESOURCE_H__
+
+extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
+extern void cxio_hal_destroy_rhdl_resource(void);
+extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
+ u32 nr_tpt, u32 nr_pbl,
+ u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
+ u32 nr_pdid);
+extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
+extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
+extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
+extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
+extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
+
+#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
+extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
+extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
+extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
+extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
+
+#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
+extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
+extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
+extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
+extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h
new file mode 100644
index 0000000000000..3086a6340a115
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2007, 2008 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+#ifndef __IWCH_USER_H__
+#define __IWCH_USER_H__
+
+#define IWCH_UVERBS_ABI_VERSION 1
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in uint64_t
+ * instead.
+ */
+struct iwch_create_cq_req {
+ uint64_t user_rptr_addr;
+};
+
+struct iwch_create_cq_resp {
+ uint64_t key;
+ uint32_t cqid;
+ uint32_t size_log2;
+};
+
+struct iwch_create_qp_resp {
+ uint64_t key;
+ uint64_t db_key;
+ uint32_t qpid;
+ uint32_t size_log2;
+ uint32_t sq_size_log2;
+ uint32_t rq_size_log2;
+};
+
+struct iwch_reg_user_mr_resp {
+ uint32_t pbl_addr;
+};
+#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h
new file mode 100644
index 0000000000000..bf8f2d609e041
--- /dev/null
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h
@@ -0,0 +1,684 @@
+/**************************************************************************
+
+Copyright (c) 2007, 2008 Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef __CXIO_WR_H__
+#define __CXIO_WR_H__
+#define T3_MAX_SGE 4
+#define T3_MAX_INLINE 64
+
+#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
+#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
+ ((rptr)!=(wptr)) )
+#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
+#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
+#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
+#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))
+
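The queue macros above let rptr and wptr run freely instead of being masked to the ring size: Q_PTR2IDX() masks a pointer down to a slot index, Q_GENBIT() extracts a generation bit that flips every time the ring wraps, and the full/empty/free-count tests fall out of plain pointer arithmetic. The following stand-alone user-space check (illustrative only, not driver code) exercises the same macro definitions on a 4-entry ring:

/* Illustrative user-space check of the ring-pointer macros above; not driver code. */
#include <assert.h>
#include <stdio.h>

#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
 ((rptr)!=(wptr)) )
#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))

int
main(void)
{
	unsigned int rptr = 0, wptr = 0;
	const unsigned int size_log2 = 2;	/* 4-entry ring */

	assert(Q_EMPTY(rptr, wptr));
	wptr += 4;				/* produce four entries */
	assert(Q_FULL(rptr, wptr, size_log2));
	assert(Q_FREECNT(rptr, wptr, size_log2) == 0);
	assert(Q_PTR2IDX(wptr, size_log2) == 0);	/* wrapped back to slot 0 */
	/* the generation bit differs, which is how old vs. new slot 0 is told apart */
	assert(Q_GENBIT(wptr, size_log2) != Q_GENBIT(rptr, size_log2));
	printf("ring macro checks passed\n");
	return (0);
}
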
+static __inline void
+ring_doorbell(void /* __iomem */ *doorbell, u32 qpid)
+{
+ writel(doorbell, ((1<<31) | qpid));
+}
+
+#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
+
+enum t3_wr_flags {
+ T3_COMPLETION_FLAG = 0x01,
+ T3_NOTIFY_FLAG = 0x02,
+ T3_SOLICITED_EVENT_FLAG = 0x04,
+ T3_READ_FENCE_FLAG = 0x08,
+ T3_LOCAL_FENCE_FLAG = 0x10
+} __attribute__ ((packed));
+
+enum t3_wr_opcode {
+ T3_WR_BP = FW_WROPCODE_RI_BYPASS,
+ T3_WR_SEND = FW_WROPCODE_RI_SEND,
+ T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
+ T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
+ T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
+ T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
+ T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
+ T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
+ T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP
+} __attribute__ ((packed));
+
+enum t3_rdma_opcode {
+ T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */
+ T3_READ_REQ,
+ T3_READ_RESP,
+ T3_SEND,
+ T3_SEND_WITH_INV,
+ T3_SEND_WITH_SE,
+ T3_SEND_WITH_SE_INV,
+ T3_TERMINATE,
+ T3_RDMA_INIT, /* CHELSIO RI specific ... */
+ T3_BIND_MW,
+ T3_FAST_REGISTER,
+ T3_LOCAL_INV,
+ T3_QP_MOD,
+ T3_BYPASS
+} __attribute__ ((packed));
+
+static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
+{
+ switch (wrop) {
+ case T3_WR_BP: return T3_BYPASS;
+ case T3_WR_SEND: return T3_SEND;
+ case T3_WR_WRITE: return T3_RDMA_WRITE;
+ case T3_WR_READ: return T3_READ_REQ;
+ case T3_WR_INV_STAG: return T3_LOCAL_INV;
+ case T3_WR_BIND: return T3_BIND_MW;
+ case T3_WR_INIT: return T3_RDMA_INIT;
+ case T3_WR_QP_MOD: return T3_QP_MOD;
+ default: break;
+ }
+ return -1;
+}
+
+
+/* Work request id */
+union t3_wrid {
+ struct {
+ u32 hi;
+ u32 low;
+ } id0;
+ u64 id1;
+};
+
+#define WRID(wrid) (wrid.id1)
+#define WRID_GEN(wrid) (wrid.id0.wr_gen)
+#define WRID_IDX(wrid) (wrid.id0.wr_idx)
+#define WRID_LO(wrid) (wrid.id0.wr_lo)
+
+struct fw_riwrh {
+ __be32 op_seop_flags;
+ __be32 gen_tid_len;
+};
+
+#define S_FW_RIWR_OP 24
+#define M_FW_RIWR_OP 0xff
+#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP)
+#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
+
+#define S_FW_RIWR_SOPEOP 22
+#define M_FW_RIWR_SOPEOP 0x3
+#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP)
+
+#define S_FW_RIWR_FLAGS 8
+#define M_FW_RIWR_FLAGS 0x3fffff
+#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS)
+#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
+
+#define S_FW_RIWR_TID 8
+#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID)
+
+#define S_FW_RIWR_LEN 0
+#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN)
+
+#define S_FW_RIWR_GEN 31
+#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN)
+
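These S_/M_/V_/G_ macros follow the usual Chelsio shift/mask convention: V_*() places a field, G_*() extracts it, and build_fw_riwrh() further down ORs V_FW_RIWR_OP, V_FW_RIWR_SOPEOP and V_FW_RIWR_FLAGS into op_seop_flags before byte-swapping with htobe32. A small stand-alone check of the packing arithmetic (user-space, arbitrary test values, not driver code):

/* Illustrative user-space check of the FW_RIWR field macros; not driver code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define S_FW_RIWR_OP		24
#define M_FW_RIWR_OP		0xff
#define V_FW_RIWR_OP(x)		((x) << S_FW_RIWR_OP)
#define G_FW_RIWR_OP(x)		((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)

#define S_FW_RIWR_FLAGS		8
#define M_FW_RIWR_FLAGS		0x3fffff
#define V_FW_RIWR_FLAGS(x)	((x) << S_FW_RIWR_FLAGS)
#define G_FW_RIWR_FLAGS(x)	((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)

int
main(void)
{
	uint32_t op = 0x2a, flags = 0x15;	/* arbitrary test values */
	uint32_t word = V_FW_RIWR_OP(op) | V_FW_RIWR_FLAGS(flags);

	assert(G_FW_RIWR_OP(word) == op);
	/*
	 * The 22-bit FLAGS mask overlaps the OP/SOPEOP bit positions, so
	 * round-trip the flags field on its own to show the extractor.
	 */
	assert(G_FW_RIWR_FLAGS(V_FW_RIWR_FLAGS(flags)) == flags);
	printf("packed op_seop_flags = 0x%08x\n", word);
	return (0);
}
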
+struct t3_sge {
+ __be32 stag;
+ __be32 len;
+ __be64 to;
+};
+
+/* If num_sgle is zero, flit 5+ contains immediate data.*/
+struct t3_send_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+
+ u8 rdmaop; /* 2 */
+ u8 reserved[3];
+ __be32 rem_stag;
+ __be32 plen; /* 3 */
+ __be32 num_sgle;
+ struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
+};
+
+struct t3_local_inv_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 stag; /* 2 */
+ __be32 reserved3;
+};
+
+struct t3_rdma_write_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 rdmaop; /* 2 */
+ u8 reserved[3];
+ __be32 stag_sink;
+ __be64 to_sink; /* 3 */
+ __be32 plen; /* 4 */
+ __be32 num_sgle;
+ struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */
+};
+
+struct t3_rdma_read_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 rdmaop; /* 2 */
+ u8 reserved[3];
+ __be32 rem_stag;
+ __be64 rem_to; /* 3 */
+ __be32 local_stag; /* 4 */
+ __be32 local_len;
+ __be64 local_to; /* 5 */
+};
+
+enum t3_addr_type {
+ T3_VA_BASED_TO = 0x0,
+ T3_ZERO_BASED_TO = 0x1
+} __attribute__ ((packed));
+
+enum t3_mem_perms {
+ T3_MEM_ACCESS_LOCAL_READ = 0x1,
+ T3_MEM_ACCESS_LOCAL_WRITE = 0x2,
+ T3_MEM_ACCESS_REM_READ = 0x4,
+ T3_MEM_ACCESS_REM_WRITE = 0x8
+} __attribute__ ((packed));
+
+struct t3_bind_mw_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u16 reserved; /* 2 */
+ u8 type;
+ u8 perms;
+ __be32 mr_stag;
+ __be32 mw_stag; /* 3 */
+ __be32 mw_len;
+ __be64 mw_va; /* 4 */
+ __be32 mr_pbl_addr; /* 5 */
+ u8 reserved2[3];
+ u8 mr_pagesz;
+};
+
+struct t3_receive_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ u8 pagesz[T3_MAX_SGE];
+ __be32 num_sgle; /* 2 */
+ struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */
+ __be32 pbl_addr[T3_MAX_SGE];
+};
+
+struct t3_bypass_wr {
+ struct fw_riwrh wrh;
+ union t3_wrid wrid; /* 1 */
+};
+
+struct t3_modify_qp_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 flags; /* 2 */
+ __be32 quiesce; /* 2 */
+ __be32 max_ird; /* 3 */
+ __be32 max_ord; /* 3 */
+ __be64 sge_cmd; /* 4 */
+ __be64 ctx1; /* 5 */
+ __be64 ctx0; /* 6 */
+};
+
+enum t3_modify_qp_flags {
+ MODQP_QUIESCE = 0x01,
+ MODQP_MAX_IRD = 0x02,
+ MODQP_MAX_ORD = 0x04,
+ MODQP_WRITE_EC = 0x08,
+ MODQP_READ_EC = 0x10,
+};
+
+
+enum t3_mpa_attrs {
+ uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
+ uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
+ uP_RI_MPA_CRC_ENABLE = 0x4,
+ uP_RI_MPA_IETF_ENABLE = 0x8
+} __attribute__ ((packed));
+
+enum t3_qp_caps {
+ uP_RI_QP_RDMA_READ_ENABLE = 0x01,
+ uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
+ uP_RI_QP_BIND_ENABLE = 0x04,
+ uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
+ uP_RI_QP_STAG0_ENABLE = 0x10
+} __attribute__ ((packed));
+
+struct t3_rdma_init_attr {
+ u32 tid;
+ u32 qpid;
+ u32 pdid;
+ u32 scqid;
+ u32 rcqid;
+ u32 rq_addr;
+ u32 rq_size;
+ enum t3_mpa_attrs mpaattrs;
+ enum t3_qp_caps qpcaps;
+ u16 tcp_emss;
+ u32 ord;
+ u32 ird;
+ u64 qp_dma_addr;
+ u32 qp_dma_size;
+ u32 flags;
+ u32 irs;
+};
+
+struct t3_rdma_init_wr {
+ struct fw_riwrh wrh; /* 0 */
+ union t3_wrid wrid; /* 1 */
+ __be32 qpid; /* 2 */
+ __be32 pdid;
+ __be32 scqid; /* 3 */
+ __be32 rcqid;
+ __be32 rq_addr; /* 4 */
+ __be32 rq_size;
+ u8 mpaattrs; /* 5 */
+ u8 qpcaps;
+ __be16 ulpdu_size;
+ __be32 flags; /* bits 31-1 - reserved */
+ /* bit 0 - set if RECV posted */
+ __be32 ord; /* 6 */
+ __be32 ird;
+ __be64 qp_dma_addr; /* 7 */
+ __be32 qp_dma_size; /* 8 */
+ u32 irs;
+};
+
+struct t3_genbit {
+ u64 flit[15];
+ __be64 genbit;
+};
+
+enum rdma_init_wr_flags {
+ RECVS_POSTED = 1,
+};
+
+union t3_wr {
+ struct t3_send_wr send;
+ struct t3_rdma_write_wr write;
+ struct t3_rdma_read_wr read;
+ struct t3_receive_wr recv;
+ struct t3_local_inv_wr local_inv;
+ struct t3_bind_mw_wr bind;
+ struct t3_bypass_wr bypass;
+ struct t3_rdma_init_wr init;
+ struct t3_modify_qp_wr qp_mod;
+ struct t3_genbit genbit;
+ u64 flit[16];
+};
+
+#define T3_SQ_CQE_FLIT 13
+#define T3_SQ_COOKIE_FLIT 14
+
+#define T3_RQ_COOKIE_FLIT 13
+#define T3_RQ_CQE_FLIT 14
+
+static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
+{
+ return G_FW_RIWR_OP(be32toh(wqe->op_seop_flags));
+}
+
+static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
+ enum t3_wr_flags flags, u8 genbit, u32 tid,
+ u8 len)
+{
+ wqe->op_seop_flags = htobe32(V_FW_RIWR_OP(op) |
+ V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) |
+ V_FW_RIWR_FLAGS(flags));
+ wmb();
+ wqe->gen_tid_len = htobe32(V_FW_RIWR_GEN(genbit) |
+ V_FW_RIWR_TID(tid) |
+ V_FW_RIWR_LEN(len));
+ /* 2nd gen bit... */
+ ((union t3_wr *)wqe)->genbit.genbit = htobe64(genbit);
+}
+
+/*
+ * T3 ULP2_TX commands
+ */
+enum t3_utx_mem_op {
+ T3_UTX_MEM_READ = 2,
+ T3_UTX_MEM_WRITE = 3
+};
+
+/* T3 MC7 RDMA TPT entry format */
+
+enum tpt_mem_type {
+ TPT_NON_SHARED_MR = 0x0,
+ TPT_SHARED_MR = 0x1,
+ TPT_MW = 0x2,
+ TPT_MW_RELAXED_PROTECTION = 0x3
+};
+
+enum tpt_addr_type {
+ TPT_ZBTO = 0,
+ TPT_VATO = 1
+};
+
+enum tpt_mem_perm {
+ TPT_LOCAL_READ = 0x8,
+ TPT_LOCAL_WRITE = 0x4,
+ TPT_REMOTE_READ = 0x2,
+ TPT_REMOTE_WRITE = 0x1
+};
+
+struct tpt_entry {
+ __be32 valid_stag_pdid;
+ __be32 flags_pagesize_qpid;
+
+ __be32 rsvd_pbl_addr;
+ __be32 len;
+ __be32 va_hi;
+ __be32 va_low_or_fbo;
+
+ __be32 rsvd_bind_cnt_or_pstag;
+ __be32 rsvd_pbl_size;
+};
+
+#define S_TPT_VALID 31
+#define V_TPT_VALID(x) ((x) << S_TPT_VALID)
+#define F_TPT_VALID V_TPT_VALID(1U)
+
+#define S_TPT_STAG_KEY 23
+#define M_TPT_STAG_KEY 0xFF
+#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY)
+#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
+
+#define S_TPT_STAG_STATE 22
+#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE)
+#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U)
+
+#define S_TPT_STAG_TYPE 20
+#define M_TPT_STAG_TYPE 0x3
+#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE)
+#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
+
+#define S_TPT_PDID 0
+#define M_TPT_PDID 0xFFFFF
+#define V_TPT_PDID(x) ((x) << S_TPT_PDID)
+#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID)
+
+#define S_TPT_PERM 28
+#define M_TPT_PERM 0xF
+#define V_TPT_PERM(x) ((x) << S_TPT_PERM)
+#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM)
+
+#define S_TPT_REM_INV_DIS 27
+#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS)
+#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U)
+
+#define S_TPT_ADDR_TYPE 26
+#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE)
+#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U)
+
+#define S_TPT_MW_BIND_ENABLE 25
+#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE)
+#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U)
+
+#define S_TPT_PAGE_SIZE 20
+#define M_TPT_PAGE_SIZE 0x1F
+#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE)
+#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
+
+#define S_TPT_PBL_ADDR 0
+#define M_TPT_PBL_ADDR 0x1FFFFFFF
+#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR)
+#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
+
+#define S_TPT_QPID 0
+#define M_TPT_QPID 0xFFFFF
+#define V_TPT_QPID(x) ((x) << S_TPT_QPID)
+#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID)
+
+#define S_TPT_PSTAG 0
+#define M_TPT_PSTAG 0xFFFFFF
+#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG)
+#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
+
+#define S_TPT_PBL_SIZE 0
+#define M_TPT_PBL_SIZE 0xFFFFF
+#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE)
+#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
+
+/*
+ * CQE defs
+ */
+struct t3_cqe {
+ __be32 header;
+ __be32 len;
+ union {
+ struct {
+ __be32 stag;
+ __be32 msn;
+ } rcqe;
+ struct {
+ u32 wrid_hi;
+ u32 wrid_low;
+ } scqe;
+ } u;
+};
+
+#define S_CQE_OOO 31
+#define M_CQE_OOO 0x1
+#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
+#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO)
+
+#define S_CQE_QPID 12
+#define M_CQE_QPID 0x7FFFF
+#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
+#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
+
+#define S_CQE_SWCQE 11
+#define M_CQE_SWCQE 0x1
+#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
+#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
+
+#define S_CQE_GENBIT 10
+#define M_CQE_GENBIT 0x1
+#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
+#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
+
+#define S_CQE_STATUS 5
+#define M_CQE_STATUS 0x1F
+#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
+#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
+
+#define S_CQE_TYPE 4
+#define M_CQE_TYPE 0x1
+#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
+#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
+
+#define S_CQE_OPCODE 0
+#define M_CQE_OPCODE 0xF
+#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
+#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
+
+#define SW_CQE(x) (G_CQE_SWCQE(be32toh((x).header)))
+#define CQE_OOO(x) (G_CQE_OOO(be32toh((x).header)))
+#define CQE_QPID(x) (G_CQE_QPID(be32toh((x).header)))
+#define CQE_GENBIT(x) (G_CQE_GENBIT(be32toh((x).header)))
+#define CQE_TYPE(x) (G_CQE_TYPE(be32toh((x).header)))
+#define SQ_TYPE(x) (CQE_TYPE((x)))
+#define RQ_TYPE(x) (!CQE_TYPE((x)))
+#define CQE_STATUS(x) (G_CQE_STATUS(be32toh((x).header)))
+#define CQE_OPCODE(x) (G_CQE_OPCODE(be32toh((x).header)))
+
+#define CQE_LEN(x) (be32toh((x).len))
+
+/* used for RQ completion processing */
+#define CQE_WRID_STAG(x) (be32toh((x).u.rcqe.stag))
+#define CQE_WRID_MSN(x) (be32toh((x).u.rcqe.msn))
+
+/* used for SQ completion processing */
+#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi)
+#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low)
+
+/* generic accessor macros */
+#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi)
+#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low)
+
+#define TPT_ERR_SUCCESS 0x0
+#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */
+ /* STAG is off limit, being 0, */
+ /* or STAG_key mismatch */
+#define TPT_ERR_PDID 0x2 /* PDID mismatch */
+#define TPT_ERR_QPID 0x3 /* QPID mismatch */
+#define TPT_ERR_ACCESS 0x4 /* Invalid access right */
+#define TPT_ERR_WRAP 0x5 /* Wrap error */
+#define TPT_ERR_BOUND 0x6 /* base and bounds violation */
+#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
+ /* shared memory region */
+#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
+ /* MR with an MW bound to it */
+#define TPT_ERR_ECC 0x9 /* ECC error detected */
+#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */
+ /* reading PSTAG for a MW */
+ /* Invalidate */
+#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
+ /* software error */
+#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */
+#define TPT_ERR_CRC 0x10 /* CRC error */
+#define TPT_ERR_MARKER 0x11 /* Marker error */
+#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
+#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */
+#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */
+#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
+#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */
+#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
+#define TPT_ERR_MSN 0x18 /* MSN error */
+#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */
+#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */
+ /* or READ_REQ */
+#define TPT_ERR_MSN_GAP 0x1B
+#define TPT_ERR_MSN_RANGE 0x1C
+#define TPT_ERR_IRD_OVERFLOW 0x1D
+#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
+ /* software error */
+#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
+ /* mismatch) */
+
+struct t3_swsq {
+ uint64_t wr_id;
+ struct t3_cqe cqe;
+ uint32_t sq_wptr;
+ uint32_t read_len;
+ int opcode;
+ int complete;
+ int signaled;
+};
+
+/*
+ * A T3 WQ implements both the SQ and RQ.
+ */
+struct t3_wq {
+ union t3_wr *queue; /* DMA accessable memory */
+ bus_addr_t dma_addr; /* DMA address for HW */
+#ifdef notyet
+ DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap kruft */
+#endif
+ u32 error; /* 1 once we go to ERROR */
+ u32 qpid;
+ u32 wptr; /* idx to next available WR slot */
+ u32 size_log2; /* total wq size */
+ struct t3_swsq *sq; /* SW SQ */
+ struct t3_swsq *oldest_read; /* tracks oldest pending read */
+ u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
+ u32 sq_rptr; /* pending wrs */
+ u32 sq_size_log2; /* sq size */
+ u64 *rq; /* SW RQ (holds consumer wr_ids */
+ u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
+ u32 rq_rptr; /* pending wrs */
+ u64 *rq_oldest_wr; /* oldest wr on the SW RQ */
+ u32 rq_size_log2; /* rq size */
+ u32 rq_addr; /* rq adapter address */
+ void /* __iomem */ *doorbell; /* kernel db */
+ u64 udb; /* user db if any */
+};
+
+struct t3_cq {
+ u32 cqid;
+ u32 rptr;
+ u32 wptr;
+ u32 size_log2;
+ bus_addr_t dma_addr;
+#ifdef notyet
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+#endif
+ struct t3_cqe *queue;
+ struct t3_cqe *sw_queue;
+ u32 sw_rptr;
+ u32 sw_wptr;
+};
+
+#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
+ CQE_GENBIT(*cqe))
+
+static inline void cxio_set_wq_in_error(struct t3_wq *wq)
+{
+ wq->queue->flit[13] = 1;
+}
+
+static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+ if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+ return cqe;
+ return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+ return cqe;
+ }
+ return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+ return cqe;
+ }
+ cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+ if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+ return cqe;
+ return NULL;
+}
+
+#endif
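cxio_next_hw_cqe() and cxio_next_cqe() above rely on CQ_VLD_ENTRY(): an entry is only valid when the generation bit stored in its header matches the generation implied by the consumer's read pointer, so entries left over from the previous trip around the ring are rejected without any head/tail exchange with the hardware. A stand-alone user-space model of that check (host byte order only; the real driver applies be32toh first, and the toy ring size is made up):

/* Stand-alone model of the CQ generation-bit validity check; not driver code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define S_CQE_GENBIT	10
#define M_CQE_GENBIT	0x1
#define G_CQE_GENBIT(x)	(((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
#define V_CQE_GENBIT(x)	((x) << S_CQE_GENBIT)

#define Q_GENBIT(ptr, size_log2)	(!(((ptr) >> (size_log2)) & 0x1))
#define Q_PTR2IDX(ptr, size_log2)	((ptr) & ((1UL << (size_log2)) - 1))

#define CQ_SIZE_LOG2	2		/* toy 4-entry CQ */

int
main(void)
{
	uint32_t queue[1 << CQ_SIZE_LOG2] = { 0 };	/* header words only */
	uint32_t rptr = 0, wptr = 0;
	int polled = 0;

	for (int round = 0; round < 2; round++) {
		/* Producer: fill the ring, stamping the current generation. */
		for (int i = 0; i < (1 << CQ_SIZE_LOG2); i++, wptr++)
			queue[Q_PTR2IDX(wptr, CQ_SIZE_LOG2)] =
			    V_CQE_GENBIT(Q_GENBIT(wptr, CQ_SIZE_LOG2));

		/* Consumer: stop as soon as an entry carries a stale generation. */
		while (G_CQE_GENBIT(queue[Q_PTR2IDX(rptr, CQ_SIZE_LOG2)]) ==
		    Q_GENBIT(rptr, CQ_SIZE_LOG2)) {
			rptr++;
			polled++;
		}
	}
	assert(polled == 2 * (1 << CQ_SIZE_LOG2));
	printf("consumed %d CQEs across one ring wrap\n", polled);
	return (0);
}
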
diff --git a/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h b/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h
index 6c97a27f674b8..56ccda949beb8 100644
--- a/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h
+++ b/sys/dev/cxgb/ulp/toecore/cxgb_toedev.h
@@ -31,7 +31,10 @@ $FreeBSD$
***************************************************************************/
#ifndef _CXGB_TOEDEV_H_
-#define _CXGB_TOEDEV_H_
+#define _CXGB_TOEDEV_H_
+#ifdef notyet
+#include <netinet/toedev.h>
+#endif
/* offload type ids */
enum {
diff --git a/sys/dev/cxgb/ulp/toecore/toedev.c b/sys/dev/cxgb/ulp/toecore/toedev.c
new file mode 100644
index 0000000000000..07a0d6e94feb9
--- /dev/null
+++ b/sys/dev/cxgb/ulp/toecore/toedev.c
@@ -0,0 +1,424 @@
+
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/queue.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>
+
+#include <sys/socket.h>
+#include <sys/sockio.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/route.h>
+
+
+/*
+ * XXX
+ */
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#endif
+
+
+
+static struct mtx offload_db_lock;
+static TAILQ_HEAD(, toedev) offload_dev_list;
+static TAILQ_HEAD(, tom_info) offload_module_list;
+
+/*
+ * Returns the entry in the given table with the given offload id, or NULL
+ * if the id is not found.
+ */
+static const struct offload_id *
+id_find(unsigned int id, const struct offload_id *table)
+{
+ for ( ; table->id; ++table)
+ if (table->id == id)
+ return table;
+ return NULL;
+}
+
+/*
+ * Returns true if an offload device is presently attached to an offload module.
+ */
+static inline int
+is_attached(const struct toedev *dev)
+{
+ return dev->tod_offload_mod != NULL;
+}
+
+/*
+ * Try to attach a new offload device to an existing TCP offload module that
+ * can handle the device's offload id. Returns 0 if it succeeds.
+ *
+ * Must be called with the offload_db_lock held.
+ */
+static int
+offload_attach(struct toedev *dev)
+{
+ struct tom_info *t;
+
+ TAILQ_FOREACH(t, &offload_module_list, entry) {
+ const struct offload_id *entry;
+
+ entry = id_find(dev->tod_ttid, t->ti_id_table);
+ if (entry && t->ti_attach(dev, entry) == 0) {
+ dev->tod_offload_mod = t;
+ return 0;
+ }
+ }
+ return (ENOPROTOOPT);
+}
+
+/**
+ * register_tom - register a TCP Offload Module (TOM)
+ * @t: the offload module to register
+ *
+ * Register a TCP Offload Module (TOM).
+ */
+int
+register_tom(struct tom_info *t)
+{
+ mtx_lock(&offload_db_lock);
+ TAILQ_INSERT_HEAD(&offload_module_list, t, entry);
+ mtx_unlock(&offload_db_lock);
+ return 0;
+}
+
+/**
+ * unregister_tom - unregister a TCP Offload Module (TOM)
+ * @t: the offload module to unregister
+ *
+ * Unregister a TCP Offload Module (TOM). Note that this does not affect any
+ * TOE devices to which the TOM is already attached.
+ */
+int
+unregister_tom(struct tom_info *t)
+{
+ mtx_lock(&offload_db_lock);
+ TAILQ_REMOVE(&offload_module_list, t, entry);
+ mtx_unlock(&offload_db_lock);
+ return 0;
+}
+
+/*
+ * Find an offload device by name. Must be called with offload_db_lock held.
+ */
+static struct toedev *
+__find_offload_dev_by_name(const char *name)
+{
+ struct toedev *dev;
+
+ TAILQ_FOREACH(dev, &offload_dev_list, entry) {
+ if (!strncmp(dev->tod_name, name, TOENAMSIZ))
+ return dev;
+ }
+ return NULL;
+}
+
+/*
+ * Returns true if an offload device is already registered.
+ * Must be called with the offload_db_lock held.
+ */
+static int
+is_registered(const struct toedev *dev)
+{
+ struct toedev *d;
+
+ TAILQ_FOREACH(d, &offload_dev_list, entry) {
+ if (d == dev)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Finalize the name of an offload device by assigning values to any format
+ * strings in its name.
+ */
+static int
+assign_name(struct toedev *dev, const char *name, int limit)
+{
+ int i;
+
+ for (i = 0; i < limit; ++i) {
+ char s[TOENAMSIZ];
+
+ if (snprintf(s, sizeof(s), name, i) >= sizeof(s))
+ return -1; /* name too long */
+ if (!__find_offload_dev_by_name(s)) {
+ strcpy(dev->tod_name, s);
+ return 0;
+ }
+ }
+ return -1;
+}
+
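assign_name() above substitutes increasing integers into the single %d of the template until it finds a name no registered device is using. A stand-alone user-space sketch of the same scan (TOENAMSIZ is assumed to be 16 here and the taken-name list is made up; the real constant and the device registry come from the toecore headers and offload_dev_list):

/* Stand-alone sketch of the %d name-template scan; values are assumptions. */
#include <stdio.h>
#include <string.h>

#define TOENAMSIZ 16	/* assumed; the real value comes from the toedev header */

static const char *taken[] = { "toe0", "toe1", NULL };

static int
name_in_use(const char *s)
{
	for (int i = 0; taken[i] != NULL; i++)
		if (strncmp(taken[i], s, TOENAMSIZ) == 0)
			return (1);
	return (0);
}

static int
assign_name(char *out, const char *name, int limit)
{
	for (int i = 0; i < limit; ++i) {
		char s[TOENAMSIZ];

		if (snprintf(s, sizeof(s), name, i) >= (int)sizeof(s))
			return (-1);		/* name too long */
		if (!name_in_use(s)) {
			strcpy(out, s);
			return (0);
		}
	}
	return (-1);
}

int
main(void)
{
	char name[TOENAMSIZ];

	if (assign_name(name, "toe%d", 32) == 0)
		printf("assigned %s\n", name);	/* "toe0"/"toe1" are taken, so "toe2" */
	return (0);
}
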
+/**
+ * register_toedev - register a TOE device
+ * @dev: the device
+ * @name: a name template for the device
+ *
+ * Register a TOE device and try to attach an appropriate TCP offload module
+ * to it. @name is a template that may contain at most one %d format
+ * specifier.
+ */
+int
+register_toedev(struct toedev *dev, const char *name)
+{
+ int ret;
+ const char *p;
+
+ /*
+ * Validate the name template. Only one %d is allowed and the name must
+ * be usable as a device name (no '/', ".", or "..").
+ */
+ if (!name || !*name || !strcmp(name, ".") || !strcmp(name, "..") ||
+ strchr(name, '/'))
+ return EINVAL;
+
+ p = strchr(name, '%');
+ if (p && (p[1] != 'd' || strchr(p + 2, '%')))
+ return EINVAL;
+
+ mtx_lock(&offload_db_lock);
+ if (is_registered(dev)) { /* device already registered */
+ ret = EEXIST;
+ goto out;
+ }
+
+ if ((ret = assign_name(dev, name, 32)) != 0)
+ goto out;
+
+ dev->tod_offload_mod = NULL;
+ TAILQ_INSERT_TAIL(&offload_dev_list, dev, entry);
+out:
+ mtx_unlock(&offload_db_lock);
+ return ret;
+}
+
+/**
+ * unregister_toedev - unregister a TOE device
+ * @dev: the device
+ *
+ * Unregister a TOE device. The device must not be attached to an offload
+ * module.
+ */
+int
+unregister_toedev(struct toedev *dev)
+{
+ int ret = 0;
+
+ mtx_lock(&offload_db_lock);
+ if (!is_registered(dev)) {
+ ret = ENODEV;
+ goto out;
+ }
+ if (is_attached(dev)) {
+ ret = EBUSY;
+ goto out;
+ }
+ TAILQ_REMOVE(&offload_dev_list, dev, entry);
+out:
+ mtx_unlock(&offload_db_lock);
+ return ret;
+}
+
+/**
+ * activate_offload - activate an offload device
+ * @dev: the device
+ *
+ * Activate an offload device by locating an appropriate registered offload
+ * module. If no module is found the operation fails and may be retried at
+ * a later time.
+ */
+int
+activate_offload(struct toedev *dev)
+{
+ int ret = 0;
+
+ mtx_lock(&offload_db_lock);
+ if (!is_registered(dev))
+ ret = ENODEV;
+ else if (!is_attached(dev))
+ ret = offload_attach(dev);
+ mtx_unlock(&offload_db_lock);
+ return ret;
+}
+
+/**
+ * toe_send - send a packet to a TOE device
+ * @dev: the device
+ * @m: the packet
+ *
+ * Sends an mbuf to a TOE driver after dealing with any active network taps.
+ */
+int
+toe_send(struct toedev *dev, struct mbuf *m)
+{
+ int r;
+
+ critical_enter(); /* XXX necessary? */
+ r = dev->tod_send(dev, m);
+ critical_exit();
+ if (r)
+ BPF_MTAP(dev->tod_lldev, m);
+ return r;
+}
+
+/**
+ * toe_receive_mbuf - process n received TOE packets
+ * @dev: the toe device
+ * @m: an array of offload packets
+ * @n: the number of offload packets
+ *
+ * Process an array of ingress offload packets. Each packet is forwarded
+ * to any active network taps and then passed to the toe device's receive
+ * method. We optimize passing packets to the receive method by passing
+ * it the whole array at once except when there are active taps.
+ */
+int
+toe_receive_mbuf(struct toedev *dev, struct mbuf **m, int n)
+{
+ if (__predict_true(!bpf_peers_present(dev->tod_lldev->if_bpf)))
+ return dev->tod_recv(dev, m, n);
+
+ for ( ; n; n--, m++) {
+ m[0]->m_pkthdr.rcvif = dev->tod_lldev;
+ BPF_MTAP(dev->tod_lldev, m[0]);
+ dev->tod_recv(dev, m, 1);
+ }
+ return 0;
+}
+
+static inline int
+ifnet_is_offload(const struct ifnet *ifp)
+{
+ return (ifp->if_capenable & IFCAP_TOE);
+}
+
+void
+toe_arp_update(struct rtentry *rt)
+{
+ struct ifnet *ifp = rt->rt_ifp;
+
+ if (ifp && ifnet_is_offload(ifp)) {
+ struct toedev *tdev = TOEDEV(ifp);
+
+ if (tdev && tdev->tod_arp_update)
+ tdev->tod_arp_update(tdev, rt);
+ }
+}
+
+/**
+ * offload_get_phys_egress - find the physical egress device
+ * @root_dev: the root device anchoring the search
+ * @so: the socket used to determine egress port in bonding mode
+ * @context: in bonding mode, indicates a connection set up or failover
+ *
+ * Given a root network device it returns the physical egress device that is a
+ * descendant of the root device. The root device may be either a physical
+ * device, in which case it is the device returned, or a virtual device, such
+ * as a VLAN or bonding device. In case of a bonding device the search
+ * considers the decisions of the bonding device given its mode to locate the
+ * correct egress device.
+ */
+struct ifnet *
+offload_get_phys_egress(struct ifnet *root_dev, struct socket *so, int context)
+{
+
+#if 0
+ while (root_dev && ifnet_is_offload(root_dev)) {
+ if (root_dev->tod_priv_flags & IFF_802_1Q_VLAN)
+ root_dev = VLAN_DEV_INFO(root_dev)->real_dev;
+ else if (root_dev->tod_flags & IFF_MASTER)
+ root_dev = toe_bond_get_slave(root_dev, sk, context);
+ else
+ break;
+ }
+#endif
+ return root_dev;
+}
+
+static int
+toecore_load(module_t mod, int cmd, void *arg)
+{
+ int err = 0;
+
+ switch (cmd) {
+ case MOD_LOAD:
+ mtx_init(&offload_db_lock, "toedev lock", NULL, MTX_DEF);
+ TAILQ_INIT(&offload_dev_list);
+ TAILQ_INIT(&offload_module_list);
+ break;
+ case MOD_QUIESCE:
+ break;
+ case MOD_UNLOAD:
+ mtx_lock(&offload_db_lock);
+ if (!TAILQ_EMPTY(&offload_dev_list) ||
+ !TAILQ_EMPTY(&offload_module_list)) {
+ err = EBUSY;
+ mtx_unlock(&offload_db_lock);
+ break;
+ }
+ mtx_unlock(&offload_db_lock);
+ mtx_destroy(&offload_db_lock);
+ break;
+ case MOD_SHUTDOWN:
+ break;
+ default:
+ err = EOPNOTSUPP;
+ break;
+ }
+
+ return (err);
+}
+
+
+static moduledata_t mod_data= {
+ "toecore",
+ toecore_load,
+ 0
+};
+
+MODULE_VERSION(toecore, 1);
+DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
new file mode 100644
index 0000000000000..00b45750e752b
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
@@ -0,0 +1,4456 @@
+/**************************************************************************
+
+Copyright (c) 2007-2008, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/protosw.h>
+#include <sys/priv.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+
+
+#include <dev/cxgb/cxgb_osdep.h>
+#include <dev/cxgb/sys/mbufq.h>
+
+#include <netinet/ip.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_offload.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_timer.h>
+#include <net/route.h>
+
+#include <dev/cxgb/t3cdev.h>
+#include <dev/cxgb/common/cxgb_firmware_exports.h>
+#include <dev/cxgb/common/cxgb_t3_cpl.h>
+#include <dev/cxgb/common/cxgb_tcb.h>
+#include <dev/cxgb/common/cxgb_ctl_defs.h>
+#include <dev/cxgb/cxgb_offload.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/bus.h>
+#include <dev/cxgb/sys/mvec.h>
+#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
+#include <dev/cxgb/ulp/tom/cxgb_defs.h>
+#include <dev/cxgb/ulp/tom/cxgb_tom.h>
+#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
+
+#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
+
+/*
+ * For ULP connections HW may add headers, e.g., for digests, that aren't part
+ * of the messages sent by the host but that are part of the TCP payload and
+ * therefore consume TCP sequence space. Tx connection parameters that
+ * operate in TCP sequence space are affected by the HW additions and need to
+ * compensate for them to accurately track TCP sequence numbers. This array
+ * contains the compensating extra lengths for ULP packets. It is indexed by
+ * a packet's ULP submode.
+ */
+const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
+
+#ifdef notyet
+/*
+ * This sk_buff holds a fake header-only TCP segment that we use whenever we
+ * need to exploit SW TCP functionality that expects TCP headers, such as
+ * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple
+ * CPUs without locking.
+ */
+static struct mbuf *tcphdr_mbuf __read_mostly;
+#endif
+
+/*
+ * Size of WRs in bytes. Note that we assume all devices we are handling have
+ * the same WR size.
+ */
+static unsigned int wrlen __read_mostly;
+
+/*
+ * The number of WRs needed for an mbuf chain depends on the number of buffers
+ * in the chain and whether it has any payload in its main body. This maps the
+ * length of the gather list represented by an mbuf chain into the # of necessary WRs.
+ */
+static unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly;
+
+/*
+ * Max receive window supported by HW in bytes. Only a small part of it can
+ * be set through option0, the rest needs to be set through RX_DATA_ACK.
+ */
+#define MAX_RCV_WND ((1U << 27) - 1)
+
+/*
+ * Min receive window. We want it to be large enough to accommodate receive
+ * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
+ */
+#define MIN_RCV_WND (24 * 1024U)
+#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
+
+#define VALIDATE_SEQ 0
+#define VALIDATE_SOCK(so)
+#define DEBUG_WR 0
+
+#define TCP_TIMEWAIT 1
+#define TCP_CLOSE 2
+#define TCP_DROP 3
+
+extern int tcp_do_autorcvbuf;
+extern int tcp_do_autosndbuf;
+extern int tcp_autorcvbuf_max;
+extern int tcp_autosndbuf_max;
+
+static void t3_send_reset(struct toepcb *toep);
+static void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status);
+static inline void free_atid(struct t3cdev *cdev, unsigned int tid);
+static void handle_syncache_event(int event, void *arg);
+
+static inline void
+SBAPPEND(struct sockbuf *sb, struct mbuf *n)
+{
+ struct mbuf *m;
+
+ m = sb->sb_mb;
+ while (m) {
+ KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
+ !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
+ !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
+ KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
+ m->m_next, m->m_nextpkt, m->m_flags));
+ m = m->m_next;
+ }
+ m = n;
+ while (m) {
+ KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) ||
+ !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n",
+ !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
+ KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
+ m->m_next, m->m_nextpkt, m->m_flags));
+ m = m->m_next;
+ }
+ KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set"));
+ sbappendstream_locked(sb, n);
+ m = sb->sb_mb;
+
+ while (m) {
+ KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x",
+ m->m_next, m->m_nextpkt, m->m_flags));
+ m = m->m_next;
+ }
+}
+
+static inline int
+is_t3a(const struct toedev *dev)
+{
+ return (dev->tod_ttid == TOE_ID_CHELSIO_T3);
+}
+
+static void
+dump_toepcb(struct toepcb *toep)
+{
+ DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n",
+ toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode,
+ toep->tp_mtu_idx, toep->tp_tid);
+
+ DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n",
+ toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked,
+ toep->tp_mss_clamp, toep->tp_flags);
+}
+
+#ifndef RTALLOC2_DEFINED
+static struct rtentry *
+rtalloc2(struct sockaddr *dst, int report, u_long ignflags)
+{
+ struct rtentry *rt = NULL;
+
+ if ((rt = rtalloc1(dst, report, ignflags)) != NULL)
+ RT_UNLOCK(rt);
+
+ return (rt);
+}
+#endif
+
+/*
+ * Determine whether to send a CPL message now or defer it. A message is
+ * deferred if the connection is in SYN_SENT since we don't know the TID yet.
+ * For connections in other states the message is sent immediately.
+ * If through_l2t is set the message is subject to ARP processing, otherwise
+ * it is sent directly.
+ */
+static inline void
+send_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t)
+{
+ struct tcpcb *tp = toep->tp_tp;
+
+ if (__predict_false(tp->t_state == TCPS_SYN_SENT)) {
+ inp_wlock(tp->t_inpcb);
+ mbufq_tail(&toep->out_of_order_queue, m); // defer
+ inp_wunlock(tp->t_inpcb);
+ } else if (through_l2t)
+ l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T
+ else
+ cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly
+}
+
+static inline unsigned int
+mkprio(unsigned int cntrl, const struct toepcb *toep)
+{
+ return (cntrl);
+}
+
+/*
+ * Populate a TID_RELEASE WR. The mbuf must already be properly sized.
+ */
+static inline void
+mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
+{
+ struct cpl_tid_release *req;
+
+ m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+ req = mtod(m, struct cpl_tid_release *);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = 0;
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
+}
+
+static inline void
+make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+ struct tx_data_wr *req;
+ struct sockbuf *snd;
+
+ inp_lock_assert(tp->t_inpcb);
+ snd = so_sockbuf_snd(so);
+
+ req = mtod(m, struct tx_data_wr *);
+ m->m_len = sizeof(*req);
+ req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+ req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
+ /* len includes the length of any HW ULP additions */
+ req->len = htonl(len);
+ req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
+ /* V_TX_ULP_SUBMODE sets both the mode and submode */
+ req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
+ V_TX_URG(/* skb_urgent(skb) */ 0 ) |
+ V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
+ (tail ? 0 : 1))));
+ req->sndseq = htonl(tp->snd_nxt);
+ if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
+ req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
+ V_TX_CPU_IDX(toep->tp_qset));
+
+ /* Sendbuffer is in units of 32KB.
+ */
+ if (tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
+ req->param |= htonl(V_TX_SNDBUF(tcp_autosndbuf_max >> 15));
+ else {
+ req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
+ }
+
+ toep->tp_flags |= TP_DATASENT;
+ }
+}
+
+#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */
+
+int
+t3_push_frames(struct socket *so, int req_completion)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+
+ struct mbuf *tail, *m0, *last;
+ struct t3cdev *cdev;
+ struct tom_data *d;
+ int state, bytes, count, total_bytes;
+ bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
+ struct sockbuf *snd;
+
+ if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
+ DPRINTF("tcp state=%d\n", tp->t_state);
+ return (0);
+ }
+
+ state = so_state_get(so);
+
+ if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
+ DPRINTF("disconnecting\n");
+
+ return (0);
+ }
+
+ inp_lock_assert(tp->t_inpcb);
+
+ snd = so_sockbuf_snd(so);
+ sockbuf_lock(snd);
+
+ d = TOM_DATA(toep->tp_toedev);
+ cdev = d->cdev;
+
+ last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
+
+ total_bytes = 0;
+ DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
+ toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);
+
+ if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
+ KASSERT(tail, ("sbdrop error"));
+ last = tail = tail->m_next;
+ }
+
+ if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
+ DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
+ sockbuf_unlock(snd);
+
+ return (0);
+ }
+
+ toep->tp_m_last = NULL;
+ while (toep->tp_wr_avail && (tail != NULL)) {
+ count = bytes = 0;
+ segp = segs;
+ if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
+ sockbuf_unlock(snd);
+ return (0);
+ }
+ /*
+ * If the data in tail is small enough to be sent in-line,
+ * make an immediate-data WR.
+ */
+ if (tail->m_len <= IMM_LEN) {
+ count = 1;
+ bytes = tail->m_len;
+ last = tail;
+ tail = tail->m_next;
+ m_set_sgl(m0, NULL);
+ m_set_sgllen(m0, 0);
+ make_tx_data_wr(so, m0, bytes, tail);
+ m_append(m0, bytes, mtod(last, caddr_t));
+ KASSERT(!m0->m_next, ("bad append"));
+ } else {
+ while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
+ && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
+ bytes += tail->m_len;
+ last = tail;
+ count++;
+ /*
+ * technically an abuse to be using this for a VA
+ * but less gross than defining my own structure
+ * or calling pmap_kextract from here :-|
+ */
+ segp->ds_addr = (bus_addr_t)tail->m_data;
+ segp->ds_len = tail->m_len;
+ DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
+ count, mbuf_wrs[count], tail->m_data, tail->m_len);
+ segp++;
+ tail = tail->m_next;
+ }
+ DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
+ toep->tp_wr_avail, count, mbuf_wrs[count], tail);
+
+ m_set_sgl(m0, segs);
+ m_set_sgllen(m0, count);
+ make_tx_data_wr(so, m0, bytes, tail);
+ }
+ m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));
+
+ if (tail) {
+ snd->sb_sndptr = tail;
+ toep->tp_m_last = NULL;
+ } else
+ toep->tp_m_last = snd->sb_sndptr = last;
+
+
+ DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);
+
+ snd->sb_sndptroff += bytes;
+ total_bytes += bytes;
+ toep->tp_write_seq += bytes;
+ CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
+ " tail=%p sndptr=%p sndptroff=%d",
+ toep->tp_wr_avail, count, mbuf_wrs[count],
+ tail, snd->sb_sndptr, snd->sb_sndptroff);
+ if (tail)
+ CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
+ " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
+ total_bytes, toep->tp_m_last, tail->m_data,
+ tp->snd_una);
+ else
+ CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
+ " tp_m_last=%p snd_una=0x%08x",
+ total_bytes, toep->tp_m_last, tp->snd_una);
+
+
+#ifdef KTR
+{
+ int i;
+
+ i = 0;
+ while (i < count && m_get_sgllen(m0)) {
+ if ((count - i) >= 3) {
+ CTR6(KTR_TOM,
+ "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
+ " len=%d pa=0x%zx len=%d",
+ segs[i].ds_addr, segs[i].ds_len,
+ segs[i + 1].ds_addr, segs[i + 1].ds_len,
+ segs[i + 2].ds_addr, segs[i + 2].ds_len);
+ i += 3;
+ } else if ((count - i) == 2) {
+ CTR4(KTR_TOM,
+ "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
+ " len=%d",
+ segs[i].ds_addr, segs[i].ds_len,
+ segs[i + 1].ds_addr, segs[i + 1].ds_len);
+ i += 2;
+ } else {
+ CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
+ segs[i].ds_addr, segs[i].ds_len);
+ i++;
+ }
+
+ }
+}
+#endif
+ /*
+ * remember credits used
+ */
+ m0->m_pkthdr.csum_data = mbuf_wrs[count];
+ m0->m_pkthdr.len = bytes;
+ toep->tp_wr_avail -= mbuf_wrs[count];
+ toep->tp_wr_unacked += mbuf_wrs[count];
+
+ if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
+ toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
+ struct work_request_hdr *wr = cplhdr(m0);
+
+ wr->wr_hi |= htonl(F_WR_COMPL);
+ toep->tp_wr_unacked = 0;
+ }
+ KASSERT((m0->m_pkthdr.csum_data > 0) &&
+ (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
+ m0->m_pkthdr.csum_data));
+ m0->m_type = MT_DONTFREE;
+ enqueue_wr(toep, m0);
+ DPRINTF("sending offload tx with %d bytes in %d segments\n",
+ bytes, count);
+ l2t_send(cdev, m0, toep->tp_l2t);
+ }
+ sockbuf_unlock(snd);
+ return (total_bytes);
+}
+
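t3_push_frames() above stashes the number of WR credits each mbuf consumed in m_pkthdr.csum_data, decrements tp_wr_avail as it sends, and sets F_WR_COMPL once tp_wr_unacked reaches half of tp_wr_max so the adapter returns credits (or immediately when the caller requested a completion for the first outstanding WR). A simplified stand-alone model of that threshold policy (the credit costs and limits below are made up for illustration):

/* Simplified stand-alone model of the WR-credit / completion-request policy in
 * t3_push_frames(); the credit costs below are made up for illustration. */
#include <stdio.h>

int
main(void)
{
	unsigned int wr_max = 16, wr_avail = 16, wr_unacked = 0;
	const unsigned int credits_per_wr[] = { 1, 2, 3, 2, 4, 1, 3, 2 };
	const int nwr = sizeof(credits_per_wr) / sizeof(credits_per_wr[0]);

	for (int i = 0; i < nwr && wr_avail >= credits_per_wr[i]; i++) {
		wr_avail -= credits_per_wr[i];
		wr_unacked += credits_per_wr[i];
		if (wr_unacked >= wr_max / 2) {
			/* ask the adapter for a completion so it returns credits */
			printf("WR %d: request completion, %u credits outstanding\n",
			    i, wr_unacked);
			wr_unacked = 0;
		}
	}
	printf("%u of %u credits still available\n", wr_avail, wr_max);
	return (0);
}
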
+/*
+ * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail
+ * under any circumstances. We take the easy way out and always queue the
+ * message to the write_queue. We can optimize the case where the queue is
+ * already empty though the optimization is probably not worth it.
+ */
+static void
+close_conn(struct socket *so)
+{
+ struct mbuf *m;
+ struct cpl_close_con_req *req;
+ struct tom_data *d;
+ struct inpcb *inp = so_sotoinpcb(so);
+ struct tcpcb *tp;
+ struct toepcb *toep;
+ unsigned int tid;
+
+
+ inp_wlock(inp);
+ tp = so_sototcpcb(so);
+ toep = tp->t_toe;
+
+ if (tp->t_state != TCPS_SYN_SENT)
+ t3_push_frames(so, 1);
+
+ if (toep->tp_flags & TP_FIN_SENT) {
+ inp_wunlock(inp);
+ return;
+ }
+
+ tid = toep->tp_tid;
+
+ d = TOM_DATA(toep->tp_toedev);
+
+ m = m_gethdr_nofail(sizeof(*req));
+ m_set_priority(m, CPL_PRIORITY_DATA);
+ m_set_sgl(m, NULL);
+ m_set_sgllen(m, 0);
+
+ toep->tp_flags |= TP_FIN_SENT;
+ req = mtod(m, struct cpl_close_con_req *);
+
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
+ req->wr.wr_lo = htonl(V_WR_TID(tid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
+ req->rsvd = 0;
+ inp_wunlock(inp);
+ /*
+ * XXX - need to defer shutdown while there is still data in the queue
+ *
+ */
+ CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
+ cxgb_ofld_send(d->cdev, m);
+
+}
+
+/*
+ * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
+ * and send it along.
+ */
+static void
+abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
+{
+ struct cpl_abort_req *req = cplhdr(m);
+
+ req->cmd = CPL_ABORT_NO_RST;
+ cxgb_ofld_send(cdev, m);
+}
+
+/*
+ * Send RX credits through an RX_DATA_ACK CPL message. If nofail is 0 we are
+ * permitted to return without sending the message in case we cannot allocate
+ * an mbuf. Returns the number of credits sent.
+ */
+uint32_t
+t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
+{
+ struct mbuf *m;
+ struct cpl_rx_data_ack *req;
+ struct toepcb *toep = tp->t_toe;
+ struct toedev *tdev = toep->tp_toedev;
+
+ m = m_gethdr_nofail(sizeof(*req));
+
+ DPRINTF("returning %u credits to HW\n", credits);
+
+ req = mtod(m, struct cpl_rx_data_ack *);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = 0;
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
+ req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
+ m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
+ cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
+ return (credits);
+}
+
+/*
+ * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
+ * This is only used in DDP mode, so we take the opportunity to also set the
+ * DACK mode and flush any Rx credits.
+ */
+void
+t3_send_rx_modulate(struct toepcb *toep)
+{
+ struct mbuf *m;
+ struct cpl_rx_data_ack *req;
+
+ m = m_gethdr_nofail(sizeof(*req));
+
+ req = mtod(m, struct cpl_rx_data_ack *);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = 0;
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
+ req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
+ V_RX_DACK_MODE(1) |
+ V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
+ m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
+ cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
+ toep->tp_rcv_wup = toep->tp_copied_seq;
+}
+
+/*
+ * Handle receipt of an urgent pointer.
+ */
+static void
+handle_urg_ptr(struct socket *so, uint32_t urg_seq)
+{
+#ifdef URGENT_DATA_SUPPORTED
+ struct tcpcb *tp = so_sototcpcb(so);
+
+ urg_seq--; /* initially points past the urgent data, per BSD */
+
+ if (tp->urg_data && !after(urg_seq, tp->urg_seq))
+ return; /* duplicate pointer */
+ sk_send_sigurg(sk);
+ if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
+ !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+ tp->copied_seq++;
+ if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
+ tom_eat_skb(sk, skb, 0);
+ }
+ tp->urg_data = TCP_URG_NOTYET;
+ tp->urg_seq = urg_seq;
+#endif
+}
+
+/*
+ * Returns true if a socket cannot accept new Rx data.
+ */
+static inline int
+so_no_receive(const struct socket *so)
+{
+ return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
+}
+
+/*
+ * Process an urgent data notification.
+ */
+static void
+rx_urg_notify(struct toepcb *toep, struct mbuf *m)
+{
+ struct cpl_rx_urg_notify *hdr = cplhdr(m);
+ struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
+
+ VALIDATE_SOCK(so);
+
+ if (!so_no_receive(so))
+ handle_urg_ptr(so, ntohl(hdr->seq));
+
+ m_freem(m);
+}
+
+/*
+ * Handler for RX_URG_NOTIFY CPL messages.
+ */
+static int
+do_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+
+ rx_urg_notify(toep, m);
+ return (0);
+}
+
+static __inline int
+is_delack_mode_valid(struct toedev *dev, struct toepcb *toep)
+{
+ return (toep->tp_ulp_mode ||
+ (toep->tp_ulp_mode == ULP_MODE_TCPDDP &&
+ dev->tod_ttid >= TOE_ID_CHELSIO_T3));
+}
+
+/*
+ * Set of states for which we should return RX credits.
+ */
+#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2)
+
+/*
+ * Called after some received data has been read. It returns RX credits
+ * to the HW for the amount of data processed.
+ */
+void
+t3_cleanup_rbuf(struct tcpcb *tp, int copied)
+{
+ struct toepcb *toep = tp->t_toe;
+ struct socket *so;
+ struct toedev *dev;
+ int dack_mode, must_send, read;
+ u32 thres, credits, dack = 0;
+ struct sockbuf *rcv;
+
+ so = inp_inpcbtosocket(tp->t_inpcb);
+ rcv = so_sockbuf_rcv(so);
+
+ if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
+ (tp->t_state == TCPS_FIN_WAIT_2))) {
+ if (copied) {
+ sockbuf_lock(rcv);
+ toep->tp_copied_seq += copied;
+ sockbuf_unlock(rcv);
+ }
+
+ return;
+ }
+
+ inp_lock_assert(tp->t_inpcb);
+
+ sockbuf_lock(rcv);
+ if (copied)
+ toep->tp_copied_seq += copied;
+ else {
+ read = toep->tp_enqueued_bytes - rcv->sb_cc;
+ toep->tp_copied_seq += read;
+ }
+ credits = toep->tp_copied_seq - toep->tp_rcv_wup;
+ toep->tp_enqueued_bytes = rcv->sb_cc;
+ sockbuf_unlock(rcv);
+
+ if (credits > rcv->sb_mbmax) {
+ log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n",
+ toep->tp_copied_seq, toep->tp_rcv_wup, credits);
+ credits = rcv->sb_mbmax;
+ }
+
+
+ /*
+ * XXX this won't accurately reflect credit return - we need
+ * to look at the difference between the amount that has been
+ * put in the recv sockbuf and what is there now
+ */
+
+ if (__predict_false(!credits))
+ return;
+
+ dev = toep->tp_toedev;
+ thres = TOM_TUNABLE(dev, rx_credit_thres);
+
+ if (__predict_false(thres == 0))
+ return;
+
+ if (is_delack_mode_valid(dev, toep)) {
+ dack_mode = TOM_TUNABLE(dev, delack);
+ if (__predict_false(dack_mode != toep->tp_delack_mode)) {
+ u32 r = tp->rcv_nxt - toep->tp_delack_seq;
+
+ if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp)
+ dack = F_RX_DACK_CHANGE |
+ V_RX_DACK_MODE(dack_mode);
+ }
+ } else
+ dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
+
+ /*
+ * For coalescing to work effectively ensure the receive window has
+ * at least 16KB left.
+ */
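+	/*
+	 * E.g., with a 64KB receive window a credit return is forced once
+	 * 48KB or more of credits have accumulated.
+	 */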
+ must_send = credits + 16384 >= tp->rcv_wnd;
+
+ if (must_send || credits >= thres)
+ toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send);
+}
+
+static int
+cxgb_toe_disconnect(struct tcpcb *tp)
+{
+ struct socket *so;
+
+ DPRINTF("cxgb_toe_disconnect\n");
+
+ so = inp_inpcbtosocket(tp->t_inpcb);
+ close_conn(so);
+ return (0);
+}
+
+static int
+cxgb_toe_reset(struct tcpcb *tp)
+{
+ struct toepcb *toep = tp->t_toe;
+
+ t3_send_reset(toep);
+
+ /*
+ * unhook from socket
+ */
+ tp->t_flags &= ~TF_TOE;
+ toep->tp_tp = NULL;
+ tp->t_toe = NULL;
+ return (0);
+}
+
+static int
+cxgb_toe_send(struct tcpcb *tp)
+{
+ struct socket *so;
+
+ DPRINTF("cxgb_toe_send\n");
+ dump_toepcb(tp->t_toe);
+
+ so = inp_inpcbtosocket(tp->t_inpcb);
+ t3_push_frames(so, 1);
+ return (0);
+}
+
+static int
+cxgb_toe_rcvd(struct tcpcb *tp)
+{
+
+ inp_lock_assert(tp->t_inpcb);
+
+ t3_cleanup_rbuf(tp, 0);
+
+ return (0);
+}
+
+static void
+cxgb_toe_detach(struct tcpcb *tp)
+{
+ struct toepcb *toep;
+
+ /*
+ * XXX how do we handle teardown in the SYN_SENT state?
+ *
+ */
+ inp_lock_assert(tp->t_inpcb);
+ toep = tp->t_toe;
+ toep->tp_tp = NULL;
+
+ /*
+ * unhook from socket
+ */
+ tp->t_flags &= ~TF_TOE;
+ tp->t_toe = NULL;
+}
+
+
+static struct toe_usrreqs cxgb_toe_usrreqs = {
+ .tu_disconnect = cxgb_toe_disconnect,
+ .tu_reset = cxgb_toe_reset,
+ .tu_send = cxgb_toe_send,
+ .tu_rcvd = cxgb_toe_rcvd,
+ .tu_detach = cxgb_toe_detach,
+ .tu_syncache_event = handle_syncache_event,
+};
+
+
+static void
+__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word,
+ uint64_t mask, uint64_t val, int no_reply)
+{
+ struct cpl_set_tcb_field *req;
+
+ CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
+ toep->tp_tid, word, mask, val);
+
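+	/*
+	 * A SET_TCB_FIELD request updates only the bits of TCB word 'word'
+	 * selected by 'mask', setting them to the corresponding bits of
+	 * 'val'; 'no_reply' suppresses the reply CPL from the hardware.
+	 */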
+ req = mtod(m, struct cpl_set_tcb_field *);
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = 0;
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid));
+ req->reply = V_NO_REPLY(no_reply);
+ req->cpu_idx = 0;
+ req->word = htons(word);
+ req->mask = htobe64(mask);
+ req->val = htobe64(val);
+
+ m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
+ send_or_defer(toep, m, 0);
+}
+
+static void
+t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val)
+{
+ struct mbuf *m;
+	struct tcpcb *tp;
+
+	if (toep == NULL)
+		return;
+
+	tp = toep->tp_tp;
+	if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) {
+		printf("not setting field\n");
+ return;
+ }
+
+ m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field));
+
+ __set_tcb_field(toep, m, word, mask, val, 1);
+}
+
+/*
+ * Set one of the t_flags bits in the TCB.
+ */
+static void
+set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val)
+{
+
+ t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos);
+}
+
+/*
+ * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting.
+ */
+static void
+t3_set_nagle(struct toepcb *toep)
+{
+ struct tcpcb *tp = toep->tp_tp;
+
+ set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY));
+}
+
+/*
+ * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting.
+ */
+void
+t3_set_keepalive(struct toepcb *toep, int on_off)
+{
+
+ set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off);
+}
+
+void
+t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off)
+{
+ set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off);
+}
+
+void
+t3_set_dack_mss(struct toepcb *toep, int on_off)
+{
+
+ set_tcb_tflag(toep, S_TF_DACK_MSS, on_off);
+}
+
+/*
+ * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting.
+ */
+static void
+t3_set_tos(struct toepcb *toep)
+{
+ int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb);
+
+ t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS),
+ V_TCB_TOS(tos));
+}
+
+
+/*
+ * In DDP mode, TP fails to schedule a timer to push RX data to the host when
+ * DDP is disabled (data is delivered to the freelist). [Note that the peer
+ * should set the PSH bit in the last segment, which would trigger delivery.]
+ * We work around the issue by setting a DDP buffer in a partially placed state,
+ * which guarantees that TP will schedule a timer.
+ */
+#define TP_DDP_TIMER_WORKAROUND_MASK\
+ (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\
+ ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\
+ V_TCB_RX_DDP_BUF0_LEN(3)) << 32))
+#define TP_DDP_TIMER_WORKAROUND_VAL\
+ (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\
+ ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\
+ 32))
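+/*
+ * Used by t3_enable_ddp() below when turning DDP off: the mask selects
+ * buffer 0's valid/active flags and parts of its offset and length fields,
+ * while the value re-marks buffer 0 as valid but inactive with offset 1 and
+ * length 2, i.e. partially placed, which is enough to make TP schedule its
+ * push timer.
+ */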
+
+static void
+t3_enable_ddp(struct toepcb *toep, int on)
+{
+ if (on) {
+
+ t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
+ V_TF_DDP_OFF(0));
+ } else
+ t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS,
+ V_TF_DDP_OFF(1) |
+ TP_DDP_TIMER_WORKAROUND_MASK,
+ V_TF_DDP_OFF(1) |
+ TP_DDP_TIMER_WORKAROUND_VAL);
+
+}
+
+void
+t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color)
+{
+ t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx,
+ V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
+ tag_color);
+}
+
+void
+t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
+ unsigned int len)
+{
+ if (buf_idx == 0)
+ t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET,
+ V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
+ V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
+ V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) |
+ V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
+ else
+ t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET,
+ V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
+ V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32),
+ V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) |
+ V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32));
+}
+
+static int
+t3_set_cong_control(struct socket *so, const char *name)
+{
+#ifdef CONGESTION_CONTROL_SUPPORTED
+ int cong_algo;
+
+ for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++)
+ if (!strcmp(name, t3_cong_ops[cong_algo].name))
+ break;
+
+ if (cong_algo >= ARRAY_SIZE(t3_cong_ops))
+ return -EINVAL;
+#endif
+ return 0;
+}
+
+int
+t3_get_tcb(struct toepcb *toep)
+{
+ struct cpl_get_tcb *req;
+ struct tcpcb *tp = toep->tp_tp;
+ struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
+
+ if (!m)
+ return (ENOMEM);
+
+ inp_lock_assert(tp->t_inpcb);
+ m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
+ req = mtod(m, struct cpl_get_tcb *);
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = 0;
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid));
+ req->cpuno = htons(toep->tp_qset);
+ req->rsvd = 0;
+ if (tp->t_state == TCPS_SYN_SENT)
+ mbufq_tail(&toep->out_of_order_queue, m); // defer
+ else
+ cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
+ return 0;
+}
+
+static inline void
+so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid)
+{
+
+ toepcb_hold(toep);
+
+ cxgb_insert_tid(d->cdev, d->client, toep, tid);
+}
+
+/**
+ * find_best_mtu - find the entry in the MTU table closest to an MTU
+ * @d: TOM state
+ * @mtu: the target MTU
+ *
+ * Returns the index of the value in the MTU table that is closest to but
+ * does not exceed the target MTU.
+ */
+static unsigned int
+find_best_mtu(const struct t3c_data *d, unsigned short mtu)
+{
+ int i = 0;
+
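+	/*
+	 * E.g., with an MTU table of {1500, 4420, 9000}, a target of 4500
+	 * selects index 1 (4420), the largest entry not exceeding the target.
+	 */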
+ while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
+ ++i;
+ return (i);
+}
+
+static unsigned int
+select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
+{
+ unsigned int idx;
+
+#ifdef notyet
+ struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt;
+#endif
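+	/*
+	 * The 40 bytes subtracted below are the fixed IPv4 (20 bytes) and
+	 * TCP (20 bytes, no options) headers used to convert between MTU
+	 * and MSS.
+	 */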
+ if (tp) {
+ tp->t_maxseg = pmtu - 40;
+ if (tp->t_maxseg < td->mtus[0] - 40)
+ tp->t_maxseg = td->mtus[0] - 40;
+ idx = find_best_mtu(td, tp->t_maxseg + 40);
+
+ tp->t_maxseg = td->mtus[idx] - 40;
+ } else
+ idx = find_best_mtu(td, pmtu);
+
+ return (idx);
+}
+
+static inline void
+free_atid(struct t3cdev *cdev, unsigned int tid)
+{
+ struct toepcb *toep = cxgb_free_atid(cdev, tid);
+
+ if (toep)
+ toepcb_release(toep);
+}
+
+/*
+ * Release resources held by an offload connection (TID, L2T entry, etc.)
+ */
+static void
+t3_release_offload_resources(struct toepcb *toep)
+{
+ struct tcpcb *tp = toep->tp_tp;
+ struct toedev *tdev = toep->tp_toedev;
+ struct t3cdev *cdev;
+ struct socket *so;
+ unsigned int tid = toep->tp_tid;
+ struct sockbuf *rcv;
+
+ CTR0(KTR_TOM, "t3_release_offload_resources");
+
+ if (!tdev)
+ return;
+
+ cdev = TOEP_T3C_DEV(toep);
+ if (!cdev)
+ return;
+
+ toep->tp_qset = 0;
+ t3_release_ddp_resources(toep);
+
+#ifdef CTRL_SKB_CACHE
+ kfree_skb(CTRL_SKB_CACHE(tp));
+ CTRL_SKB_CACHE(tp) = NULL;
+#endif
+
+ if (toep->tp_wr_avail != toep->tp_wr_max) {
+ purge_wr_queue(toep);
+ reset_wr_list(toep);
+ }
+
+ if (toep->tp_l2t) {
+ l2t_release(L2DATA(cdev), toep->tp_l2t);
+ toep->tp_l2t = NULL;
+ }
+ toep->tp_tp = NULL;
+ if (tp) {
+ inp_lock_assert(tp->t_inpcb);
+ so = inp_inpcbtosocket(tp->t_inpcb);
+ rcv = so_sockbuf_rcv(so);
+ /*
+ * cancel any offloaded reads
+ *
+ */
+ sockbuf_lock(rcv);
+ tp->t_toe = NULL;
+ tp->t_flags &= ~TF_TOE;
+ if (toep->tp_ddp_state.user_ddp_pending) {
+ t3_cancel_ubuf(toep, rcv);
+ toep->tp_ddp_state.user_ddp_pending = 0;
+ }
+ so_sorwakeup_locked(so);
+
+ }
+
+ if (toep->tp_state == TCPS_SYN_SENT) {
+ free_atid(cdev, tid);
+#ifdef notyet
+ __skb_queue_purge(&tp->out_of_order_queue);
+#endif
+ } else { // we have TID
+ cxgb_remove_tid(cdev, toep, tid);
+ toepcb_release(toep);
+ }
+#if 0
+ log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
+#endif
+}
+
+static void
+install_offload_ops(struct socket *so)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+
+ KASSERT(tp->t_toe != NULL, ("toepcb not set"));
+
+ t3_install_socket_ops(so);
+ tp->t_flags |= TF_TOE;
+ tp->t_tu = &cxgb_toe_usrreqs;
+}
+
+/*
+ * Determine the receive window scaling factor given a target max
+ * receive window.
+ */
+static __inline int
+select_rcv_wscale(int space)
+{
+ int wscale = 0;
+
+ if (space > MAX_RCV_WND)
+ space = MAX_RCV_WND;
+
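+	/*
+	 * With RFC 1323 scaling enabled, e.g., a 128KB window yields a
+	 * wscale of 2, since 128K >> 2 = 32K fits the 16-bit window field.
+	 */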
+ if (tcp_do_rfc1323)
+ for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ;
+
+ return (wscale);
+}
+
+/*
+ * Determine the receive window size for a socket.
+ */
+static unsigned long
+select_rcv_wnd(struct toedev *dev, struct socket *so)
+{
+ struct tom_data *d = TOM_DATA(dev);
+ unsigned int wnd;
+ unsigned int max_rcv_wnd;
+ struct sockbuf *rcv;
+
+ rcv = so_sockbuf_rcv(so);
+
+ if (tcp_do_autorcvbuf)
+ wnd = tcp_autorcvbuf_max;
+ else
+ wnd = rcv->sb_hiwat;
+
+
+
+ /* XXX
+ * For receive coalescing to work effectively we need a receive window
+	 * that can accommodate a coalesced segment.
+ */
+ if (wnd < MIN_RCV_WND)
+ wnd = MIN_RCV_WND;
+
+ /* PR 5138 */
+ max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ?
+ (uint32_t)d->rx_page_size * 23 :
+ MAX_RCV_WND);
+
+ return min(wnd, max_rcv_wnd);
+}
+
+/*
+ * Assign offload parameters to some socket fields. This code is used by
+ * both active and passive opens.
+ */
+static inline void
+init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid,
+ struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
+ struct sockbuf *snd, *rcv;
+
+#ifdef notyet
+ SOCK_LOCK_ASSERT(so);
+#endif
+
+ snd = so_sockbuf_snd(so);
+ rcv = so_sockbuf_rcv(so);
+
+ log(LOG_INFO, "initializing offload socket\n");
+ /*
+ * We either need to fix push frames to work with sbcompress
+ * or we need to add this
+ */
+ snd->sb_flags |= SB_NOCOALESCE;
+ rcv->sb_flags |= SB_NOCOALESCE;
+
+ tp->t_toe = toep;
+ toep->tp_tp = tp;
+ toep->tp_toedev = dev;
+
+ toep->tp_tid = tid;
+ toep->tp_l2t = e;
+ toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs);
+ toep->tp_wr_unacked = 0;
+ toep->tp_delack_mode = 0;
+
+ toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
+ /*
+ * XXX broken
+ *
+ */
+ tp->rcv_wnd = select_rcv_wnd(dev, so);
+
+ toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
+ tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
+ toep->tp_qset_idx = 0;
+
+ reset_wr_list(toep);
+ DPRINTF("initialization done\n");
+}
+
+/*
+ * The next two functions calculate the option 0 value for a socket.
+ */
+static inline unsigned int
+calc_opt0h(struct socket *so, int mtu_idx)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ int wscale = select_rcv_wscale(tp->rcv_wnd);
+
+ return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
+ V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
+ V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
+}
+
+static inline unsigned int
+calc_opt0l(struct socket *so, int ulp_mode)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ unsigned int val;
+
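+	/*
+	 * The receive buffer size is passed to the hardware in 1KB units,
+	 * clamped to the width of the RCV_BUFSIZ field.
+	 */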
+ val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) |
+ V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
+
+ DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val);
+ return (val);
+}
+
+static inline unsigned int
+calc_opt2(const struct socket *so, struct toedev *dev)
+{
+ int flv_valid;
+
+ flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
+
+ return (V_FLAVORS_VALID(flv_valid) |
+ V_CONG_CONTROL_FLAVOR(flv_valid ? TOM_TUNABLE(dev, cong_alg) : 0));
+}
+
+#if DEBUG_WR > 1
+static int
+count_pending_wrs(const struct toepcb *toep)
+{
+ const struct mbuf *m;
+ int n = 0;
+
+ wr_queue_walk(toep, m)
+ n += m->m_pkthdr.csum_data;
+ return (n);
+}
+#endif
+
+#if 0
+(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1)
+#endif
+
+static void
+mk_act_open_req(struct socket *so, struct mbuf *m,
+ unsigned int atid, const struct l2t_entry *e)
+{
+ struct cpl_act_open_req *req;
+ struct inpcb *inp = so_sotoinpcb(so);
+ struct tcpcb *tp = inp_inpcbtotcpcb(inp);
+ struct toepcb *toep = tp->t_toe;
+ struct toedev *tdev = toep->tp_toedev;
+
+ m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep));
+
+ req = mtod(m, struct cpl_act_open_req *);
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ req->wr.wr_lo = 0;
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid));
+ inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port);
+#if 0
+ req->local_port = inp->inp_lport;
+ req->peer_port = inp->inp_fport;
+ memcpy(&req->local_ip, &inp->inp_laddr, 4);
+ memcpy(&req->peer_ip, &inp->inp_faddr, 4);
+#endif
+ req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
+ V_TX_CHANNEL(e->smt_idx));
+ req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
+ req->params = 0;
+ req->opt2 = htonl(calc_opt2(so, tdev));
+}
+
+
+/*
+ * Convert an ACT_OPEN_RPL status to an errno.
+ */
+static int
+act_open_rpl_status_to_errno(int status)
+{
+ switch (status) {
+ case CPL_ERR_CONN_RESET:
+ return (ECONNREFUSED);
+ case CPL_ERR_ARP_MISS:
+ return (EHOSTUNREACH);
+ case CPL_ERR_CONN_TIMEDOUT:
+ return (ETIMEDOUT);
+ case CPL_ERR_TCAM_FULL:
+ return (ENOMEM);
+ case CPL_ERR_CONN_EXIST:
+ log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
+ return (EADDRINUSE);
+ default:
+ return (EIO);
+ }
+}
+
+static void
+fail_act_open(struct toepcb *toep, int errno)
+{
+ struct tcpcb *tp = toep->tp_tp;
+
+ t3_release_offload_resources(toep);
+ if (tp) {
+ inp_wunlock(tp->t_inpcb);
+ tcp_offload_drop(tp, errno);
+ }
+
+#ifdef notyet
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+#endif
+}
+
+/*
+ * Handle active open failures.
+ */
+static void
+active_open_failed(struct toepcb *toep, struct mbuf *m)
+{
+ struct cpl_act_open_rpl *rpl = cplhdr(m);
+ struct inpcb *inp;
+
+ if (toep->tp_tp == NULL)
+ goto done;
+
+ inp = toep->tp_tp->t_inpcb;
+
+	/*
+	 * Don't handle connection retry for now.
+	 */
+#ifdef notyet
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (rpl->status == CPL_ERR_CONN_EXIST &&
+ icsk->icsk_retransmit_timer.function != act_open_retry_timer) {
+ icsk->icsk_retransmit_timer.function = act_open_retry_timer;
+ sk_reset_timer(so, &icsk->icsk_retransmit_timer,
+ jiffies + HZ / 2);
+ } else
+#endif
+ {
+ inp_wlock(inp);
+ /*
+ * drops the inpcb lock
+ */
+ fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status));
+ }
+
+ done:
+ m_free(m);
+}
+
+/*
+ * Return whether a failed active open has allocated a TID
+ */
+static inline int
+act_open_has_tid(int status)
+{
+ return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
+ status != CPL_ERR_ARP_MISS;
+}
+
+/*
+ * Process an ACT_OPEN_RPL CPL message.
+ */
+static int
+do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+ struct cpl_act_open_rpl *rpl = cplhdr(m);
+
+ if (cdev->type != T3A && act_open_has_tid(rpl->status))
+ cxgb_queue_tid_release(cdev, GET_TID(rpl));
+
+ active_open_failed(toep, m);
+ return (0);
+}
+
+/*
+ * Handle an ARP failure for an active open. XXX purge ofo queue
+ *
+ * XXX badly broken for crossed SYNs as the ATID is no longer valid.
+ * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should
+ * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't
+ * free the atid. Hmm.
+ */
+#ifdef notyet
+static void
+act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m)
+{
+ struct toepcb *toep = m_get_toep(m);
+ struct tcpcb *tp = toep->tp_tp;
+ struct inpcb *inp = tp->t_inpcb;
+ struct socket *so;
+
+ inp_wlock(inp);
+ if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) {
+ /*
+ * drops the inpcb lock
+ */
+ fail_act_open(so, EHOSTUNREACH);
+ printf("freeing %p\n", m);
+
+ m_free(m);
+ } else
+ inp_wunlock(inp);
+}
+#endif
+/*
+ * Send an active open request.
+ */
+int
+t3_connect(struct toedev *tdev, struct socket *so,
+ struct rtentry *rt, struct sockaddr *nam)
+{
+ struct mbuf *m;
+ struct l2t_entry *e;
+ struct tom_data *d = TOM_DATA(tdev);
+ struct inpcb *inp = so_sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
+ struct toepcb *toep; /* allocated by init_offload_socket */
+
+ int atid;
+
+ toep = toepcb_alloc();
+ if (toep == NULL)
+ goto out_err;
+
+ if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0)
+ goto out_err;
+
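+	/*
+	 * The atid reserved above identifies this active-open request; the
+	 * failure paths below release it again via free_atid().
+	 */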
+ e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam);
+ if (!e)
+ goto free_tid;
+
+ inp_lock_assert(inp);
+	m = m_gethdr(M_WAITOK, MT_DATA);
+
+#if 0
+ m->m_toe.mt_toepcb = tp->t_toe;
+ set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure);
+#endif
+ so_lock(so);
+
+ init_offload_socket(so, tdev, atid, e, rt, toep);
+
+ install_offload_ops(so);
+
+ mk_act_open_req(so, m, atid, e);
+ so_unlock(so);
+
+ soisconnecting(so);
+ toep = tp->t_toe;
+ m_set_toep(m, tp->t_toe);
+
+ toep->tp_state = TCPS_SYN_SENT;
+ l2t_send(d->cdev, (struct mbuf *)m, e);
+
+ if (toep->tp_ulp_mode)
+ t3_enable_ddp(toep, 0);
+ return (0);
+
+free_tid:
+ printf("failing connect - free atid\n");
+
+ free_atid(d->cdev, atid);
+out_err:
+ printf("return ENOMEM\n");
+ return (ENOMEM);
+}
+
+/*
+ * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do
+ * not send multiple ABORT_REQs for the same connection and also that we do
+ * not try to send a message after the connection has closed.
+ */
+static void
+t3_send_reset(struct toepcb *toep)
+{
+
+ struct cpl_abort_req *req;
+ unsigned int tid = toep->tp_tid;
+ int mode = CPL_ABORT_SEND_RST;
+ struct tcpcb *tp = toep->tp_tp;
+ struct toedev *tdev = toep->tp_toedev;
+ struct socket *so = NULL;
+ struct mbuf *m;
+ struct sockbuf *snd;
+
+ if (tp) {
+ inp_lock_assert(tp->t_inpcb);
+ so = inp_inpcbtosocket(tp->t_inpcb);
+ }
+
+ if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) ||
+ tdev == NULL))
+ return;
+ toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN);
+
+	/* Purge the send queue so we don't send anything after an abort. */
+	if (so) {
+		snd = so_sockbuf_snd(so);
+		sbflush(snd);
+	}
+ if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev))
+ mode |= CPL_ABORT_POST_CLOSE_REQ;
+
+ m = m_gethdr_nofail(sizeof(*req));
+ m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep));
+ set_arp_failure_handler(m, abort_arp_failure);
+
+ req = mtod(m, struct cpl_abort_req *);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
+ req->wr.wr_lo = htonl(V_WR_TID(tid));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
+ req->rsvd0 = tp ? htonl(tp->snd_nxt) : 0;
+ req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
+ req->cmd = mode;
+ if (tp && (tp->t_state == TCPS_SYN_SENT))
+ mbufq_tail(&toep->out_of_order_queue, m); // defer
+ else
+ l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t);
+}
+
+static int
+t3_ip_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ struct inpcb *inp;
+ int error, optval;
+
+ if (sopt->sopt_name == IP_OPTIONS)
+ return (ENOPROTOOPT);
+
+ if (sopt->sopt_name != IP_TOS)
+ return (EOPNOTSUPP);
+
+ error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
+
+ if (error)
+ return (error);
+
+ if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread))
+ return (EPERM);
+
+ inp = so_sotoinpcb(so);
+ inp_wlock(inp);
+ inp_ip_tos_set(inp, optval);
+#if 0
+ inp->inp_ip_tos = optval;
+#endif
+ t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe);
+ inp_wunlock(inp);
+
+ return (0);
+}
+
+static int
+t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ int err = 0;
+ size_t copied;
+
+ if (sopt->sopt_name != TCP_CONGESTION &&
+ sopt->sopt_name != TCP_NODELAY)
+ return (EOPNOTSUPP);
+
+ if (sopt->sopt_name == TCP_CONGESTION) {
+ char name[TCP_CA_NAME_MAX];
+ int optlen = sopt->sopt_valsize;
+ struct tcpcb *tp;
+
+ if (sopt->sopt_dir == SOPT_GET) {
+ KASSERT(0, ("unimplemented"));
+ return (EOPNOTSUPP);
+ }
+
+ if (optlen < 1)
+ return (EINVAL);
+
+ err = copyinstr(sopt->sopt_val, name,
+ min(TCP_CA_NAME_MAX - 1, optlen), &copied);
+ if (err)
+ return (err);
+ if (copied < 1)
+ return (EINVAL);
+
+ tp = so_sototcpcb(so);
+ /*
+ * XXX I need to revisit this
+ */
+ if ((err = t3_set_cong_control(so, name)) == 0) {
+#ifdef CONGESTION_CONTROL_SUPPORTED
+ tp->t_cong_control = strdup(name, M_CXGB);
+#endif
+ } else
+ return (err);
+ } else {
+ int optval, oldval;
+ struct inpcb *inp;
+ struct tcpcb *tp;
+
+ if (sopt->sopt_dir == SOPT_GET)
+ return (EOPNOTSUPP);
+
+ err = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+
+ if (err)
+ return (err);
+
+ inp = so_sotoinpcb(so);
+ tp = inp_inpcbtotcpcb(inp);
+
+ inp_wlock(inp);
+
+ oldval = tp->t_flags;
+ if (optval)
+ tp->t_flags |= TF_NODELAY;
+ else
+ tp->t_flags &= ~TF_NODELAY;
+ inp_wunlock(inp);
+
+
+ if (oldval != tp->t_flags && (tp->t_toe != NULL))
+ t3_set_nagle(tp->t_toe);
+
+ }
+
+ return (0);
+}
+
+int
+t3_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ int err;
+
+ if (sopt->sopt_level != IPPROTO_TCP)
+ err = t3_ip_ctloutput(so, sopt);
+ else
+ err = t3_tcp_ctloutput(so, sopt);
+
+ if (err != EOPNOTSUPP)
+ return (err);
+
+ return (tcp_ctloutput(so, sopt));
+}
+
+/*
+ * Returns true if we need to explicitly request RST when we receive new data
+ * on an RX-closed connection.
+ */
+static inline int
+need_rst_on_excess_rx(const struct toepcb *toep)
+{
+ return (1);
+}
+
+/*
+ * Handles Rx data that arrives in a state where the socket isn't accepting
+ * new data.
+ */
+static void
+handle_excess_rx(struct toepcb *toep, struct mbuf *m)
+{
+
+ if (need_rst_on_excess_rx(toep) &&
+ !(toep->tp_flags & TP_ABORT_SHUTDOWN))
+ t3_send_reset(toep);
+ m_freem(m);
+}
+
+/*
+ * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE)
+ * by getting the DDP offset from the TCB.
+ */
+static void
+tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m)
+{
+ struct ddp_state *q = &toep->tp_ddp_state;
+ struct ddp_buf_state *bsp;
+ struct cpl_get_tcb_rpl *hdr;
+ unsigned int ddp_offset;
+ struct socket *so;
+ struct tcpcb *tp;
+ struct sockbuf *rcv;
+ int state;
+
+ uint64_t t;
+ __be64 *tcb;
+
+ tp = toep->tp_tp;
+ so = inp_inpcbtosocket(tp->t_inpcb);
+
+ inp_lock_assert(tp->t_inpcb);
+ rcv = so_sockbuf_rcv(so);
+ sockbuf_lock(rcv);
+
+	/* Note that we only account for CPL_GET_TCB issued by the DDP code.
+ * We really need a cookie in order to dispatch the RPLs.
+ */
+ q->get_tcb_count--;
+
+	/* It is possible that a previous CPL already invalidated UBUF DDP
+	 * and moved the cur_buf idx, and hence no further processing of this
+	 * mbuf is required. However, the app might be sleeping on
+ * !q->get_tcb_count and we need to wake it up.
+ */
+ if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) {
+ int state = so_state_get(so);
+
+ m_freem(m);
+ if (__predict_true((state & SS_NOFDREF) == 0))
+ so_sorwakeup_locked(so);
+ else
+ sockbuf_unlock(rcv);
+
+ return;
+ }
+
+ bsp = &q->buf_state[q->cur_buf];
+ hdr = cplhdr(m);
+ tcb = (__be64 *)(hdr + 1);
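+	/*
+	 * The GET_TCB reply carries the TCB as big-endian 64-bit words stored
+	 * in reverse order, so 32-bit TCB word W lives at index (31 - W) / 2;
+	 * the DDP buffer offset is then extracted from the appropriate half
+	 * of that 64-bit quantity.
+	 */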
+ if (q->cur_buf == 0) {
+ t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]);
+ ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET);
+ } else {
+ t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]);
+ ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET;
+ }
+ ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET;
+ m->m_cur_offset = bsp->cur_offset;
+ bsp->cur_offset = ddp_offset;
+ m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset;
+
+ CTR5(KTR_TOM,
+ "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u",
+ q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset);
+ KASSERT(ddp_offset >= m->m_cur_offset,
+ ("ddp_offset=%u less than cur_offset=%u",
+ ddp_offset, m->m_cur_offset));
+
+#if 0
+{
+ unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx;
+
+ t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]);
+ ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS;
+
+ t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]);
+ rcv_nxt = t >> S_TCB_RCV_NXT;
+ rcv_nxt &= M_TCB_RCV_NXT;
+
+ t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]);
+ rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET);
+ rx_hdr_offset &= M_TCB_RX_HDR_OFFSET;
+
+ T3_TRACE2(TIDTB(sk),
+ "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x",
+ ddp_flags, rcv_nxt - rx_hdr_offset);
+ T3_TRACE4(TB(q),
+ "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u",
+ tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf);
+ T3_TRACE3(TB(q),
+ "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u",
+ rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset);
+ T3_TRACE2(TB(q),
+ "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x",
+ q->buf_state[0].flags, q->buf_state[1].flags);
+
+}
+#endif
+ if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) {
+ handle_excess_rx(toep, m);
+ return;
+ }
+
+#ifdef T3_TRACE
+ if ((int)m->m_pkthdr.len < 0) {
+ t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len");
+ }
+#endif
+ if (bsp->flags & DDP_BF_NOCOPY) {
+#ifdef T3_TRACE
+ T3_TRACE0(TB(q),
+ "tcb_rpl_as_ddp_complete: CANCEL UBUF");
+
+ if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+ printk("!cancel_ubuf");
+ t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf");
+ }
+#endif
+ m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1;
+ bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA);
+ q->cur_buf ^= 1;
+ } else if (bsp->flags & DDP_BF_NOFLIP) {
+
+ m->m_ddp_flags = 1; /* always a kernel buffer */
+
+ /* now HW buffer carries a user buffer */
+ bsp->flags &= ~DDP_BF_NOFLIP;
+ bsp->flags |= DDP_BF_NOCOPY;
+
+ /* It is possible that the CPL_GET_TCB_RPL doesn't indicate
+ * any new data in which case we're done. If in addition the
+ * offset is 0, then there wasn't a completion for the kbuf
+ * and we need to decrement the posted count.
+ */
+ if (m->m_pkthdr.len == 0) {
+ if (ddp_offset == 0) {
+ q->kbuf_posted--;
+ bsp->flags |= DDP_BF_NODATA;
+ }
+ sockbuf_unlock(rcv);
+ m_free(m);
+ return;
+ }
+ } else {
+ sockbuf_unlock(rcv);
+
+ /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP,
+ * but it got here way late and nobody cares anymore.
+ */
+ m_free(m);
+ return;
+ }
+
+ m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
+ m->m_seq = tp->rcv_nxt;
+ tp->rcv_nxt += m->m_pkthdr.len;
+ tp->t_rcvtime = ticks;
+ CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u",
+ m->m_seq, q->cur_buf, m->m_pkthdr.len);
+ if (m->m_pkthdr.len == 0) {
+ q->user_ddp_pending = 0;
+ m_free(m);
+ } else
+ SBAPPEND(rcv, m);
+
+ state = so_state_get(so);
+ if (__predict_true((state & SS_NOFDREF) == 0))
+ so_sorwakeup_locked(so);
+ else
+ sockbuf_unlock(rcv);
+}
+
+/*
+ * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code,
+ * in that case they are similar to DDP completions.
+ */
+static int
+do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+
+ /* OK if socket doesn't exist */
+ if (toep == NULL) {
+ printf("null toep in do_get_tcb_rpl\n");
+ return (CPL_RET_BUF_DONE);
+ }
+
+ inp_wlock(toep->tp_tp->t_inpcb);
+ tcb_rpl_as_ddp_complete(toep, m);
+ inp_wunlock(toep->tp_tp->t_inpcb);
+
+ return (0);
+}
+
+static void
+handle_ddp_data(struct toepcb *toep, struct mbuf *m)
+{
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so;
+ struct ddp_state *q;
+ struct ddp_buf_state *bsp;
+ struct cpl_rx_data *hdr = cplhdr(m);
+ unsigned int rcv_nxt = ntohl(hdr->seq);
+ struct sockbuf *rcv;
+
+ if (tp->rcv_nxt == rcv_nxt)
+ return;
+
+ inp_lock_assert(tp->t_inpcb);
+ so = inp_inpcbtosocket(tp->t_inpcb);
+ rcv = so_sockbuf_rcv(so);
+ sockbuf_lock(rcv);
+
+ q = &toep->tp_ddp_state;
+ bsp = &q->buf_state[q->cur_buf];
+ KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
+ rcv_nxt, tp->rcv_nxt));
+ m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+ KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
+ CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
+ rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
+
+#ifdef T3_TRACE
+ if ((int)m->m_pkthdr.len < 0) {
+ t3_ddp_error(so, "handle_ddp_data: neg len");
+ }
+#endif
+ m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
+ m->m_cur_offset = bsp->cur_offset;
+ m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
+ if (bsp->flags & DDP_BF_NOCOPY)
+ bsp->flags &= ~DDP_BF_NOCOPY;
+
+ m->m_seq = tp->rcv_nxt;
+ tp->rcv_nxt = rcv_nxt;
+ bsp->cur_offset += m->m_pkthdr.len;
+ if (!(bsp->flags & DDP_BF_NOFLIP))
+ q->cur_buf ^= 1;
+ /*
+ * For now, don't re-enable DDP after a connection fell out of DDP
+ * mode.
+ */
+ q->ubuf_ddp_ready = 0;
+ sockbuf_unlock(rcv);
+}
+
+/*
+ * Process new data received for a connection.
+ */
+static void
+new_rx_data(struct toepcb *toep, struct mbuf *m)
+{
+ struct cpl_rx_data *hdr = cplhdr(m);
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so;
+ struct sockbuf *rcv;
+ int state;
+ int len = be16toh(hdr->len);
+
+ inp_wlock(tp->t_inpcb);
+
+ so = inp_inpcbtosocket(tp->t_inpcb);
+
+ if (__predict_false(so_no_receive(so))) {
+ handle_excess_rx(toep, m);
+ inp_wunlock(tp->t_inpcb);
+ TRACE_EXIT;
+ return;
+ }
+
+ if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
+ handle_ddp_data(toep, m);
+
+ m->m_seq = ntohl(hdr->seq);
+ m->m_ulp_mode = 0; /* for iSCSI */
+
+#if VALIDATE_SEQ
+ if (__predict_false(m->m_seq != tp->rcv_nxt)) {
+ log(LOG_ERR,
+ "%s: TID %u: Bad sequence number %u, expected %u\n",
+ toep->tp_toedev->name, toep->tp_tid, m->m_seq,
+ tp->rcv_nxt);
+ m_freem(m);
+ inp_wunlock(tp->t_inpcb);
+ return;
+ }
+#endif
+ m_adj(m, sizeof(*hdr));
+
+#ifdef URGENT_DATA_SUPPORTED
+ /*
+ * We don't handle urgent data yet
+ */
+ if (__predict_false(hdr->urg))
+ handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
+ if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
+ tp->urg_seq - tp->rcv_nxt < skb->len))
+ tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
+ tp->rcv_nxt];
+#endif
+ if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
+ toep->tp_delack_mode = hdr->dack_mode;
+ toep->tp_delack_seq = tp->rcv_nxt;
+ }
+ CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
+ m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);
+
+ if (len < m->m_pkthdr.len)
+ m->m_pkthdr.len = m->m_len = len;
+
+ tp->rcv_nxt += m->m_pkthdr.len;
+ tp->t_rcvtime = ticks;
+ toep->tp_enqueued_bytes += m->m_pkthdr.len;
+ CTR2(KTR_TOM,
+ "new_rx_data: seq 0x%x len %u",
+ m->m_seq, m->m_pkthdr.len);
+ inp_wunlock(tp->t_inpcb);
+ rcv = so_sockbuf_rcv(so);
+ sockbuf_lock(rcv);
+#if 0
+ if (sb_notify(rcv))
+ DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
+#endif
+ SBAPPEND(rcv, m);
+
+#ifdef notyet
+ /*
+ * We're giving too many credits to the card - but disable this check so we can keep on moving :-|
+ *
+ */
+ KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),
+
+ ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
+ so, rcv->sb_cc, rcv->sb_mbmax));
+#endif
+
+
+ CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
+ rcv->sb_cc, rcv->sb_mbcnt);
+
+ state = so_state_get(so);
+ if (__predict_true((state & SS_NOFDREF) == 0))
+ so_sorwakeup_locked(so);
+ else
+ sockbuf_unlock(rcv);
+}
+
+/*
+ * Handler for RX_DATA CPL messages.
+ */
+static int
+do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+
+ DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);
+
+ new_rx_data(toep, m);
+
+ return (0);
+}
+
+static void
+new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
+{
+ struct tcpcb *tp;
+ struct ddp_state *q;
+ struct ddp_buf_state *bsp;
+ struct cpl_rx_data_ddp *hdr;
+ struct socket *so;
+ unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
+ int nomoredata = 0;
+ unsigned int delack_mode;
+ struct sockbuf *rcv;
+
+ tp = toep->tp_tp;
+ inp_wlock(tp->t_inpcb);
+ so = inp_inpcbtosocket(tp->t_inpcb);
+
+ if (__predict_false(so_no_receive(so))) {
+
+ handle_excess_rx(toep, m);
+ inp_wunlock(tp->t_inpcb);
+ return;
+ }
+
+ q = &toep->tp_ddp_state;
+ hdr = cplhdr(m);
+ ddp_report = ntohl(hdr->u.ddp_report);
+ buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
+ bsp = &q->buf_state[buf_idx];
+
+ CTR4(KTR_TOM,
+ "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
+ "hdr seq 0x%x len %u",
+ tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
+ ntohs(hdr->len));
+ CTR3(KTR_TOM,
+ "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
+ G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);
+
+ ddp_len = ntohs(hdr->len);
+ rcv_nxt = ntohl(hdr->seq) + ddp_len;
+
+ delack_mode = G_DDP_DACK_MODE(ddp_report);
+ if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
+ toep->tp_delack_mode = delack_mode;
+ toep->tp_delack_seq = tp->rcv_nxt;
+ }
+
+ m->m_seq = tp->rcv_nxt;
+ tp->rcv_nxt = rcv_nxt;
+
+ tp->t_rcvtime = ticks;
+ /*
+	 * Store the length in m->m_len. We are changing the meaning of
+	 * m->m_len here, so we need to be very careful that nothing from now
+	 * on interprets the length of this packet the usual way.
+ */
+ m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
+ inp_wunlock(tp->t_inpcb);
+ CTR3(KTR_TOM,
+ "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
+ m->m_len, rcv_nxt, m->m_seq);
+ /*
+	 * Figure out where the new data was placed in the buffer and store it
+	 * in m_cur_offset. This assumes the buffer offset starts at 0; the
+	 * consumer needs to account for the page pod's pg_offset.
+ */
+ end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
+ m->m_cur_offset = end_offset - m->m_pkthdr.len;
+
+ rcv = so_sockbuf_rcv(so);
+ sockbuf_lock(rcv);
+
+ m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
+ bsp->cur_offset = end_offset;
+ toep->tp_enqueued_bytes += m->m_pkthdr.len;
+
+ /*
+ * Length is only meaningful for kbuf
+ */
+ if (!(bsp->flags & DDP_BF_NOCOPY))
+ KASSERT(m->m_len <= bsp->gl->dgl_length,
+ ("length received exceeds ddp pages: len=%d dgl_length=%d",
+ m->m_len, bsp->gl->dgl_length));
+
+ KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
+	KASSERT(m->m_next == NULL, ("m_next=%p", m->m_next));
+ /*
+ * Bit 0 of flags stores whether the DDP buffer is completed.
+ * Note that other parts of the code depend on this being in bit 0.
+ */
+ if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
+ panic("spurious ddp completion");
+ } else {
+ m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
+ if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
+ q->cur_buf ^= 1; /* flip buffers */
+ }
+
+ if (bsp->flags & DDP_BF_NOCOPY) {
+ m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
+ bsp->flags &= ~DDP_BF_NOCOPY;
+ }
+
+ if (ddp_report & F_DDP_PSH)
+ m->m_ddp_flags |= DDP_BF_PSH;
+ if (nomoredata)
+ m->m_ddp_flags |= DDP_BF_NODATA;
+
+#ifdef notyet
+ skb_reset_transport_header(skb);
+ tcp_hdr(skb)->fin = 0; /* changes original hdr->ddp_report */
+#endif
+ SBAPPEND(rcv, m);
+
+ if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
+ (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
+ || !(m->m_ddp_flags & DDP_BF_NOCOPY))))
+ so_sorwakeup_locked(so);
+ else
+ sockbuf_unlock(rcv);
+}
+
+#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
+ F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
+ F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
+ F_DDP_INVALID_PPOD)
+
+/*
+ * Handler for RX_DATA_DDP CPL messages.
+ */
+static int
+do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = ctx;
+ const struct cpl_rx_data_ddp *hdr = cplhdr(m);
+
+ VALIDATE_SOCK(so);
+
+ if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
+ log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
+ GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
+ return (CPL_RET_BUF_DONE);
+ }
+#if 0
+ skb->h.th = tcphdr_skb->h.th;
+#endif
+ new_rx_data_ddp(toep, m);
+ return (0);
+}
+
+static void
+process_ddp_complete(struct toepcb *toep, struct mbuf *m)
+{
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so;
+ struct ddp_state *q;
+ struct ddp_buf_state *bsp;
+ struct cpl_rx_ddp_complete *hdr;
+ unsigned int ddp_report, buf_idx, when, delack_mode;
+ int nomoredata = 0;
+ struct sockbuf *rcv;
+
+ inp_wlock(tp->t_inpcb);
+ so = inp_inpcbtosocket(tp->t_inpcb);
+
+ if (__predict_false(so_no_receive(so))) {
+ struct inpcb *inp = so_sotoinpcb(so);
+
+ handle_excess_rx(toep, m);
+ inp_wunlock(inp);
+ return;
+ }
+ q = &toep->tp_ddp_state;
+ hdr = cplhdr(m);
+ ddp_report = ntohl(hdr->ddp_report);
+ buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
+ m->m_pkthdr.csum_data = tp->rcv_nxt;
+
+ rcv = so_sockbuf_rcv(so);
+ sockbuf_lock(rcv);
+
+ bsp = &q->buf_state[buf_idx];
+ when = bsp->cur_offset;
+ m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
+ tp->rcv_nxt += m->m_len;
+ tp->t_rcvtime = ticks;
+
+ delack_mode = G_DDP_DACK_MODE(ddp_report);
+ if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
+ toep->tp_delack_mode = delack_mode;
+ toep->tp_delack_seq = tp->rcv_nxt;
+ }
+#ifdef notyet
+ skb_reset_transport_header(skb);
+ tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
+#endif
+ inp_wunlock(tp->t_inpcb);
+
+ KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
+ CTR5(KTR_TOM,
+ "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
+ "ddp_report 0x%x offset %u, len %u",
+ tp->rcv_nxt, bsp->cur_offset, ddp_report,
+ G_DDP_OFFSET(ddp_report), m->m_len);
+
+ m->m_cur_offset = bsp->cur_offset;
+ bsp->cur_offset += m->m_len;
+
+ if (!(bsp->flags & DDP_BF_NOFLIP)) {
+ q->cur_buf ^= 1; /* flip buffers */
+ if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
+			nomoredata = 1;
+ }
+
+ CTR4(KTR_TOM,
+ "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
+ "ddp_report %u offset %u",
+ tp->rcv_nxt, bsp->cur_offset, ddp_report,
+ G_DDP_OFFSET(ddp_report));
+
+ m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
+ m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
+ if (bsp->flags & DDP_BF_NOCOPY)
+ bsp->flags &= ~DDP_BF_NOCOPY;
+ if (nomoredata)
+ m->m_ddp_flags |= DDP_BF_NODATA;
+
+ SBAPPEND(rcv, m);
+ if ((so_state_get(so) & SS_NOFDREF) == 0)
+ so_sorwakeup_locked(so);
+ else
+ sockbuf_unlock(rcv);
+}
+
+/*
+ * Handler for RX_DDP_COMPLETE CPL messages.
+ */
+static int
+do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = ctx;
+
+ VALIDATE_SOCK(so);
+#if 0
+ skb->h.th = tcphdr_skb->h.th;
+#endif
+ process_ddp_complete(toep, m);
+ return (0);
+}
+
+/*
+ * Move a socket to TIME_WAIT state. We need to make some adjustments to the
+ * socket state before calling tcp_time_wait to comply with its expectations.
+ */
+static void
+enter_timewait(struct tcpcb *tp)
+{
+ /*
+ * Bump rcv_nxt for the peer FIN. We don't do this at the time we
+ * process peer_close because we don't want to carry the peer FIN in
+ * the socket's receive queue and if we increment rcv_nxt without
+ * having the FIN in the receive queue we'll confuse facilities such
+ * as SIOCINQ.
+ */
+ inp_wlock(tp->t_inpcb);
+ tp->rcv_nxt++;
+
+ tp->ts_recent_age = 0; /* defeat recycling */
+ tp->t_srtt = 0; /* defeat tcp_update_metrics */
+ inp_wunlock(tp->t_inpcb);
+ tcp_offload_twstart(tp);
+}
+
+/*
+ * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE. This
+ * function deals with the data that may be reported along with the FIN.
+ * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
+ * perform normal FIN-related processing. In the latter case 1 indicates that
+ * there was an implicit RX_DDP_COMPLETE and the mbuf should not be freed, 0 the
+ * mbuf can be freed.
+ */
+static int
+handle_peer_close_data(struct socket *so, struct mbuf *m)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+ struct ddp_state *q;
+ struct ddp_buf_state *bsp;
+ struct cpl_peer_close *req = cplhdr(m);
+ unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
+ struct sockbuf *rcv;
+
+ if (tp->rcv_nxt == rcv_nxt) /* no data */
+ return (0);
+
+ CTR0(KTR_TOM, "handle_peer_close_data");
+ if (__predict_false(so_no_receive(so))) {
+ handle_excess_rx(toep, m);
+
+ /*
+ * Although we discard the data we want to process the FIN so
+ * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
+ * PEER_CLOSE without data. In particular this PEER_CLOSE
+ * may be what will close the connection. We return 1 because
+ * handle_excess_rx() already freed the packet.
+ */
+ return (1);
+ }
+
+ inp_lock_assert(tp->t_inpcb);
+ q = &toep->tp_ddp_state;
+ rcv = so_sockbuf_rcv(so);
+ sockbuf_lock(rcv);
+
+ bsp = &q->buf_state[q->cur_buf];
+ m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+ KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
+ m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
+ m->m_cur_offset = bsp->cur_offset;
+ m->m_ddp_flags =
+ DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
+ m->m_seq = tp->rcv_nxt;
+ tp->rcv_nxt = rcv_nxt;
+ bsp->cur_offset += m->m_pkthdr.len;
+ if (!(bsp->flags & DDP_BF_NOFLIP))
+ q->cur_buf ^= 1;
+#ifdef notyet
+ skb_reset_transport_header(skb);
+ tcp_hdr(skb)->fin = 0; /* changes valid memory past CPL */
+#endif
+ tp->t_rcvtime = ticks;
+ SBAPPEND(rcv, m);
+ if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
+ so_sorwakeup_locked(so);
+ else
+ sockbuf_unlock(rcv);
+
+ return (1);
+}
+
+/*
+ * Handle a peer FIN.
+ */
+static void
+do_peer_fin(struct toepcb *toep, struct mbuf *m)
+{
+ struct socket *so;
+ struct tcpcb *tp = toep->tp_tp;
+ int keep, action;
+
+ action = keep = 0;
+ CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
+ if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
+ printf("abort_pending set\n");
+
+ goto out;
+ }
+ inp_wlock(tp->t_inpcb);
+ so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
+ if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
+ keep = handle_peer_close_data(so, m);
+ if (keep < 0) {
+ inp_wunlock(tp->t_inpcb);
+ return;
+ }
+ }
+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ CTR1(KTR_TOM,
+ "waking up waiters for cantrcvmore on %p ", so);
+ socantrcvmore(so);
+
+ /*
+ * If connection is half-synchronized
+ * (ie NEEDSYN flag on) then delay ACK,
+ * so it may be piggybacked when SYN is sent.
+ * Otherwise, since we received a FIN then no
+ * more input can be expected, send ACK now.
+ */
+ if (tp->t_flags & TF_NEEDSYN)
+ tp->t_flags |= TF_DELACK;
+ else
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt++;
+ }
+
+ switch (tp->t_state) {
+ case TCPS_SYN_RECEIVED:
+ tp->t_starttime = ticks;
+ /* FALLTHROUGH */
+ case TCPS_ESTABLISHED:
+ tp->t_state = TCPS_CLOSE_WAIT;
+ break;
+ case TCPS_FIN_WAIT_1:
+ tp->t_state = TCPS_CLOSING;
+ break;
+ case TCPS_FIN_WAIT_2:
+ /*
+ * If we've sent an abort_req we must have sent it too late,
+ * HW will send us a reply telling us so, and this peer_close
+ * is really the last message for this connection and needs to
+ * be treated as an abort_rpl, i.e., transition the connection
+ * to TCP_CLOSE (note that the host stack does this at the
+ * time of generating the RST but we must wait for HW).
+ * Otherwise we enter TIME_WAIT.
+ */
+ t3_release_offload_resources(toep);
+ if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+ action = TCP_CLOSE;
+ } else {
+ action = TCP_TIMEWAIT;
+ }
+ break;
+ default:
+ log(LOG_ERR,
+ "%s: TID %u received PEER_CLOSE in bad state %d\n",
+ toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
+ }
+ inp_wunlock(tp->t_inpcb);
+
+ if (action == TCP_TIMEWAIT) {
+ enter_timewait(tp);
+ } else if (action == TCP_DROP) {
+ tcp_offload_drop(tp, 0);
+ } else if (action == TCP_CLOSE) {
+ tcp_offload_close(tp);
+ }
+
+#ifdef notyet
+ /* Do not send POLL_HUP for half duplex close. */
+ if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
+ sk->sk_state == TCP_CLOSE)
+ sk_wake_async(so, 1, POLL_HUP);
+ else
+ sk_wake_async(so, 1, POLL_IN);
+#endif
+
+out:
+ if (!keep)
+ m_free(m);
+}
+
+/*
+ * Handler for PEER_CLOSE CPL messages.
+ */
+static int
+do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+
+ VALIDATE_SOCK(so);
+
+ do_peer_fin(toep, m);
+ return (0);
+}
+
+static void
+process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
+{
+ struct cpl_close_con_rpl *rpl = cplhdr(m);
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so;
+ int action = 0;
+ struct sockbuf *rcv;
+
+ inp_wlock(tp->t_inpcb);
+ so = inp_inpcbtosocket(tp->t_inpcb);
+
+ tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */
+
+ if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
+ inp_wunlock(tp->t_inpcb);
+ goto out;
+ }
+
+ CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
+ tp->t_state, !!(so_state_get(so) & SS_NOFDREF));
+
+ switch (tp->t_state) {
+ case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */
+ t3_release_offload_resources(toep);
+ if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+ action = TCP_CLOSE;
+
+ } else {
+ action = TCP_TIMEWAIT;
+ }
+ break;
+ case TCPS_LAST_ACK:
+ /*
+ * In this state we don't care about pending abort_rpl.
+ * If we've sent abort_req it was post-close and was sent too
+ * late, this close_con_rpl is the actual last message.
+ */
+ t3_release_offload_resources(toep);
+ action = TCP_CLOSE;
+ break;
+ case TCPS_FIN_WAIT_1:
+ /*
+ * If we can't receive any more
+ * data, then closing user can proceed.
+ * Starting the timer is contrary to the
+ * specification, but if we don't get a FIN
+ * we'll hang forever.
+ *
+ * XXXjl:
+ * we should release the tp also, and use a
+ * compressed state.
+ */
+ if (so)
+ rcv = so_sockbuf_rcv(so);
+ else
+ break;
+
+ if (rcv->sb_state & SBS_CANTRCVMORE) {
+ int timeout;
+
+ if (so)
+ soisdisconnected(so);
+ timeout = (tcp_fast_finwait2_recycle) ?
+ tcp_finwait2_timeout : tcp_maxidle;
+ tcp_timer_activate(tp, TT_2MSL, timeout);
+ }
+ tp->t_state = TCPS_FIN_WAIT_2;
+ if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
+ (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
+ action = TCP_DROP;
+ }
+
+ break;
+ default:
+ log(LOG_ERR,
+ "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
+ toep->tp_toedev->tod_name, toep->tp_tid,
+ tp->t_state);
+ }
+ inp_wunlock(tp->t_inpcb);
+
+
+ if (action == TCP_TIMEWAIT) {
+ enter_timewait(tp);
+ } else if (action == TCP_DROP) {
+ tcp_offload_drop(tp, 0);
+ } else if (action == TCP_CLOSE) {
+ tcp_offload_close(tp);
+ }
+out:
+ m_freem(m);
+}
+
+/*
+ * Handler for CLOSE_CON_RPL CPL messages.
+ */
+static int
+do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
+ void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+
+ process_close_con_rpl(toep, m);
+ return (0);
+}
+
+/*
+ * Process abort replies. We only process these messages if we anticipate
+ * them as the coordination between SW and HW in this area is somewhat lacking
+ * and sometimes we get ABORT_RPLs after we are done with the connection that
+ * originated the ABORT_REQ.
+ */
+static void
+process_abort_rpl(struct toepcb *toep, struct mbuf *m)
+{
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so;
+ int needclose = 0;
+
+#ifdef T3_TRACE
+ T3_TRACE1(TIDTB(sk),
+ "process_abort_rpl: GTS rpl pending %d",
+ sock_flag(sk, ABORT_RPL_PENDING));
+#endif
+
+ inp_wlock(tp->t_inpcb);
+ so = inp_inpcbtosocket(tp->t_inpcb);
+
+ if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+ /*
+ * XXX panic on tcpdrop
+ */
+ if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev))
+ toep->tp_flags |= TP_ABORT_RPL_RCVD;
+ else {
+ toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING);
+ if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) ||
+ !is_t3a(toep->tp_toedev)) {
+ if (toep->tp_flags & TP_ABORT_REQ_RCVD)
+ panic("TP_ABORT_REQ_RCVD set");
+ t3_release_offload_resources(toep);
+ needclose = 1;
+ }
+ }
+ }
+ inp_wunlock(tp->t_inpcb);
+
+ if (needclose)
+ tcp_offload_close(tp);
+
+ m_free(m);
+}
+
+/*
+ * Handle an ABORT_RPL_RSS CPL message.
+ */
+static int
+do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct cpl_abort_rpl_rss *rpl = cplhdr(m);
+ struct toepcb *toep;
+
+ /*
+ * Ignore replies to post-close aborts indicating that the abort was
+ * requested too late. These connections are terminated when we get
+ * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
+ * arrives the TID is either no longer used or it has been recycled.
+ */
+ if (rpl->status == CPL_ERR_ABORT_FAILED) {
+discard:
+ m_free(m);
+ return (0);
+ }
+
+ toep = (struct toepcb *)ctx;
+
+ /*
+ * Sometimes we've already closed the socket, e.g., a post-close
+ * abort races with ABORT_REQ_RSS, the latter frees the socket
+ * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED,
+ * but FW turns the ABORT_REQ into a regular one and so we get
+ * ABORT_RPL_RSS with status 0 and no socket. Only on T3A.
+ */
+ if (!toep)
+ goto discard;
+
+ if (toep->tp_tp == NULL) {
+ log(LOG_NOTICE, "removing tid for abort\n");
+ cxgb_remove_tid(cdev, toep, toep->tp_tid);
+ if (toep->tp_l2t)
+ l2t_release(L2DATA(cdev), toep->tp_l2t);
+
+ toepcb_release(toep);
+ goto discard;
+ }
+
+ log(LOG_NOTICE, "toep=%p\n", toep);
+ log(LOG_NOTICE, "tp=%p\n", toep->tp_tp);
+
+ toepcb_hold(toep);
+ process_abort_rpl(toep, m);
+ toepcb_release(toep);
+ return (0);
+}
+
+/*
+ * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also
+ * indicate whether RST should be sent in response.
+ */
+static int
+abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+
+ switch (abort_reason) {
+ case CPL_ERR_BAD_SYN:
+#if 0
+ NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through
+#endif
+ case CPL_ERR_CONN_RESET:
+ // XXX need to handle SYN_RECV due to crossed SYNs
+ return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
+ case CPL_ERR_XMIT_TIMEDOUT:
+ case CPL_ERR_PERSIST_TIMEDOUT:
+ case CPL_ERR_FINWAIT2_TIMEDOUT:
+ case CPL_ERR_KEEPALIVE_TIMEDOUT:
+#if 0
+ NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
+#endif
+ return (ETIMEDOUT);
+ default:
+ return (EIO);
+ }
+}
+
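+/*
+ * Initialize an mbuf as an ABORT_RPL work request for the given tid.  The
+ * cmd field tells the TOE whether it should send an RST to the peer.
+ */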
+static inline void
+set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd)
+{
+ struct cpl_abort_rpl *rpl = cplhdr(m);
+
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
+ rpl->wr.wr_lo = htonl(V_WR_TID(tid));
+ m->m_len = m->m_pkthdr.len = sizeof(*rpl);
+
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
+ rpl->cmd = cmd;
+}
+
+static void
+send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m)
+{
+ struct mbuf *reply_mbuf;
+ struct cpl_abort_req_rss *req = cplhdr(m);
+
+ reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl));
+ m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
+ reply_mbuf->m_len = reply_mbuf->m_pkthdr.len = sizeof(struct cpl_abort_rpl);
+ set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status);
+ cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
+ m_free(m);
+}
+
+/*
+ * Returns whether an ABORT_REQ_RSS message is a negative advice.
+ */
+static inline int
+is_neg_adv_abort(unsigned int status)
+{
+ return status == CPL_ERR_RTX_NEG_ADVICE ||
+ status == CPL_ERR_PERSIST_NEG_ADVICE;
+}
+
+static void
+send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status)
+{
+ struct mbuf *reply_mbuf;
+ struct cpl_abort_req_rss *req = cplhdr(m);
+
+ reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
+
+ if (!reply_mbuf) {
+ /* Defer the reply. Stick rst_status into req->status. */
+ req->status = rst_status;
+ t3_defer_reply(m, tdev, send_deferred_abort_rpl);
+ return;
+ }
+
+ m_set_priority(reply_mbuf, CPL_PRIORITY_DATA);
+ set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status);
+ m_free(m);
+
+ /*
+ * XXX need to sync with ARP as for SYN_RECV connections we can send
+ * these messages while ARP is pending. For other connection states
+ * it's not a problem.
+ */
+ cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
+}
+
+#ifdef notyet
+static void
+cleanup_syn_rcv_conn(struct socket *child, struct socket *parent)
+{
+ CXGB_UNIMPLEMENTED();
+#ifdef notyet
+ struct request_sock *req = child->sk_user_data;
+
+ inet_csk_reqsk_queue_removed(parent, req);
+ synq_remove(tcp_sk(child));
+ __reqsk_free(req);
+ child->sk_user_data = NULL;
+#endif
+}
+
+
+/*
+ * Performs the actual work to abort a SYN_RECV connection.
+ */
+static void
+do_abort_syn_rcv(struct socket *child, struct socket *parent)
+{
+ struct tcpcb *parenttp = so_sototcpcb(parent);
+ struct tcpcb *childtp = so_sototcpcb(child);
+
+ /*
+ * If the server is still open we clean up the child connection,
+ * otherwise the server already did the clean up as it was purging
+ * its SYN queue and the skb was just sitting in its backlog.
+ */
+ if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
+ cleanup_syn_rcv_conn(child, parent);
+ inp_wlock(childtp->t_inpcb);
+ t3_release_offload_resources(childtp->t_toe);
+ inp_wunlock(childtp->t_inpcb);
+ tcp_offload_close(childtp);
+ }
+}
+#endif
+
+/*
+ * Handle abort requests for a SYN_RECV connection. These need extra work
+ * because the socket is on its parent's SYN queue.
+ */
+static int
+abort_syn_rcv(struct socket *so, struct mbuf *m)
+{
+ CXGB_UNIMPLEMENTED();
+#ifdef notyet
+ struct socket *parent;
+ struct toedev *tdev = toep->tp_toedev;
+ struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
+ struct socket *oreq = so->so_incomp;
+ struct t3c_tid_entry *t3c_stid;
+ struct tid_info *t;
+
+ if (!oreq)
+ return -1; /* somehow we are not on the SYN queue */
+
+ t = &(T3C_DATA(cdev))->tid_maps;
+ t3c_stid = lookup_stid(t, oreq->ts_recent);
+ parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
+
+ so_lock(parent);
+ do_abort_syn_rcv(so, parent);
+ send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
+ so_unlock(parent);
+#endif
+ return (0);
+}
+
+/*
+ * Process abort requests. If we are waiting for an ABORT_RPL we ignore this
+ * request except that we need to reply to it.
+ */
+static void
+process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
+{
+ int rst_status = CPL_ABORT_NO_RST;
+ const struct cpl_abort_req_rss *req = cplhdr(m);
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so;
+ int needclose = 0;
+
+ inp_wlock(tp->t_inpcb);
+ so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
+ if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
+ toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
+ m_free(m);
+ goto skip;
+ }
+
+ toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
+ /*
+ * Three cases to consider:
+ * a) We haven't sent an abort_req; close the connection.
+ * b) We have sent a post-close abort_req that will get to TP too late
+ * and will generate a CPL_ERR_ABORT_FAILED reply. The reply will
+ * be ignored and the connection should be closed now.
+ * c) We have sent a regular abort_req that will get to TP too late.
+ * That will generate an abort_rpl with status 0, wait for it.
+ */
+ if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
+ (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
+ int error;
+
+ error = abort_status_to_errno(so, req->status,
+ &rst_status);
+ so_error_set(so, error);
+
+ if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
+ so_sorwakeup(so);
+ /*
+ * SYN_RECV needs special processing. If abort_syn_rcv()
+ * returns 0 it has taken care of the abort.
+ */
+ if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
+ goto skip;
+
+ t3_release_offload_resources(toep);
+ needclose = 1;
+ }
+ inp_wunlock(tp->t_inpcb);
+
+ if (needclose)
+ tcp_offload_close(tp);
+
+ send_abort_rpl(m, tdev, rst_status);
+ return;
+skip:
+ inp_wunlock(tp->t_inpcb);
+}
+
+/*
+ * Handle an ABORT_REQ_RSS CPL message.
+ */
+static int
+do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ const struct cpl_abort_req_rss *req = cplhdr(m);
+ struct toepcb *toep = (struct toepcb *)ctx;
+
+ if (is_neg_adv_abort(req->status)) {
+ m_free(m);
+ return (0);
+ }
+
+ log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);
+
+ if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
+ cxgb_remove_tid(cdev, toep, toep->tp_tid);
+ toep->tp_flags |= TP_ABORT_REQ_RCVD;
+
+ send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
+ if (toep->tp_l2t)
+ l2t_release(L2DATA(cdev), toep->tp_l2t);
+
+ /*
+ * Unhook
+ */
+ toep->tp_tp->t_toe = NULL;
+ toep->tp_tp->t_flags &= ~TF_TOE;
+ toep->tp_tp = NULL;
+ /*
+ * XXX need to call syncache_chkrst - but we don't
+ * have a way of doing that yet
+ */
+ toepcb_release(toep);
+ log(LOG_ERR, "abort for unestablished connection :-(\n");
+ return (0);
+ }
+ if (toep->tp_tp == NULL) {
+ log(LOG_NOTICE, "disconnected toepcb\n");
+ /* should be freed momentarily */
+ return (0);
+ }
+
+
+ toepcb_hold(toep);
+ process_abort_req(toep, m, toep->tp_toedev);
+ toepcb_release(toep);
+ return (0);
+}
+#ifdef notyet
+static void
+pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
+{
+ struct toedev *tdev = TOE_DEV(parent);
+
+ do_abort_syn_rcv(child, parent);
+ if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
+ struct cpl_pass_accept_rpl *rpl = cplhdr(m);
+
+ rpl->opt0h = htonl(F_TCAM_BYPASS);
+ rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
+ cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
+ } else
+ m_free(m);
+}
+#endif
+static void
+handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
+{
+ CXGB_UNIMPLEMENTED();
+
+#ifdef notyet
+ struct t3cdev *cdev;
+ struct socket *parent;
+ struct socket *oreq;
+ struct t3c_tid_entry *t3c_stid;
+ struct tid_info *t;
+ struct tcpcb *otp, *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+
+ /*
+ * If the connection is being aborted due to the parent listening
+ * socket going away there's nothing to do, the ABORT_REQ will close
+ * the connection.
+ */
+ if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
+ m_free(m);
+ return;
+ }
+
+ oreq = so->so_incomp;
+ otp = so_sototcpcb(oreq);
+
+ cdev = T3C_DEV(so);
+ t = &(T3C_DATA(cdev))->tid_maps;
+ t3c_stid = lookup_stid(t, otp->ts_recent);
+ parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;
+
+ so_lock(parent);
+ pass_open_abort(so, parent, m);
+ so_unlock(parent);
+#endif
+}
+
+/*
+ * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL. This is treated similarly
+ * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
+ * connection.
+ */
+static void
+pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m)
+{
+
+#ifdef notyet
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+ BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk);
+#endif
+ handle_pass_open_arp_failure(m_get_socket(m), m);
+}
+
+/*
+ * Populate a reject CPL_PASS_ACCEPT_RPL WR.
+ */
+static void
+mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf)
+{
+ struct cpl_pass_accept_req *req = cplhdr(req_mbuf);
+ struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf);
+ unsigned int tid = GET_TID(req);
+
+ m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP);
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
+ rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet
+ rpl->opt0h = htonl(F_TCAM_BYPASS);
+ rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
+ rpl->opt2 = 0;
+ rpl->rsvd = rpl->opt2; /* workaround for HW bug */
+}
+
+/*
+ * Send a deferred reject to an accept request.
+ */
+static void
+reject_pass_request(struct toedev *tdev, struct mbuf *m)
+{
+ struct mbuf *reply_mbuf;
+
+ reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl));
+ mk_pass_accept_rpl(reply_mbuf, m);
+ cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf);
+ m_free(m);
+}
+
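+/*
+ * Callback invoked by the syncache for an entry added via
+ * syncache_add_accept_req().  In both the "already present" and "dropped"
+ * cases the only cleanup needed here is releasing the toepcb reference that
+ * was taken before the add.
+ */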
+static void
+handle_syncache_event(int event, void *arg)
+{
+ struct toepcb *toep = arg;
+
+ switch (event) {
+ case TOE_SC_ENTRY_PRESENT:
+ /*
+ * entry already exists - free toepcb
+ * and l2t
+ */
+ printf("syncache entry present\n");
+ toepcb_release(toep);
+ break;
+ case TOE_SC_DROP:
+ /*
+ * The syncache has given up on this entry:
+ * either it timed out or it was evicted.
+ * We need to explicitly release the tid.
+ */
+ printf("syncache entry dropped\n");
+ toepcb_release(toep);
+ break;
+ default:
+ log(LOG_ERR, "unknown syncache event %d\n", event);
+ break;
+ }
+}
+
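+/*
+ * Build the in_conninfo, tcpopt, and pseudo TCP header describing an
+ * embryonic connection from a CPL_PASS_ACCEPT_REQ and hand them to the
+ * syncache on behalf of the TOE.
+ */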
+static void
+syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
+{
+ struct in_conninfo inc;
+ struct tcpopt to;
+ struct tcphdr th;
+ struct inpcb *inp;
+ int mss, wsf, sack, ts;
+ uint32_t rcv_isn = ntohl(req->rcv_isn);
+
+ bzero(&to, sizeof(struct tcpopt));
+ inp = so_sotoinpcb(lso);
+
+ /*
+ * Fill out information for entering us into the syncache
+ */
+ inc.inc_fport = th.th_sport = req->peer_port;
+ inc.inc_lport = th.th_dport = req->local_port;
+ th.th_seq = req->rcv_isn;
+ th.th_flags = TH_SYN;
+
+ toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
+
+
+ inc.inc_isipv6 = 0;
+ inc.inc_len = 0;
+ inc.inc_faddr.s_addr = req->peer_ip;
+ inc.inc_laddr.s_addr = req->local_ip;
+
+ DPRINTF("syncache add of %d:%d %d:%d\n",
+ ntohl(req->local_ip), ntohs(req->local_port),
+ ntohl(req->peer_ip), ntohs(req->peer_port));
+
+ mss = req->tcp_options.mss;
+ wsf = req->tcp_options.wsf;
+ ts = req->tcp_options.tstamp;
+ sack = req->tcp_options.sack;
+ to.to_mss = mss;
+ to.to_wscale = wsf;
+ to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
+ tcp_offload_syncache_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep);
+}
+
+
+/*
+ * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket
+ * lock held. Note that the sock here is a listening socket that is not owned
+ * by the TOE.
+ */
+static void
+process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev,
+ struct listen_ctx *lctx)
+{
+ int rt_flags;
+ struct l2t_entry *e;
+ struct iff_mac tim;
+ struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
+ struct cpl_pass_accept_rpl *rpl;
+ struct cpl_pass_accept_req *req = cplhdr(m);
+ unsigned int tid = GET_TID(req);
+ struct tom_data *d = TOM_DATA(tdev);
+ struct t3cdev *cdev = d->cdev;
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *newtoep;
+ struct rtentry *dst;
+ struct sockaddr_in nam;
+ struct t3c_data *td = T3C_DATA(cdev);
+
+ reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
+ if (__predict_false(reply_mbuf == NULL)) {
+ if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
+ t3_defer_reply(m, tdev, reject_pass_request);
+ else {
+ cxgb_queue_tid_release(cdev, tid);
+ m_free(m);
+ }
+ DPRINTF("failed to get reply_mbuf\n");
+
+ goto out;
+ }
+
+ if (tp->t_state != TCPS_LISTEN) {
+ DPRINTF("socket not in listen state\n");
+
+ goto reject;
+ }
+
+ tim.mac_addr = req->dst_mac;
+ tim.vlan_tag = ntohs(req->vlan_tag);
+ if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
+ DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n");
+ goto reject;
+ }
+
+#ifdef notyet
+ /*
+ * XXX do route lookup to confirm that we're still listening on this
+ * address
+ */
+ if (ip_route_input(skb, req->local_ip, req->peer_ip,
+ G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev))
+ goto reject;
+ rt_flags = ((struct rtable *)skb->dst)->rt_flags &
+ (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL);
+ dst_release(skb->dst); // done with the input route, release it
+ skb->dst = NULL;
+
+ if ((rt_flags & RTF_LOCAL) == 0)
+ goto reject;
+#endif
+ /*
+ * XXX
+ */
+ rt_flags = RTF_LOCAL;
+ if ((rt_flags & RTF_LOCAL) == 0)
+ goto reject;
+
+ /*
+ * Calculate values and add to syncache
+ */
+
+ newtoep = toepcb_alloc();
+ if (newtoep == NULL)
+ goto reject;
+
+ bzero(&nam, sizeof(struct sockaddr_in));
+
+ nam.sin_len = sizeof(struct sockaddr_in);
+ nam.sin_family = AF_INET;
+ nam.sin_addr.s_addr = req->peer_ip;
+ dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
+
+ if (dst == NULL) {
+ printf("failed to find route\n");
+ goto reject;
+ }
+ e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev,
+ (struct sockaddr *)&nam);
+ if (e == NULL) {
+ DPRINTF("failed to get l2t\n");
+ }
+ /*
+ * Point to our listen socket until accept
+ */
+ newtoep->tp_tp = tp;
+ newtoep->tp_flags = TP_SYN_RCVD;
+ newtoep->tp_tid = tid;
+ newtoep->tp_toedev = tdev;
+ tp->rcv_wnd = select_rcv_wnd(tdev, so);
+
+ cxgb_insert_tid(cdev, d->client, newtoep, tid);
+ so_lock(so);
+ LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry);
+ so_unlock(so);
+
+ newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) &&
+ tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
+
+ if (newtoep->tp_ulp_mode) {
+ ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
+
+ if (ddp_mbuf == NULL)
+ newtoep->tp_ulp_mode = 0;
+ }
+
+ CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d",
+ TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
+ set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
+ /*
+ * XXX workaround for lack of syncache drop
+ */
+ toepcb_hold(newtoep);
+ syncache_add_accept_req(req, so, newtoep);
+
+ rpl = cplhdr(reply_mbuf);
+ reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
+ rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ rpl->wr.wr_lo = 0;
+ OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
+ rpl->opt2 = htonl(calc_opt2(so, tdev));
+ rpl->rsvd = rpl->opt2; /* workaround for HW bug */
+ rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten
+
+ rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) |
+ V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx));
+ rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) |
+ CPL_PASS_OPEN_ACCEPT);
+
+ DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status);
+
+ m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep));
+
+ l2t_send(cdev, reply_mbuf, e);
+ m_free(m);
+ if (newtoep->tp_ulp_mode) {
+ __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
+ V_TF_DDP_OFF(1) |
+ TP_DDP_TIMER_WORKAROUND_MASK,
+ V_TF_DDP_OFF(1) |
+ TP_DDP_TIMER_WORKAROUND_VAL, 1);
+ } else
+ printf("not offloading\n");
+
+
+
+ return;
+reject:
+ if (tdev->tod_ttid == TOE_ID_CHELSIO_T3)
+ mk_pass_accept_rpl(reply_mbuf, m);
+ else
+ mk_tid_release(reply_mbuf, newtoep, tid);
+ cxgb_ofld_send(cdev, reply_mbuf);
+ m_free(m);
+out:
+#if 0
+ TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+#else
+ return;
+#endif
+}
+
+/*
+ * Handle a CPL_PASS_ACCEPT_REQ message.
+ */
+static int
+do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
+ struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */
+ struct tom_data *d = listen_ctx->tom_data;
+
+#if VALIDATE_TID
+ struct cpl_pass_accept_req *req = cplhdr(m);
+ unsigned int tid = GET_TID(req);
+ struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
+
+ if (unlikely(!lsk)) {
+ printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n",
+ cdev->name,
+ (unsigned long)((union listen_entry *)ctx -
+ t->stid_tab));
+ return CPL_RET_BUF_DONE;
+ }
+ if (unlikely(tid >= t->ntids)) {
+ printk(KERN_ERR "%s: passive open TID %u too large\n",
+ cdev->name, tid);
+ return CPL_RET_BUF_DONE;
+ }
+ /*
+ * For T3A the current user of the TID may have closed but its last
+ * message(s) may have been backlogged so the TID appears to be still
+ * in use. Just take the TID away, the connection can close at its
+ * own leisure. For T3B this situation is a bug.
+ */
+ if (!valid_new_tid(t, tid) &&
+ cdev->type != T3A) {
+ printk(KERN_ERR "%s: passive open uses existing TID %u\n",
+ cdev->name, tid);
+ return CPL_RET_BUF_DONE;
+ }
+#endif
+
+ process_pass_accept_req(lso, m, &d->tdev, listen_ctx);
+ return (0);
+}
+
+/*
+ * Called when a connection is established to translate the TCP options
+ * reported by HW to FreeBSD's native format.
+ */
+static void
+assign_rxopt(struct socket *so, unsigned int opt)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+ const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep));
+
+ inp_lock_assert(tp->t_inpcb);
+
+ toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40;
+ tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0;
+ tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0;
+ tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0;
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE))
+ tp->rcv_scale = tp->request_r_scale;
+}
+
+/*
+ * Completes some final bits of initialization for just established connections
+ * and changes their state to TCP_ESTABLISHED.
+ *
+ * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
+ */
+static void
+make_established(struct socket *so, u32 snd_isn, unsigned int opt)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+
+ toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
+ assign_rxopt(so, opt);
+
+ /*
+ *XXXXXXXXXXX
+ *
+ */
+#ifdef notyet
+ so->so_proto->pr_ctloutput = t3_ctloutput;
+#endif
+
+#if 0
+ inet_sk(sk)->id = tp->write_seq ^ jiffies;
+#endif
+ /*
+ * XXX not clear what rcv_wup maps to
+ */
+ /*
+ * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't
+ * pass through opt0.
+ */
+ if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10))
+ toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10);
+
+ dump_toepcb(toep);
+
+#ifdef notyet
+/*
+ * no clean interface for marking ARP up to date
+ */
+ dst_confirm(sk->sk_dst_cache);
+#endif
+ tp->t_starttime = ticks;
+ tp->t_state = TCPS_ESTABLISHED;
+ soisconnected(so);
+}
+
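+/*
+ * Reconstruct the connection info, TCP options, and pseudo header from a
+ * CPL_PASS_ESTABLISH message and use them to expand the matching syncache
+ * entry into a full socket.
+ */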
+static int
+syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
+{
+
+ struct in_conninfo inc;
+ struct tcpopt to;
+ struct tcphdr th;
+ int mss, wsf, sack, ts;
+ struct mbuf *m = NULL;
+ const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
+ unsigned int opt;
+
+#ifdef MAC
+#error "no MAC support"
+#endif
+
+ opt = ntohs(req->tcp_opt);
+
+ bzero(&to, sizeof(struct tcpopt));
+
+ /*
+ * Fill out information for entering us into the syncache
+ */
+ inc.inc_fport = th.th_sport = req->peer_port;
+ inc.inc_lport = th.th_dport = req->local_port;
+ th.th_seq = req->rcv_isn;
+ th.th_flags = TH_ACK;
+
+ inc.inc_isipv6 = 0;
+ inc.inc_len = 0;
+ inc.inc_faddr.s_addr = req->peer_ip;
+ inc.inc_laddr.s_addr = req->local_ip;
+
+ mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
+ wsf = G_TCPOPT_WSCALE_OK(opt);
+ ts = G_TCPOPT_TSTAMP(opt);
+ sack = G_TCPOPT_SACK(opt);
+
+ to.to_mss = mss;
+ to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
+ to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
+
+ DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
+ ntohl(req->local_ip), ntohs(req->local_port),
+ ntohl(req->peer_ip), ntohs(req->peer_port),
+ mss, wsf, ts, sack);
+ return tcp_offload_syncache_expand(&inc, &to, &th, so, m);
+}
+
+
+/*
+ * Process a CPL_PASS_ESTABLISH message. XXX a lot of the locking doesn't work
+ * if we are in TCP_SYN_RECV due to crossed SYNs
+ */
+static int
+do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct cpl_pass_establish *req = cplhdr(m);
+ struct toepcb *toep = (struct toepcb *)ctx;
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so, *lso;
+ struct t3c_data *td = T3C_DATA(cdev);
+ struct sockbuf *snd, *rcv;
+
+ // Complete socket initialization now that we have the SND_ISN
+
+ struct toedev *tdev;
+
+
+ tdev = toep->tp_toedev;
+
+ inp_wlock(tp->t_inpcb);
+
+ /*
+ *
+ * XXX need to add reference while we're manipulating
+ */
+ so = lso = inp_inpcbtosocket(tp->t_inpcb);
+
+ inp_wunlock(tp->t_inpcb);
+
+ so_lock(so);
+ LIST_REMOVE(toep, synq_entry);
+ so_unlock(so);
+
+ if (!syncache_expand_establish_req(req, &so, toep)) {
+ /*
+ * No entry
+ */
+ CXGB_UNIMPLEMENTED();
+ }
+ if (so == NULL) {
+ /*
+ * Couldn't create the socket
+ */
+ CXGB_UNIMPLEMENTED();
+ }
+
+ tp = so_sototcpcb(so);
+ inp_wlock(tp->t_inpcb);
+
+ snd = so_sockbuf_snd(so);
+ rcv = so_sockbuf_rcv(so);
+
+ snd->sb_flags |= SB_NOCOALESCE;
+ rcv->sb_flags |= SB_NOCOALESCE;
+
+ toep->tp_tp = tp;
+ toep->tp_flags = 0;
+ tp->t_toe = toep;
+ reset_wr_list(toep);
+ tp->rcv_wnd = select_rcv_wnd(tdev, so);
+ tp->rcv_nxt = toep->tp_copied_seq;
+ install_offload_ops(so);
+
+ toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
+ toep->tp_wr_unacked = 0;
+ toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
+ toep->tp_qset_idx = 0;
+ toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
+
+ /*
+ * XXX Cancel any keep alive timer
+ */
+
+ make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
+
+ /*
+ * XXX workaround for lack of syncache drop
+ */
+ toepcb_release(toep);
+ inp_wunlock(tp->t_inpcb);
+
+ CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid);
+ cxgb_log_tcb(cdev->adapter, toep->tp_tid);
+#ifdef notyet
+ /*
+ * XXX not sure how these checks map to us
+ */
+ if (unlikely(sk->sk_socket)) { // simultaneous opens only
+ sk->sk_state_change(sk);
+ sk_wake_async(so, 0, POLL_OUT);
+ }
+ /*
+ * The state for the new connection is now up to date.
+ * Next check if we should add the connection to the parent's
+ * accept queue. When the parent closes it resets connections
+ * on its SYN queue, so check if we are being reset. If so we
+ * don't need to do anything more, the coming ABORT_RPL will
+ * destroy this socket. Otherwise move the connection to the
+ * accept queue.
+ *
+ * Note that we reset the synq before closing the server so if
+ * we are not being reset the stid is still open.
+ */
+ if (unlikely(!tp->forward_skb_hint)) { // removed from synq
+ __kfree_skb(skb);
+ goto unlock;
+ }
+#endif
+ m_free(m);
+
+ return (0);
+}
+
+/*
+ * Fill in the right TID for CPL messages waiting in the out-of-order queue
+ * and send them to the TOE.
+ */
+static void
+fixup_and_send_ofo(struct toepcb *toep)
+{
+ struct mbuf *m;
+ struct toedev *tdev = toep->tp_toedev;
+ struct tcpcb *tp = toep->tp_tp;
+ unsigned int tid = toep->tp_tid;
+
+ log(LOG_NOTICE, "fixup_and_send_ofo\n");
+
+ inp_lock_assert(tp->t_inpcb);
+ while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
+ /*
+ * A variety of messages can be waiting but the fields we'll
+ * be touching are common to all so any message type will do.
+ */
+ struct cpl_close_con_req *p = cplhdr(m);
+
+ p->wr.wr_lo = htonl(V_WR_TID(tid));
+ OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
+ cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
+ }
+}
+
+/*
+ * Updates socket state from an active establish CPL message. Runs with the
+ * socket lock held.
+ */
+static void
+socket_act_establish(struct socket *so, struct mbuf *m)
+{
+ struct cpl_act_establish *req = cplhdr(m);
+ u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+
+ if (__predict_false(tp->t_state != TCPS_SYN_SENT))
+ log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n",
+ toep->tp_tid, tp->t_state);
+
+ tp->ts_recent_age = ticks;
+ tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn;
+ toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs;
+
+ make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
+
+ /*
+ * Now that we finally have a TID send any CPL messages that we had to
+ * defer for lack of a TID.
+ */
+ if (mbufq_len(&toep->out_of_order_queue))
+ fixup_and_send_ofo(toep);
+
+ if (__predict_false(so_state_get(so) & SS_NOFDREF)) {
+ /*
+ * XXX does this even make sense?
+ */
+ so_sorwakeup(so);
+ }
+ m_free(m);
+#ifdef notyet
+/*
+ * XXX assume no write requests permitted while socket connection is
+ * incomplete
+ */
+ /*
+ * Currently the send queue must be empty at this point because the
+ * socket layer does not send anything before a connection is
+ * established. To be future proof though we handle the possibility
+ * that there are pending buffers to send (either TX_DATA or
+ * CLOSE_CON_REQ). First we need to adjust the sequence number of the
+ * buffers according to the just learned write_seq, and then we send
+ * them on their way.
+ */
+ fixup_pending_writeq_buffers(sk);
+ if (t3_push_frames(so, 1))
+ sk->sk_write_space(sk);
+#endif
+
+ toep->tp_state = tp->t_state;
+ tcpstat.tcps_connects++;
+
+}
+
+/*
+ * Process a CPL_ACT_ESTABLISH message.
+ */
+static int
+do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct cpl_act_establish *req = cplhdr(m);
+ unsigned int tid = GET_TID(req);
+ unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
+ struct toepcb *toep = (struct toepcb *)ctx;
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so;
+ struct toedev *tdev;
+ struct tom_data *d;
+
+ if (tp == NULL) {
+ free_atid(cdev, atid);
+ return (0);
+ }
+ inp_wlock(tp->t_inpcb);
+
+ /*
+ * XXX
+ */
+ so = inp_inpcbtosocket(tp->t_inpcb);
+ tdev = toep->tp_toedev; /* blow up here if link was down */
+ d = TOM_DATA(tdev);
+
+ /*
+ * It's OK if the TID is currently in use; the owning socket may have
+ * backlogged its last CPL message(s). Just take it away.
+ */
+ toep->tp_tid = tid;
+ toep->tp_tp = tp;
+ so_insert_tid(d, toep, tid);
+ free_atid(cdev, atid);
+ toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
+
+ socket_act_establish(so, m);
+ inp_wunlock(tp->t_inpcb);
+ CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid);
+ cxgb_log_tcb(cdev->adapter, toep->tp_tid);
+
+ return (0);
+}
+
+/*
+ * Process an acknowledgment of WR completion. Advance snd_una and send the
+ * next batch of work requests from the write queue.
+ */
+static void
+wr_ack(struct toepcb *toep, struct mbuf *m)
+{
+ struct tcpcb *tp = toep->tp_tp;
+ struct cpl_wr_ack *hdr = cplhdr(m);
+ struct socket *so;
+ unsigned int credits = ntohs(hdr->credits);
+ u32 snd_una = ntohl(hdr->snd_una);
+ int bytes = 0;
+ struct sockbuf *snd;
+
+ CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits);
+
+ inp_wlock(tp->t_inpcb);
+ so = inp_inpcbtosocket(tp->t_inpcb);
+ toep->tp_wr_avail += credits;
+ if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
+ toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
+
+ while (credits) {
+ struct mbuf *p = peek_wr(toep);
+
+ if (__predict_false(!p)) {
+ log(LOG_ERR, "%u WR_ACK credits for TID %u with "
+ "nothing pending, state %u wr_avail=%u\n",
+ credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
+ break;
+ }
+ CTR2(KTR_TOM,
+ "wr_ack: p->credits=%d p->bytes=%d",
+ p->m_pkthdr.csum_data, p->m_pkthdr.len);
+ KASSERT(p->m_pkthdr.csum_data != 0,
+ ("empty request still on list"));
+
+ if (__predict_false(credits < p->m_pkthdr.csum_data)) {
+
+#if DEBUG_WR > 1
+ struct tx_data_wr *w = cplhdr(p);
+ log(LOG_ERR,
+ "TID %u got %u WR credits, need %u, len %u, "
+ "main body %u, frags %u, seq # %u, ACK una %u,"
+ " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n",
+ toep->tp_tid, credits, p->csum, p->len,
+ p->len - p->data_len, skb_shinfo(p)->nr_frags,
+ ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt),
+ toep->tp_wr_avail, count_pending_wrs(tp) - credits);
+#endif
+ p->m_pkthdr.csum_data -= credits;
+ break;
+ } else {
+ dequeue_wr(toep);
+ credits -= p->m_pkthdr.csum_data;
+ bytes += p->m_pkthdr.len;
+ CTR3(KTR_TOM,
+ "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d",
+ p->m_pkthdr.len, credits, p->m_pkthdr.csum_data);
+
+ m_free(p);
+ }
+ }
+
+#if DEBUG_WR
+ check_wr_invariants(tp);
+#endif
+
+ if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
+#if VALIDATE_SEQ
+ struct tom_data *d = TOM_DATA(TOE_DEV(so));
+
+ log(LOG_ERR, "%s: unexpected sequence # %u in WR_ACK "
+ "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una,
+ toep->tp_tid, tp->snd_una);
+#endif
+ goto out_free;
+ }
+
+ if (tp->snd_una != snd_una) {
+ tp->snd_una = snd_una;
+ tp->ts_recent_age = ticks;
+#ifdef notyet
+ /*
+ * Keep ARP entry "minty fresh"
+ */
+ dst_confirm(sk->sk_dst_cache);
+#endif
+ if (tp->snd_una == tp->snd_nxt)
+ toep->tp_flags &= ~TP_TX_WAIT_IDLE;
+ }
+
+ snd = so_sockbuf_snd(so);
+ if (bytes) {
+ CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes);
+ snd = so_sockbuf_snd(so);
+ sockbuf_lock(snd);
+ sbdrop_locked(snd, bytes);
+ so_sowwakeup_locked(so);
+ }
+
+ if (snd->sb_sndptroff < snd->sb_cc)
+ t3_push_frames(so, 0);
+
+out_free:
+ inp_wunlock(tp->t_inpcb);
+ m_free(m);
+}
+
+/*
+ * Handler for TX_DATA_ACK CPL messages.
+ */
+static int
+do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
+{
+ struct toepcb *toep = (struct toepcb *)ctx;
+
+ VALIDATE_SOCK(so);
+
+ wr_ack(toep, m);
+ return 0;
+}
+
+/*
+ * Handler for TRACE_PKT CPL messages. Just sink these packets.
+ */
+static int
+do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx)
+{
+ m_freem(m);
+ return 0;
+}
+
+/*
+ * Reset a connection that is on a listener's SYN queue or accept queue,
+ * i.e., one that has not had a struct socket associated with it.
+ * Must be called from process context.
+ *
+ * Modeled after code in inet_csk_listen_stop().
+ */
+static void
+t3_reset_listen_child(struct socket *child)
+{
+ struct tcpcb *tp = so_sototcpcb(child);
+
+ t3_send_reset(tp->t_toe);
+}
+
+
+static void
+t3_child_disconnect(struct socket *so, void *arg)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+
+ if (tp->t_flags & TF_TOE) {
+ inp_wlock(tp->t_inpcb);
+ t3_reset_listen_child(so);
+ inp_wunlock(tp->t_inpcb);
+ }
+}
+
+/*
+ * Disconnect offloaded established but not yet accepted connections sitting
+ * on a server's accept_queue. We just send an ABORT_REQ at this point and
+ * finish off the disconnect later as we may need to wait for the ABORT_RPL.
+ */
+void
+t3_disconnect_acceptq(struct socket *listen_so)
+{
+
+ so_lock(listen_so);
+ so_listeners_apply_all(listen_so, t3_child_disconnect, NULL);
+ so_unlock(listen_so);
+}
+
+/*
+ * Reset offloaded connections sitting on a server's syn queue. As above
+ * we send ABORT_REQ and finish off when we get ABORT_RPL.
+ */
+
+void
+t3_reset_synq(struct listen_ctx *lctx)
+{
+ struct toepcb *toep;
+
+ so_lock(lctx->lso);
+ while (!LIST_EMPTY(&lctx->synq_head)) {
+ toep = LIST_FIRST(&lctx->synq_head);
+ LIST_REMOVE(toep, synq_entry);
+ toep->tp_tp = NULL;
+ t3_send_reset(toep);
+ cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid);
+ toepcb_release(toep);
+ }
+ so_unlock(lctx->lso);
+}
+
+
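+/*
+ * Write the page pods describing a DDP gather list into adapter memory.
+ * Each pod is carried in its own ULP_MEM_WRITE work request; the trailing
+ * NUM_SENTINEL_PPODS sentinel pods are marked invalid.
+ */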
+int
+t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
+ unsigned int nppods, unsigned int tag, unsigned int maxoff,
+ unsigned int pg_off, unsigned int color)
+{
+ unsigned int i, j, pidx;
+ struct pagepod *p;
+ struct mbuf *m;
+ struct ulp_mem_io *req;
+ unsigned int tid = toep->tp_tid;
+ const struct tom_data *td = TOM_DATA(toep->tp_toedev);
+ unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit;
+
+ CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)",
+ gl, nppods, tag, maxoff, pg_off, color);
+
+ for (i = 0; i < nppods; ++i) {
+ m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE);
+ m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
+ req = mtod(m, struct ulp_mem_io *);
+ m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE;
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
+ req->wr.wr_lo = 0;
+ req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) |
+ V_ULPTX_CMD(ULP_MEM_WRITE));
+ req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) |
+ V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1));
+
+ p = (struct pagepod *)(req + 1);
+ if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) {
+ p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid));
+ p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) |
+ V_PPOD_COLOR(color));
+ p->pp_max_offset = htonl(maxoff);
+ p->pp_page_offset = htonl(pg_off);
+ p->pp_rsvd = 0;
+ for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx)
+ p->pp_addr[j] = pidx < gl->dgl_nelem ?
+ htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0;
+ } else
+ p->pp_vld_tid = 0; /* mark sentinel page pods invalid */
+ send_or_defer(toep, m, 0);
+ ppod_addr += PPOD_SIZE;
+ }
+ return (0);
+}
+
+/*
+ * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command.
+ */
+static inline void
+mk_cpl_barrier_ulp(struct cpl_barrier *b)
+{
+ struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b;
+
+ txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
+ txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8));
+ b->opcode = CPL_BARRIER;
+}
+
+/*
+ * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command.
+ */
+static inline void
+mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno)
+{
+ struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
+
+ txpkt = (struct ulp_txpkt *)req;
+ txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
+ txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid));
+ req->cpuno = htons(cpuno);
+}
+
+/*
+ * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
+ */
+static inline void
+mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
+ unsigned int word, uint64_t mask, uint64_t val)
+{
+ struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
+
+ CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx",
+ tid, word, mask, val);
+
+ txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
+ txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
+ req->reply = V_NO_REPLY(1);
+ req->cpu_idx = 0;
+ req->word = htons(word);
+ req->mask = htobe64(mask);
+ req->val = htobe64(val);
+}
+
+/*
+ * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command.
+ */
+static void
+mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack,
+ unsigned int tid, unsigned int credits)
+{
+ struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack;
+
+ txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
+ txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8));
+ OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
+ ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
+ V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) |
+ V_RX_CREDITS(credits));
+}
+
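+/*
+ * Cancel HW DDP buffer bufidx: a compound WR (bracketed by CPL_BARRIERs)
+ * invalidates that buffer, switches the active-buffer flag to its peer, and
+ * reads back the TCB so we can tell how much data was already placed.
+ */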
+void
+t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx)
+{
+ unsigned int wrlen;
+ struct mbuf *m;
+ struct work_request_hdr *wr;
+ struct cpl_barrier *lock;
+ struct cpl_set_tcb_field *req;
+ struct cpl_get_tcb *getreq;
+ struct ddp_state *p = &toep->tp_ddp_state;
+
+#if 0
+ SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
+#endif
+ wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) +
+ sizeof(*getreq);
+ m = m_gethdr_nofail(wrlen);
+ m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
+ wr = mtod(m, struct work_request_hdr *);
+ bzero(wr, wrlen);
+
+ wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
+ m->m_pkthdr.len = m->m_len = wrlen;
+
+ lock = (struct cpl_barrier *)(wr + 1);
+ mk_cpl_barrier_ulp(lock);
+
+ req = (struct cpl_set_tcb_field *)(lock + 1);
+
+ CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx);
+
+ /* Hmmm, not sure if this is actually a good thing: reactivating
+ * the other buffer might be an issue if it has been completed
+ * already. However, that is unlikely, since the fact that the UBUF
+ * is not completed indicates that there is no outstanding data.
+ */
+ if (bufidx == 0)
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
+ V_TF_DDP_ACTIVE_BUF(1) |
+ V_TF_DDP_BUF0_VALID(1),
+ V_TF_DDP_ACTIVE_BUF(1));
+ else
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
+ V_TF_DDP_ACTIVE_BUF(1) |
+ V_TF_DDP_BUF1_VALID(1), 0);
+
+ getreq = (struct cpl_get_tcb *)(req + 1);
+ mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
+
+ mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1));
+
+ /* Keep track of the number of outstanding CPL_GET_TCB requests
+ */
+ p->get_tcb_count++;
+
+#ifdef T3_TRACE
+ T3_TRACE1(TIDTB(so),
+ "t3_cancel_ddpbuf: bufidx %u", bufidx);
+#endif
+ cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
+}
+
+/**
+ * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one
+ * @sk: the socket associated with the buffers
+ * @bufidx: index of HW DDP buffer (0 or 1)
+ * @tag0: new tag for HW buffer 0
+ * @tag1: new tag for HW buffer 1
+ * @len: new length for HW buf @bufidx
+ *
+ * Sends a compound WR to overlay a new DDP buffer on top of an existing
+ * buffer by changing the buffer tag and length and setting the valid and
+ * active flag accordingly. The caller must ensure the new buffer is at
+ * least as big as the existing one. Since we typically reprogram both HW
+ * buffers this function sets both tags for convenience. Read the TCB to
+ * determine how much data was written into the buffer before the overlay
+ * took place.
+ */
+void
+t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0,
+ unsigned int tag1, unsigned int len)
+{
+ unsigned int wrlen;
+ struct mbuf *m;
+ struct work_request_hdr *wr;
+ struct cpl_get_tcb *getreq;
+ struct cpl_set_tcb_field *req;
+ struct ddp_state *p = &toep->tp_ddp_state;
+
+ CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)",
+ bufidx, tag0, tag1, len);
+#if 0
+ SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
+#endif
+ wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq);
+ m = m_gethdr_nofail(wrlen);
+ m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
+ wr = mtod(m, struct work_request_hdr *);
+ m->m_pkthdr.len = m->m_len = wrlen;
+ bzero(wr, wrlen);
+
+
+ /* Set the ATOMIC flag to make sure that TP processes the following
+ * CPLs in an atomic manner and no wire segments can be interleaved.
+ */
+ wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
+ req = (struct cpl_set_tcb_field *)(wr + 1);
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG,
+ V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) |
+ V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32,
+ V_TCB_RX_DDP_BUF0_TAG(tag0) |
+ V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32);
+ req++;
+ if (bufidx == 0) {
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN,
+ V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
+ V_TCB_RX_DDP_BUF0_LEN((uint64_t)len));
+ req++;
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
+ V_TF_DDP_PUSH_DISABLE_0(1) |
+ V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
+ V_TF_DDP_PUSH_DISABLE_0(0) |
+ V_TF_DDP_BUF0_VALID(1));
+ } else {
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN,
+ V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN),
+ V_TCB_RX_DDP_BUF1_LEN((uint64_t)len));
+ req++;
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS,
+ V_TF_DDP_PUSH_DISABLE_1(1) |
+ V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
+ V_TF_DDP_PUSH_DISABLE_1(0) |
+ V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1));
+ }
+
+ getreq = (struct cpl_get_tcb *)(req + 1);
+ mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset);
+
+ /* Keep track of the number of outstanding CPL_GET_TCB requests
+ */
+ p->get_tcb_count++;
+
+#ifdef T3_TRACE
+ T3_TRACE4(TIDTB(sk),
+ "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u "
+ "len %d",
+ bufidx, tag0, tag1, len);
+#endif
+ cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
+}
+
+/*
+ * Sends a compound WR containing all the CPL messages needed to program the
+ * two HW DDP buffers, namely optionally setting up the length and offset of
+ * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK.
+ */
+void
+t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0,
+ unsigned int len1, unsigned int offset1,
+ uint64_t ddp_flags, uint64_t flag_mask, int modulate)
+{
+ unsigned int wrlen;
+ struct mbuf *m;
+ struct work_request_hdr *wr;
+ struct cpl_set_tcb_field *req;
+
+ CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ",
+ len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff);
+
+#if 0
+ SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv);
+#endif
+ wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) +
+ (len1 ? sizeof(*req) : 0) +
+ (modulate ? sizeof(struct cpl_rx_data_ack) : 0);
+ m = m_gethdr_nofail(wrlen);
+ m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
+ wr = mtod(m, struct work_request_hdr *);
+ bzero(wr, wrlen);
+
+ wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS));
+ m->m_pkthdr.len = m->m_len = wrlen;
+
+ req = (struct cpl_set_tcb_field *)(wr + 1);
+ if (len0) { /* program buffer 0 offset and length */
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET,
+ V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
+ V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
+ V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) |
+ V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0));
+ req++;
+ }
+ if (len1) { /* program buffer 1 offset and length */
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET,
+ V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
+ V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32,
+ V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) |
+ V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32);
+ req++;
+ }
+
+ mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask,
+ ddp_flags);
+
+ if (modulate) {
+ mk_rx_data_ack_ulp(toep,
+ (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid,
+ toep->tp_copied_seq - toep->tp_rcv_wup);
+ toep->tp_rcv_wup = toep->tp_copied_seq;
+ }
+
+#ifdef T3_TRACE
+ T3_TRACE5(TIDTB(sk),
+ "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x "
+ "modulate %d",
+ len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff,
+ modulate);
+#endif
+
+ cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
+}
+
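+/*
+ * Precompute how many work requests are needed to cover a scatter/gather
+ * list with a given number of entries.  wr_len is the WR size in 8-byte
+ * flits; wrlen caches the same limit in bytes.  For example, with
+ * wr_len = 9 a single SG entry needs 3 + 2 = 5 flits and so fits in one
+ * WR (mbuf_wrs[1] == 1).
+ */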
+void
+t3_init_wr_tab(unsigned int wr_len)
+{
+ int i;
+
+ if (mbuf_wrs[1]) /* already initialized */
+ return;
+
+ for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) {
+ int sgl_len = (3 * i) / 2 + (i & 1);
+
+ sgl_len += 3;
+ mbuf_wrs[i] = sgl_len <= wr_len ?
+ 1 : 1 + (sgl_len - 2) / (wr_len - 1);
+ }
+
+ wrlen = wr_len * 8;
+}
+
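+/*
+ * Register this file's CPL message handlers with the TOM dispatcher.
+ */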
+int
+t3_init_cpl_io(void)
+{
+#ifdef notyet
+ tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL);
+ if (!tcphdr_skb) {
+ log(LOG_ERR,
+ "Chelsio TCP offload: can't allocate sk_buff\n");
+ return -1;
+ }
+ skb_put(tcphdr_skb, sizeof(struct tcphdr));
+ tcphdr_skb->h.raw = tcphdr_skb->data;
+ memset(tcphdr_skb->data, 0, tcphdr_skb->len);
+#endif
+
+ t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
+ t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
+ t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack);
+ t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data);
+ t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
+ t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
+ t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
+ t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
+ t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
+ t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
+ t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp);
+ t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
+ t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify);
+ t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt);
+ t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl);
+ return (0);
+}
+
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
new file mode 100644
index 0000000000000..77a3d760f54f7
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
@@ -0,0 +1,1030 @@
+/**************************************************************************
+
+Copyright (c) 2007-2008, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/condvar.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/socket.h>
+#include <sys/syslog.h>
+#include <sys/uio.h>
+#include <sys/file.h>
+
+#include <machine/bus.h>
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+
+#include <dev/cxgb/cxgb_config.h>
+#include <dev/cxgb/cxgb_osdep.h>
+#include <dev/cxgb/sys/mbufq.h>
+#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_offload.h>
+#include <net/route.h>
+
+#include <dev/cxgb/t3cdev.h>
+#include <dev/cxgb/common/cxgb_firmware_exports.h>
+#include <dev/cxgb/common/cxgb_t3_cpl.h>
+#include <dev/cxgb/common/cxgb_tcb.h>
+#include <dev/cxgb/common/cxgb_ctl_defs.h>
+#include <dev/cxgb/cxgb_offload.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+
+#include <dev/cxgb/sys/mvec.h>
+#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
+#include <dev/cxgb/ulp/tom/cxgb_defs.h>
+#include <dev/cxgb/ulp/tom/cxgb_tom.h>
+#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
+#include <dev/cxgb/ulp/tom/cxgb_vm.h>
+
+
+static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
+ struct uio *uio, struct mbuf *top, struct mbuf *control,
+ int flags, struct thread *td);
+
+static int (*pru_soreceive)(struct socket *so, struct sockaddr **paddr,
+ struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
+ int *flagsp);
+
+#define TMP_IOV_MAX 16
+#ifndef PG_FRAME
+#define PG_FRAME ~PAGE_MASK
+#endif
+#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
+
+void
+t3_init_socket_ops(void)
+{
+ struct protosw *prp;
+
+ prp = pffindtype(AF_INET, SOCK_STREAM);
+ pru_sosend = prp->pr_usrreqs->pru_sosend;
+ pru_soreceive = prp->pr_usrreqs->pru_soreceive;
+}
+
+struct cxgb_dma_info {
+ size_t cdi_mapped;
+ int cdi_nsegs;
+ bus_dma_segment_t *cdi_segs;
+
+};
+
+static void
+cxgb_dma_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
+ bus_size_t mapsize, int error)
+{
+ struct cxgb_dma_info *cdi = arg;
+
+ cdi->cdi_mapped = mapsize;
+ cdi->cdi_nsegs = nsegs;
+ cdi->cdi_segs = segs;
+}
+
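+/*
+ * Adjust an iovec array in place: a positive count advances past the first
+ * count bytes (dropping any fully consumed leading entries), while a
+ * negative count trims that many bytes off the end of the array.  For
+ * example, with two 4KB iovecs and count = 6KB the result is a single
+ * entry covering the last 2KB of the second buffer.
+ */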
+static void
+iov_adj(struct iovec **iov, int *iovcnt, size_t count)
+{
+ struct iovec *iovtmp;
+ int iovcnttmp;
+ caddr_t ptmp;
+
+ if (count > 0) {
+ iovtmp = *iov;
+ iovcnttmp = *iovcnt;
+ while (count > 0) {
+ if (count < iovtmp->iov_len) {
+ ptmp = iovtmp->iov_base;
+ ptmp += count;
+ iovtmp->iov_base = ptmp;
+ iovtmp->iov_len -= count;
+ break;
+ } else
+ count -= iovtmp->iov_len;
+ iovtmp++;
+ iovcnttmp--;
+ }
+ *iov = iovtmp;
+ *iovcnt = iovcnttmp;
+ } else if (count < 0) {
+ iovtmp = &(*iov)[*iovcnt - 1];
+ iovcnttmp = *iovcnt;
+ while (count < 0) {
+ if (-count < iovtmp->iov_len) {
+ iovtmp->iov_len += count;
+ break;
+ } else
+ count += iovtmp->iov_len;
+ iovtmp--;
+ iovcnttmp--;
+ }
+ *iovcnt = iovcnttmp;
+ }
+}
+
+static void
+cxgb_zero_copy_free(void *cl, void *arg)
+{
+ struct mbuf_vec *mv;
+ struct mbuf *m = (struct mbuf *)cl;
+
+ mv = mtomv(m);
+ /*
+ * These are physical addresses; don't try to free them here.  The
+ * pages are unheld separately from sbdrop.
+ */
+ mv->mv_count = 0;
+ m_free_iovec(m, m->m_type);
+}
+
+
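+/*
+ * Wire the user pages backing a uio for zero-copy transmit, holding at most
+ * *held pages, and deduct the bytes actually covered from uio_resid.
+ */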
+static int
+cxgb_hold_iovec_pages(struct uio *uio, vm_page_t *m, int *held, int flags)
+{
+ struct iovec *iov = uio->uio_iov;
+ int iovcnt = uio->uio_iovcnt;
+ int err, i, count, totcount, maxcount, totbytes, npages, curbytes;
+ uint64_t start, end;
+ vm_page_t *mp;
+
+ totbytes = totcount = 0;
+ maxcount = *held;
+
+ mp = m;
+ for (totcount = i = 0; (i < iovcnt) && (totcount < maxcount); i++, iov++) {
+ count = maxcount - totcount;
+
+ start = (uintptr_t)iov->iov_base;
+ end = (uintptr_t)((caddr_t)iov->iov_base + iov->iov_len);
+ start &= PG_FRAME;
+ end += PAGE_MASK;
+ end &= PG_FRAME;
+ npages = (end - start) >> PAGE_SHIFT;
+
+ count = min(count, npages);
+
+ err = vm_fault_hold_user_pages((vm_offset_t)iov->iov_base, mp, count, flags);
+ if (err) {
+ vm_fault_unhold_pages(m, totcount);
+ return (err);
+ }
+ mp += count;
+ totcount += count;
+ curbytes = iov->iov_len;
+ if (count != npages)
+ curbytes = count*PAGE_SIZE - (((uintptr_t)iov->iov_base)&PAGE_MASK);
+ totbytes += curbytes;
+ }
+ uio->uio_resid -= totbytes;
+
+ return (0);
+}
+
+/*
+ * Returns whether a connection should enable DDP. This happens when all of
+ * the following conditions are met:
+ * - the connection's ULP mode is DDP
+ * - DDP is not already enabled
+ * - the last receive was above the DDP threshold
+ * - receive buffers are in user space
+ * - receive side isn't shutdown (handled by caller)
+ * - the connection's receive window is big enough so that sizable buffers
+ * can be posted without closing the window in the middle of DDP (checked
+ * when the connection is offloaded)
+ */
+static int
+so_should_ddp(const struct toepcb *toep, int last_recv_len)
+{
+
+ DPRINTF("ulp_mode=%d last_recv_len=%d ddp_thresh=%d rcv_wnd=%ld ddp_copy_limit=%d\n",
+ toep->tp_ulp_mode, last_recv_len, TOM_TUNABLE(toep->tp_toedev, ddp_thres),
+ toep->tp_tp->rcv_wnd, (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN));
+
+ return toep->tp_ulp_mode == ULP_MODE_TCPDDP && (toep->tp_ddp_state.kbuf[0] == NULL) &&
+ last_recv_len > TOM_TUNABLE(toep->tp_toedev, ddp_thres) &&
+ toep->tp_tp->rcv_wnd >
+ (TOM_TUNABLE(toep->tp_toedev, ddp_copy_limit) + DDP_RSVD_WIN);
+}
+
+static inline int
+is_ddp(const struct mbuf *m)
+{
+ return ((m->m_flags & M_DDP) != 0);
+}
+
+static inline int
+is_ddp_psh(const struct mbuf *m)
+{
+ return ((is_ddp(m) && (m->m_pkthdr.csum_flags & DDP_BF_PSH)) != 0);
+}
+
+static int
+m_uiomove(const struct mbuf *m, int offset, int len, struct uio *uio)
+{
+ int curlen, startlen, resid_init, err = 0;
+ caddr_t buf;
+
+ DPRINTF("m_uiomove(m=%p, offset=%d, len=%d, ...)\n",
+ m, offset, len);
+
+ startlen = len;
+ resid_init = uio->uio_resid;
+ while (m && len) {
+ buf = mtod(m, caddr_t);
+ curlen = m->m_len;
+ if (offset && (offset < curlen)) {
+ curlen -= offset;
+ buf += offset;
+ offset = 0;
+ } else if (offset) {
+ offset -= curlen;
+ m = m->m_next;
+ continue;
+ }
+ err = uiomove(buf, min(len, curlen), uio);
+ if (err) {
+ printf("uiomove returned %d\n", err);
+ return (err);
+ }
+
+ len -= min(len, curlen);
+ m = m->m_next;
+ }
+ DPRINTF("copied %d bytes - resid_init=%d uio_resid=%d\n",
+ startlen - len, resid_init, uio->uio_resid);
+ return (err);
+}
+
+/*
+ * Copy data from an mbuf to an iovec. Deals with RX_DATA, which carries the
+ * data in the mbuf itself, and with RX_DATA_DDP, which places the data in a
+ * DDP buffer.
+ */
+static inline int
+copy_data(const struct mbuf *m, int offset, int len, struct uio *uio)
+{
+ struct iovec *to = uio->uio_iov;
+ int err;
+
+ if (__predict_true(!is_ddp(m))) /* RX_DATA */
+ return m_uiomove(m, offset, len, uio);
+ if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */
+ to->iov_len -= len;
+ to->iov_base = ((caddr_t)to->iov_base) + len;
+ uio->uio_iov = to;
+ uio->uio_resid -= len;
+ return (0);
+ }
+ err = t3_ddp_copy(m, offset, uio, len); /* kernel DDP */
+ return (err);
+}
+
+static void
+cxgb_wait_dma_completion(struct toepcb *toep)
+{
+ struct rwlock *lock;
+
+ lock = &toep->tp_tp->t_inpcb->inp_lock;
+ inp_wlock(toep->tp_tp->t_inpcb);
+ cv_wait_unlock(&toep->tp_cv, lock);
+}
+
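+/*
+ * Map the (already wired) user buffers described by a uio with busdma and
+ * wrap the resulting physical segments in a zero-copy mbuf iovec so they
+ * can be appended to the send buffer and pushed to the card.
+ */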
+static int
+cxgb_vm_page_to_miov(struct toepcb *toep, struct uio *uio, struct mbuf **m)
+{
+ int i, seg_count, err, type;
+ struct mbuf *m0;
+ struct cxgb_dma_info cdi;
+ struct mbuf_vec *mv;
+ struct mbuf_iovec *mi;
+ bus_dma_segment_t *segs;
+
+ err = bus_dmamap_load_uio(toep->tp_tx_dmat, toep->tp_dmamap, uio,
+ cxgb_dma_callback, &cdi, 0);
+
+ if (err)
+ return (err);
+ seg_count = cdi.cdi_nsegs;
+ if ((m0 = mcl_alloc(seg_count, &type)) == NULL) {
+ bus_dmamap_unload(toep->tp_tx_dmat, toep->tp_dmamap);
+ return (ENOMEM);
+ }
+ segs = cdi.cdi_segs;
+ m0->m_type = type;
+ m0->m_flags = (M_EXT|M_NOFREE);
+ m0->m_ext.ext_type = EXT_EXTREF;
+ m0->m_ext.ext_free = cxgb_zero_copy_free;
+ m0->m_ext.ext_arg1 = NULL; /* XXX: probably wrong /phk */
+ m0->m_ext.ext_arg2 = NULL;
+
+ mv = mtomv(m0);
+ mv->mv_count = seg_count;
+ mv->mv_first = 0;
+ for (i = 0, mi = mv->mv_vec; i < seg_count; mi++, segs++, i++)
+ mi_collapse_sge(mi, segs);
+
+ *m = m0;
+
+ /*
+ * This appears to be a no-op at the moment, as busdma is all or
+ * nothing; we need to make sure the tag values are large enough.
+ */
+ if (cdi.cdi_mapped < uio->uio_resid) {
+ uio->uio_resid -= cdi.cdi_mapped;
+ } else
+ uio->uio_resid = 0;
+
+ return (0);
+}
+
+static int
+t3_sosend(struct socket *so, struct uio *uio)
+{
+ int rv, count, hold_resid, sent, iovcnt;
+ struct iovec iovtmp[TMP_IOV_MAX], *iovtmpp, *iov;
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+ struct mbuf *m;
+ struct uio uiotmp;
+ struct sockbuf *snd;
+
+ /*
+ * Events requiring iteration:
+ * - number of pages exceeds max hold pages for process or system
+ * - number of pages exceeds maximum sg entries for a single WR
+ *
+ * We're limited to holding 128 pages at once - and we're limited to
+ * 34 SG entries per work request, but each SG entry can be any number
+ * of contiguous pages
+ *
+ */
+
+ uiotmp = *uio;
+ iovcnt = uio->uio_iovcnt;
+ iov = uio->uio_iov;
+ sent = 0;
+ snd = so_sockbuf_snd(so);
+sendmore:
+ /*
+ * Make sure we don't exceed the socket buffer
+ */
+ count = min(toep->tp_page_count, (sockbuf_sbspace(snd) >> PAGE_SHIFT) + 2*PAGE_SIZE);
+ rv = cxgb_hold_iovec_pages(&uiotmp, toep->tp_pages, &count, 0);
+ hold_resid = uiotmp.uio_resid;
+ if (rv)
+ return (rv);
+
+ /*
+ * Bump past sent and shave off the unheld amount
+ */
+ if (hold_resid > 0) {
+ iovtmpp = iovtmp;
+ memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
+ if (sent)
+ iov_adj(&iovtmpp, &iovcnt, sent);
+ iov_adj(&iovtmpp, &iovcnt, -hold_resid);
+ uiotmp.uio_iov = iovtmpp;
+ uiotmp.uio_iovcnt = iovcnt;
+
+ }
+ uiotmp.uio_resid = uio->uio_resid - hold_resid;
+
+ /*
+ * Push off all held pages
+ *
+ */
+ while (uiotmp.uio_resid > 0) {
+ rv = cxgb_vm_page_to_miov(toep, &uiotmp, &m);
+ if (rv) {
+ vm_fault_unhold_pages(toep->tp_pages, count);
+ return (rv);
+ }
+ uio->uio_resid -= m->m_pkthdr.len;
+ sent += m->m_pkthdr.len;
+ sbappend(snd, m);
+ t3_push_frames(so, TRUE);
+ iov_adj(&uiotmp.uio_iov, &iovcnt, uiotmp.uio_resid);
+ }
+
+ /*
+ * Wait for pending I/O to be DMA'd to the card
+ *
+ */
+ cxgb_wait_dma_completion(toep);
+ vm_fault_unhold_pages(toep->tp_pages, count);
+	/*
+	 * If there is more data to send, adjust the local copy of the iov
+	 * to point to the start.
+	 */
+ if (hold_resid) {
+ iovtmpp = iovtmp;
+ memcpy(iovtmp, iov, iovcnt*sizeof(*iov));
+ iov_adj(&iovtmpp, &iovcnt, sent);
+ uiotmp = *uio;
+ uiotmp.uio_iov = iovtmpp;
+ uiotmp.uio_iovcnt = iovcnt;
+ goto sendmore;
+ }
+
+ return (0);
+}
+
+static int
+cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
+ struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toedev *tdev;
+ int zcopy_thres, zcopy_enabled, rv;
+
+ /*
+ * In order to use DMA direct from userspace the following
+ * conditions must be met:
+ * - the connection is currently offloaded
+ * - ddp is enabled
+ * - the number of bytes to be transferred exceeds the threshold
+ * - the number of bytes currently in flight won't exceed the in-flight
+ * threshold XXX TODO
+ * - vm_fault_hold_user_pages succeeds
+ * - blocking socket XXX for now
+ *
+ */
+ if (tp && tp->t_flags & TF_TOE) {
+ struct toepcb *toep = tp->t_toe;
+
+ tdev = toep->tp_toedev;
+ zcopy_thres = TOM_TUNABLE(tdev, zcopy_sosend_partial_thres);
+ zcopy_enabled = TOM_TUNABLE(tdev, zcopy_sosend_enabled);
+
+ if (uio && (uio->uio_resid > zcopy_thres) &&
+ (uio->uio_iovcnt < TMP_IOV_MAX) && ((so_state_get(so) & SS_NBIO) == 0)
+ && zcopy_enabled) {
+ rv = t3_sosend(so, uio);
+ if (rv != EAGAIN)
+ return (rv);
+ }
+ }
+ return pru_sosend(so, addr, uio, top, control, flags, td);
+}
+
+/*
+ * Following replacement or removal of the first mbuf on the first mbuf chain
+ * of a socket buffer, push necessary state changes back into the socket
+ * buffer so that other consumers see the values consistently. 'nextrecord'
+ * is the caller's locally stored copy of the original value of
+ * sb->sb_mb->m_nextpkt, which must be restored when the lead mbuf changes.
+ * NOTE: 'nextrecord' may be NULL.
+ */
+static __inline void
+sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
+{
+ sockbuf_lock_assert(sb);
+ /*
+ * First, update for the new value of nextrecord. If necessary, make
+ * it the first record.
+ */
+ if (sb->sb_mb != NULL)
+ sb->sb_mb->m_nextpkt = nextrecord;
+ else
+ sb->sb_mb = nextrecord;
+
+ /*
+ * Now update any dependent socket buffer fields to reflect the new
+ * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
+ * addition of a second clause that takes care of the case where
+ * sb_mb has been updated, but remains the last record.
+ */
+ if (sb->sb_mb == NULL) {
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ } else if (sb->sb_mb->m_nextpkt == NULL)
+ sb->sb_lastrecord = sb->sb_mb;
+}
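+
+/*
+ * Illustrative call pattern (sketch only, mirroring the use in t3_soreceive
+ * below): the caller records m_nextpkt before unlinking mbufs from the head
+ * of the buffer and then resynchronizes:
+ *
+ *	nextrecord = m->m_nextpkt;
+ *	while (count > 0) {
+ *		...
+ *		rcv->sb_mb = m_free(m);
+ *		m = rcv->sb_mb;
+ *	}
+ *	sockbuf_pushsync(rcv, nextrecord);
+ */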
+
+#define IS_NONBLOCKING(so) (so_state_get(so) & SS_NBIO)
+
+static int
+t3_soreceive(struct socket *so, int *flagsp, struct uio *uio)
+{
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct toepcb *toep = tp->t_toe;
+ struct mbuf *m;
+ uint32_t offset;
+ int err, flags, avail, len, copied, copied_unacked;
+ int target; /* Read at least this many bytes */
+ int user_ddp_ok;
+ struct ddp_state *p;
+ struct inpcb *inp = so_sotoinpcb(so);
+ int socket_state, socket_error;
+ struct sockbuf *rcv;
+
+ avail = offset = copied = copied_unacked = 0;
+ flags = flagsp ? (*flagsp &~ MSG_EOR) : 0;
+ rcv = so_sockbuf_rcv(so);
+
+ err = sblock(rcv, SBLOCKWAIT(flags));
+ p = &toep->tp_ddp_state;
+
+ if (err)
+ return (err);
+
+ rcv = so_sockbuf_rcv(so);
+ sockbuf_lock(rcv);
+ if ((tp->t_flags & TF_TOE) == 0) {
+ sockbuf_unlock(rcv);
+ err = EAGAIN;
+ goto done_unlocked;
+ }
+
+ p->user_ddp_pending = 0;
+restart:
+ if ((tp->t_flags & TF_TOE) == 0) {
+ sockbuf_unlock(rcv);
+ err = EAGAIN;
+ goto done_unlocked;
+ }
+
+ len = uio->uio_resid;
+ m = rcv->sb_mb;
+ target = (flags & MSG_WAITALL) ? len : rcv->sb_lowat;
+ user_ddp_ok = p->ubuf_ddp_ready;
+ p->cancel_ubuf = 0;
+
+ if (len == 0)
+ goto done;
+ if (m)
+ goto got_mbuf;
+
+ /* empty receive queue */
+ if (copied >= target && (rcv->sb_mb == NULL) &&
+ !p->user_ddp_pending)
+ goto done;
+
+ socket_state = so_state_get(so);
+ socket_error = so_error_get(so);
+ rcv = so_sockbuf_rcv(so);
+
+ if (copied) {
+ if (socket_error || tp->t_state == TCPS_CLOSED ||
+ (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)))
+ goto done;
+ } else {
+ if (socket_state & SS_NOFDREF)
+ goto done;
+ if (socket_error) {
+ err = socket_error;
+ socket_error = 0;
+ goto done;
+ }
+ if (rcv->sb_state & SBS_CANTRCVMORE)
+ goto done;
+ if (socket_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED))
+ goto done;
+ if (tp->t_state == TCPS_CLOSED) {
+ err = ENOTCONN;
+ goto done;
+ }
+ }
+ if (rcv->sb_mb && !p->user_ddp_pending) {
+ sockbuf_unlock(rcv);
+ inp_wlock(inp);
+ t3_cleanup_rbuf(tp, copied_unacked);
+ inp_wunlock(inp);
+ sockbuf_lock(rcv);
+ copied_unacked = 0;
+ goto restart;
+ }
+ if (p->kbuf[0] && user_ddp_ok && !p->user_ddp_pending &&
+ uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
+ p->ubuf_ddp_ready) {
+ p->user_ddp_pending =
+ !t3_overlay_ubuf(toep, rcv, uio,
+ IS_NONBLOCKING(so), flags, 1, 1);
+ if (p->user_ddp_pending) {
+ p->kbuf_posted++;
+ user_ddp_ok = 0;
+ }
+ }
+ if (p->kbuf[0] && (p->kbuf_posted == 0)) {
+ t3_post_kbuf(toep, 1, IS_NONBLOCKING(so));
+ p->kbuf_posted++;
+ }
+ if (p->user_ddp_pending) {
+ /* One shot at DDP if we already have enough data */
+ if (copied >= target)
+ user_ddp_ok = 0;
+
+ if (rcv->sb_state & SBS_CANTRCVMORE)
+ goto done;
+ CTR0(KTR_TOM, "ddp pending -- waiting");
+ if ((err = sbwait(rcv)) != 0)
+ goto done;
+//for timers to work await_ddp_completion(sk, flags, &timeo);
+ } else if (copied >= target)
+ goto done;
+ else {
+ if (copied_unacked) {
+ int i = 0;
+
+ sockbuf_unlock(rcv);
+ inp_wlock(inp);
+ t3_cleanup_rbuf(tp, copied_unacked);
+ inp_wunlock(inp);
+ copied_unacked = 0;
+ if (mp_ncpus > 1)
+ while (i++ < 200 && rcv->sb_mb == NULL)
+ cpu_spinwait();
+ sockbuf_lock(rcv);
+ }
+ if (rcv->sb_mb)
+ goto restart;
+
+ if (rcv->sb_state & SBS_CANTRCVMORE)
+ goto done;
+
+ CTR0(KTR_TOM, "no buffers -- waiting");
+
+ if ((err = sbwait(rcv)) != 0)
+ goto done;
+ }
+ goto restart;
+got_mbuf:
+ /*
+ * Adjust the mbuf seqno if it has already been partially processed by
+ * soreceive_generic
+ */
+ if (m->m_pkthdr.len != m->m_len) {
+ m->m_seq += m->m_pkthdr.len - m->m_len;
+ m->m_pkthdr.len = m->m_len;
+ }
+
+ CTR6(KTR_TOM, "t3_soreceive: ddp_flags=0x%x m_len=%u resid=%u "
+ "m_seq=0x%08x c_seq=0x%08x c_unack=%u",
+ (is_ddp(m) ? m->m_ddp_flags : 0), m->m_pkthdr.len, len,
+ m->m_seq, toep->tp_copied_seq, copied_unacked);
+ KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT),
+ ("unexpected type M_EXT=%d ext_type=%d m_len=%d m_pktlen=%d\n", !!(m->m_flags & M_EXT),
+ m->m_ext.ext_type, m->m_len, m->m_pkthdr.len));
+ KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p"
+ " m_flags=0x%x m->m_len=%d", m->m_next, m->m_nextpkt, m->m_flags, m->m_len));
+ if (m->m_pkthdr.len == 0) {
+ if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0)
+ panic("empty mbuf and NOCOPY not set\n");
+ CTR0(KTR_TOM, "ddp done notification");
+ p->user_ddp_pending = 0;
+ sbdroprecord_locked(rcv);
+ goto done;
+ }
+
+ KASSERT((int32_t)(toep->tp_copied_seq + copied_unacked - m->m_seq) >= 0,
+ ("offset will go negative: offset=%d copied_seq=0x%08x copied_unacked=%d m_seq=0x%08x",
+ offset, toep->tp_copied_seq, copied_unacked, m->m_seq));
+ offset = toep->tp_copied_seq + copied_unacked - m->m_seq;
+
+ if (offset >= m->m_pkthdr.len)
+ panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x "
+ "seq 0x%x pktlen %d ddp flags 0x%x", offset,
+ toep->tp_copied_seq + copied_unacked, m->m_seq,
+ m->m_pkthdr.len, m->m_ddp_flags);
+
+ avail = m->m_pkthdr.len - offset;
+ if (len < avail) {
+ if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY))
+ panic("bad state in t3_soreceive len=%d avail=%d offset=%d\n", len, avail, offset);
+ avail = len;
+ rcv->sb_flags |= SB_IN_TOE;
+ } else if (p->kbuf_posted == 0 && p->user_ddp_pending == 0)
+ rcv->sb_flags &= ~SB_IN_TOE;
+
+#ifdef URGENT_DATA_SUPPORTED
+ /*
+ * Check if the data we are preparing to copy contains urgent
+ * data. Either stop short of urgent data or skip it if it's
+ * first and we are not delivering urgent data inline.
+ */
+ if (__predict_false(toep->tp_urg_data)) {
+ uint32_t urg_offset = tp->rcv_up - tp->copied_seq + copied_unacked;
+
+ if (urg_offset < avail) {
+ if (urg_offset) {
+ /* stop short of the urgent data */
+ avail = urg_offset;
+ } else if ((so_options_get(so) & SO_OOBINLINE) == 0) {
+ /* First byte is urgent, skip */
+ toep->tp_copied_seq++;
+ offset++;
+ avail--;
+ if (!avail)
+ goto skip_copy;
+ }
+ }
+ }
+#endif
+ if (is_ddp_psh(m) || offset || (rcv->sb_mb && !is_ddp(m))) {
+ user_ddp_ok = 0;
+#ifdef T3_TRACE
+		T3_TRACE0(TIDTB(so), "t3_soreceive: PSH");
+#endif
+ }
+
+ if (user_ddp_ok && !p->user_ddp_pending &&
+ uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
+ p->ubuf_ddp_ready) {
+ p->user_ddp_pending =
+ !t3_overlay_ubuf(toep, rcv, uio,
+ IS_NONBLOCKING(so), flags, 1, 1);
+ if (p->user_ddp_pending) {
+ p->kbuf_posted++;
+ user_ddp_ok = 0;
+ }
+ DPRINTF("user_ddp_pending=%d\n", p->user_ddp_pending);
+ } else
+ DPRINTF("user_ddp_ok=%d user_ddp_pending=%d iov_len=%ld dgl_length=%d ubuf_ddp_ready=%d ulp_mode=%d is_ddp(m)=%d flags=0x%x ubuf=%p kbuf_posted=%d\n",
+ user_ddp_ok, p->user_ddp_pending, uio->uio_iov->iov_len, p->kbuf[0] ? p->kbuf[0]->dgl_length : 0,
+ p->ubuf_ddp_ready, toep->tp_ulp_mode, !!is_ddp(m), m->m_ddp_flags, p->ubuf, p->kbuf_posted);
+
+ /*
+ * If MSG_TRUNC is specified the data is discarded.
+ * XXX need to check pr_atomic
+ */
+ KASSERT(avail > 0, ("avail=%d resid=%d offset=%d", avail, uio->uio_resid, offset));
+ if (__predict_true(!(flags & MSG_TRUNC))) {
+ int resid = uio->uio_resid;
+
+ sockbuf_unlock(rcv);
+ if ((err = copy_data(m, offset, avail, uio))) {
+ if (err)
+ err = EFAULT;
+ goto done_unlocked;
+ }
+
+ sockbuf_lock(rcv);
+ if (avail != (resid - uio->uio_resid))
+ printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n",
+ avail, offset, m->m_pkthdr.len, resid, uio->uio_resid, copied, copied_unacked, is_ddp(m));
+
+ if ((tp->t_flags & TF_TOE) == 0) {
+ sockbuf_unlock(rcv);
+ err = EAGAIN;
+ goto done_unlocked;
+ }
+ }
+
+ copied += avail;
+ copied_unacked += avail;
+ len -= avail;
+
+#ifdef URGENT_DATA_SUPPORTED
+skip_copy:
+ if (tp->urg_data && after(tp->copied_seq + copied_unacked, tp->urg_seq))
+ tp->urg_data = 0;
+#endif
+ /*
+ * If the buffer is fully consumed free it. If it's a DDP
+ * buffer also handle any events it indicates.
+ */
+ if (avail + offset >= m->m_pkthdr.len) {
+ unsigned int fl = m->m_ddp_flags;
+ int exitnow, got_psh = 0, nomoredata = 0;
+ int count;
+ struct mbuf *nextrecord;
+
+ if (p->kbuf[0] != NULL && is_ddp(m) && (fl & 1)) {
+ if (is_ddp_psh(m) && p->user_ddp_pending)
+ got_psh = 1;
+
+ if (fl & DDP_BF_NOCOPY)
+ p->user_ddp_pending = 0;
+ else if ((fl & DDP_BF_NODATA) && IS_NONBLOCKING(so)) {
+ p->kbuf_posted--;
+ nomoredata = 1;
+ } else {
+ p->kbuf_posted--;
+ p->ubuf_ddp_ready = 1;
+ }
+ }
+
+ nextrecord = m->m_nextpkt;
+ count = m->m_pkthdr.len;
+ while (count > 0) {
+ count -= m->m_len;
+ KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len));
+ CTR2(KTR_TOM, "freeing mbuf m_len = %d pktlen = %d", m->m_len, m->m_pkthdr.len);
+ sbfree(rcv, m);
+ rcv->sb_mb = m_free(m);
+ m = rcv->sb_mb;
+ }
+ sockbuf_pushsync(rcv, nextrecord);
+#if 0
+ sbdrop_locked(rcv, m->m_pkthdr.len);
+#endif
+ exitnow = got_psh || nomoredata;
+ if (copied >= target && (rcv->sb_mb == NULL) && exitnow)
+ goto done;
+ if (copied_unacked > (rcv->sb_hiwat >> 2)) {
+ sockbuf_unlock(rcv);
+ inp_wlock(inp);
+ t3_cleanup_rbuf(tp, copied_unacked);
+ inp_wunlock(inp);
+ copied_unacked = 0;
+ sockbuf_lock(rcv);
+ }
+ }
+ if (len > 0)
+ goto restart;
+
+ done:
+ if ((tp->t_flags & TF_TOE) == 0) {
+ sockbuf_unlock(rcv);
+ err = EAGAIN;
+ goto done_unlocked;
+ }
+ /*
+	 * If we can still receive, decide what to do in preparation for the
+ * next receive. Note that RCV_SHUTDOWN is set if the connection
+ * transitioned to CLOSE but not if it was in that state to begin with.
+ */
+ if (__predict_true((so_state_get(so) & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) == 0)) {
+ if (p->user_ddp_pending) {
+ user_ddp_ok = 0;
+ t3_cancel_ubuf(toep, rcv);
+ if (rcv->sb_mb) {
+ if (copied < 0)
+ copied = 0;
+ if (len > 0)
+ goto restart;
+ }
+ p->user_ddp_pending = 0;
+ }
+ if ((p->kbuf[0] != NULL) && (p->kbuf_posted == 0)) {
+#ifdef T3_TRACE
+ T3_TRACE0(TIDTB(so),
+ "chelsio_recvmsg: about to exit, repost kbuf");
+#endif
+
+ t3_post_kbuf(toep, 1, IS_NONBLOCKING(so));
+ p->kbuf_posted++;
+ } else if (so_should_ddp(toep, copied) && uio->uio_iovcnt == 1) {
+ CTR1(KTR_TOM ,"entering ddp on tid=%u", toep->tp_tid);
+ if (!t3_enter_ddp(toep, TOM_TUNABLE(toep->tp_toedev,
+ ddp_copy_limit), 0, IS_NONBLOCKING(so))) {
+ rcv->sb_flags |= SB_IN_TOE;
+ p->kbuf_posted = 1;
+ }
+
+ }
+ }
+#ifdef T3_TRACE
+ T3_TRACE5(TIDTB(so),
+ "chelsio_recvmsg <-: copied %d len %d buffers_freed %d "
+ "kbuf_posted %d user_ddp_pending %u",
+ copied, len, buffers_freed, p ? p->kbuf_posted : -1,
+ p->user_ddp_pending);
+#endif
+ sockbuf_unlock(rcv);
+done_unlocked:
+ if (copied_unacked && (tp->t_flags & TF_TOE)) {
+ inp_wlock(inp);
+ t3_cleanup_rbuf(tp, copied_unacked);
+ inp_wunlock(inp);
+ }
+ sbunlock(rcv);
+
+ return (err);
+}
+
+static int
+cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
+ struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+ struct toedev *tdev;
+ int rv, zcopy_thres, zcopy_enabled, flags;
+ struct tcpcb *tp = so_sototcpcb(so);
+ struct sockbuf *rcv = so_sockbuf_rcv(so);
+
+ flags = flagsp ? *flagsp &~ MSG_EOR : 0;
+
+ /*
+ * In order to use DMA direct from userspace the following
+ * conditions must be met:
+ * - the connection is currently offloaded
+ * - ddp is enabled
+ * - the number of bytes to be transferred exceeds the threshold
+ * - the number of bytes currently in flight won't exceed the in-flight
+ * threshold XXX TODO
+ * - vm_fault_hold_user_pages succeeds
+ * - blocking socket XXX for now
+ * - iovcnt is 1
+ *
+ */
+ if (tp && (tp->t_flags & TF_TOE) && uio && ((flags & (MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0)
+ && (uio->uio_iovcnt == 1) && (mp0 == NULL) &&
+ ((rcv->sb_flags & SB_IN_TOE) || (uio->uio_iovcnt == 1))) {
+ struct toepcb *toep = tp->t_toe;
+
+ tdev = toep->tp_toedev;
+ zcopy_thres = TOM_TUNABLE(tdev, ddp_thres);
+ zcopy_enabled = TOM_TUNABLE(tdev, ddp);
+ if ((rcv->sb_flags & SB_IN_TOE) ||((uio->uio_resid > zcopy_thres) &&
+ (uio->uio_iovcnt == 1) && zcopy_enabled)) {
+ CTR4(KTR_TOM, "cxgb_soreceive: sb_flags=0x%x t_flags=0x%x flags=0x%x uio_resid=%d",
+ rcv->sb_flags, tp->t_flags, flags, uio->uio_resid);
+ rv = t3_soreceive(so, flagsp, uio);
+ if (rv != EAGAIN)
+ return (rv);
+ else
+ printf("returned EAGAIN\n");
+ }
+ } else if (tp && (tp->t_flags & TF_TOE) && uio && mp0 == NULL) {
+ struct sockbuf *rcv = so_sockbuf_rcv(so);
+
+ log(LOG_INFO, "skipping t3_soreceive flags=0x%x iovcnt=%d sb_state=0x%x\n",
+ flags, uio->uio_iovcnt, rcv->sb_state);
+ }
+
+ return pru_soreceive(so, psa, uio, mp0, controlp, flagsp);
+}
+
+struct protosw cxgb_protosw;
+struct pr_usrreqs cxgb_tcp_usrreqs;
+
+
+void
+t3_install_socket_ops(struct socket *so)
+{
+ static int copied = 0;
+ struct pr_usrreqs *pru;
+ struct protosw *psw;
+
+ if (copied == 0) {
+ psw = so_protosw_get(so);
+ pru = psw->pr_usrreqs;
+
+ bcopy(psw, &cxgb_protosw, sizeof(*psw));
+ bcopy(pru, &cxgb_tcp_usrreqs, sizeof(*pru));
+
+ cxgb_protosw.pr_ctloutput = t3_ctloutput;
+ cxgb_protosw.pr_usrreqs = &cxgb_tcp_usrreqs;
+ cxgb_tcp_usrreqs.pru_sosend = cxgb_sosend;
+ cxgb_tcp_usrreqs.pru_soreceive = cxgb_soreceive;
+ }
+ so_protosw_set(so, &cxgb_protosw);
+
+#if 0
+ so->so_proto->pr_usrreqs->pru_sosend = cxgb_sosend;
+ so->so_proto->pr_usrreqs->pru_soreceive = cxgb_soreceive;
+#endif
+}
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
new file mode 100644
index 0000000000000..86e1e91b98271
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
@@ -0,0 +1,738 @@
+/**************************************************************************
+
+Copyright (c) 2007-2008, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/condvar.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/syslog.h>
+#include <sys/uio.h>
+
+#include <machine/bus.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+
+
+#include <dev/cxgb/cxgb_osdep.h>
+#include <dev/cxgb/sys/mbufq.h>
+
+#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_offload.h>
+#include <net/route.h>
+
+#include <dev/cxgb/t3cdev.h>
+#include <dev/cxgb/common/cxgb_firmware_exports.h>
+#include <dev/cxgb/common/cxgb_t3_cpl.h>
+#include <dev/cxgb/common/cxgb_tcb.h>
+#include <dev/cxgb/common/cxgb_ctl_defs.h>
+#include <dev/cxgb/cxgb_offload.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+
+#include <dev/cxgb/sys/mvec.h>
+#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
+#include <dev/cxgb/ulp/tom/cxgb_defs.h>
+#include <dev/cxgb/ulp/tom/cxgb_tom.h>
+#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
+#include <dev/cxgb/ulp/tom/cxgb_vm.h>
+
+
+#define MAX_SCHEDULE_TIMEOUT 300
+
+/*
+ * Return the # of page pods needed to accommodate a # of pages.
+ */
+static inline unsigned int
+pages2ppods(unsigned int pages)
+{
+ return (pages + PPOD_PAGES - 1) / PPOD_PAGES + NUM_SENTINEL_PPODS;
+}
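+
+/*
+ * Worked example (constant values here are for illustration only, not taken
+ * from this change): with PPOD_PAGES == 4 and NUM_SENTINEL_PPODS == 1, a
+ * 13-page buffer needs (13 + 4 - 1) / 4 + 1 = 4 + 1 = 5 page pods.
+ */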
+
+/**
+ * t3_pin_pages - pin a user memory range and prepare it for DDP
+ * @addr - the starting address
+ * @len - the length of the range
+ * @newgl - contains the pages and physical addresses of the pinned range
+ * @gl - an existing gather list, may be %NULL
+ *
+ * Pins the pages in the user-space memory range [addr, addr + len) and
+ * maps them for DMA. Returns a gather list with the pinned pages and
+ * their physical addresses. If @gl is non-NULL the pages it describes
+ * are compared against the pages for [addr, addr + len), and if the
+ * existing gather list already covers the range a new list is not
+ * allocated. Returns 0 on success or an error code. On success, if a
+ * new gather list was allocated, it is returned in @newgl.
+ */
+static int
+t3_pin_pages(bus_dma_tag_t tag, bus_dmamap_t map, vm_offset_t addr,
+ size_t len, struct ddp_gather_list **newgl,
+ const struct ddp_gather_list *gl)
+{
+ int i = 0, err;
+ size_t pg_off;
+ unsigned int npages;
+ struct ddp_gather_list *p;
+
+ /*
+ * XXX need x86 agnostic check
+ */
+ if (addr + len > VM_MAXUSER_ADDRESS)
+ return (EFAULT);
+
+ pg_off = addr & PAGE_MASK;
+ npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ p = malloc(sizeof(struct ddp_gather_list) + npages * sizeof(vm_page_t *),
+ M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (p == NULL)
+ return (ENOMEM);
+
+ err = vm_fault_hold_user_pages(addr, p->dgl_pages, npages, VM_HOLD_WRITEABLE);
+ if (err)
+ goto free_gl;
+
+ if (gl && gl->dgl_offset == pg_off && gl->dgl_nelem >= npages &&
+ gl->dgl_length >= len) {
+ for (i = 0; i < npages; i++)
+ if (p->dgl_pages[i] != gl->dgl_pages[i])
+ goto different_gl;
+ err = 0;
+ goto unpin;
+ }
+
+different_gl:
+ p->dgl_length = len;
+ p->dgl_offset = pg_off;
+ p->dgl_nelem = npages;
+#ifdef NEED_BUSDMA
+ p->phys_addr[0] = pci_map_page(pdev, p->pages[0], pg_off,
+ PAGE_SIZE - pg_off,
+ PCI_DMA_FROMDEVICE) - pg_off;
+ for (i = 1; i < npages; ++i)
+ p->phys_addr[i] = pci_map_page(pdev, p->pages[i], 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+#endif
+ *newgl = p;
+ return (0);
+unpin:
+ vm_fault_unhold_pages(p->dgl_pages, npages);
+
+free_gl:
+
+ free(p, M_DEVBUF);
+ *newgl = NULL;
+ return (err);
+}
+
+static void
+unmap_ddp_gl(const struct ddp_gather_list *gl)
+{
+#ifdef NEED_BUSDMA
+ int i;
+
+ if (!gl->nelem)
+ return;
+
+ pci_unmap_page(pdev, gl->phys_addr[0] + gl->offset,
+ PAGE_SIZE - gl->offset, PCI_DMA_FROMDEVICE);
+ for (i = 1; i < gl->nelem; ++i)
+ pci_unmap_page(pdev, gl->phys_addr[i], PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+
+#endif
+}
+
+static void
+ddp_gl_free_pages(struct ddp_gather_list *gl, int dirty)
+{
+ /*
+ * XXX mark pages as dirty before unholding
+ */
+ vm_fault_unhold_pages(gl->dgl_pages, gl->dgl_nelem);
+}
+
+void
+t3_free_ddp_gl(struct ddp_gather_list *gl)
+{
+ unmap_ddp_gl(gl);
+ ddp_gl_free_pages(gl, 0);
+ free(gl, M_DEVBUF);
+}
+
+/* Max # of page pods for a buffer, enough for 1MB buffer at 4KB page size */
+#define MAX_PPODS 64U
+
+/*
+ * Allocate page pods for DDP buffer 1 (the user buffer) and set up the tag in
+ * the TCB. We allocate page pods in multiples of PPOD_CLUSTER_SIZE. First we
+ * try to allocate enough page pods to accommodate the whole buffer, subject to
+ * the MAX_PPODS limit. If that fails we try to allocate PPOD_CLUSTER_SIZE page
+ * pods before failing entirely.
+ */
+static int
+alloc_buf1_ppods(struct toepcb *toep, struct ddp_state *p,
+ unsigned long addr, unsigned int len)
+{
+ int err, tag, npages, nppods;
+ struct tom_data *d = TOM_DATA(toep->tp_toedev);
+
+#if 0
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+#endif
+ npages = ((addr & PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ nppods = min(pages2ppods(npages), MAX_PPODS);
+ nppods = roundup2(nppods, PPOD_CLUSTER_SIZE);
+ err = t3_alloc_ppods(d, nppods, &tag);
+ if (err && nppods > PPOD_CLUSTER_SIZE) {
+ nppods = PPOD_CLUSTER_SIZE;
+ err = t3_alloc_ppods(d, nppods, &tag);
+ }
+ if (err)
+ return (ENOMEM);
+
+ p->ubuf_nppods = nppods;
+ p->ubuf_tag = tag;
+#if NUM_DDP_KBUF == 1
+ t3_set_ddp_tag(toep, 1, tag << 6);
+#endif
+ return (0);
+}
+
+/*
+ * Starting offset for the user DDP buffer. A non-0 value ensures a DDP flush
+ * won't block indefinitely if there's nothing to place (which should be rare).
+ */
+#define UBUF_OFFSET 1
+
+static __inline unsigned long
+select_ddp_flags(const struct toepcb *toep, int buf_idx,
+ int nonblock, int rcv_flags)
+{
+ if (buf_idx == 1) {
+ if (__predict_false(rcv_flags & MSG_WAITALL))
+ return V_TF_DDP_PSH_NO_INVALIDATE0(1) |
+ V_TF_DDP_PSH_NO_INVALIDATE1(1) |
+ V_TF_DDP_PUSH_DISABLE_1(1);
+ if (nonblock)
+ return V_TF_DDP_BUF1_FLUSH(1);
+
+ return V_TF_DDP_BUF1_FLUSH(!TOM_TUNABLE(toep->tp_toedev,
+ ddp_push_wait));
+ }
+
+ if (__predict_false(rcv_flags & MSG_WAITALL))
+ return V_TF_DDP_PSH_NO_INVALIDATE0(1) |
+ V_TF_DDP_PSH_NO_INVALIDATE1(1) |
+ V_TF_DDP_PUSH_DISABLE_0(1);
+ if (nonblock)
+ return V_TF_DDP_BUF0_FLUSH(1);
+
+ return V_TF_DDP_BUF0_FLUSH(!TOM_TUNABLE(toep->tp_toedev, ddp_push_wait));
+}
+
+/*
+ * Reposts the kernel DDP buffer after it has become full and been
+ * invalidated. We just need to reset the offset and adjust the DDP flags.
+ * Conveniently, we can set the flags and the offset with a single message.
+ * Note that this function does not set the buffer length. Again conveniently
+ * our kernel buffer is of fixed size. If the length needs to change, it
+ * must be done separately.
+ */
+static void
+t3_repost_kbuf(struct toepcb *toep, unsigned int bufidx, int modulate,
+ int activate, int nonblock)
+{
+ struct ddp_state *p = &toep->tp_ddp_state;
+ unsigned long flags;
+
+#if 0
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+#endif
+ p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
+ p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
+ p->buf_state[bufidx].gl = p->kbuf[bufidx];
+ p->cur_buf = bufidx;
+ p->kbuf_idx = bufidx;
+
+ flags = select_ddp_flags(toep, bufidx, nonblock, 0);
+ if (!bufidx)
+ t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
+ V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |
+ V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) |
+ V_TF_DDP_BUF0_VALID(1),
+ V_TF_DDP_BUF0_FLUSH(1) |
+ V_TF_DDP_PSH_NO_INVALIDATE0(1) |
+ V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) |
+ V_TF_DDP_BUF0_VALID(1) |
+ V_TF_DDP_ACTIVE_BUF(activate), modulate);
+ else
+ t3_setup_ddpbufs(toep, 0, 0, 0, 0, flags |
+ V_TF_DDP_PSH_NO_INVALIDATE0(p->kbuf_noinval) |
+ V_TF_DDP_PSH_NO_INVALIDATE1(p->kbuf_noinval) |
+ V_TF_DDP_BUF1_VALID(1) |
+ V_TF_DDP_ACTIVE_BUF(activate),
+ V_TF_DDP_BUF1_FLUSH(1) |
+ V_TF_DDP_PSH_NO_INVALIDATE0(1) |
+ V_TF_DDP_PSH_NO_INVALIDATE1(1) | V_TF_DDP_OFF(1) |
+ V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
+ modulate);
+
+}
+
+/**
+ * setup_uio_ppods - setup HW page pods for a user iovec
+ * @toep: the associated toepcb
+ * @uio: the uio
+ * @oft: additional bytes to map before the start of the buffer
+ * @length: set on success to the length of the mapped buffer
+ *
+ * Pins a user iovec and sets up HW page pods for DDP into it. We allocate
+ * page pods for user buffers on the first call per socket. Afterwards we
+ * limit the buffer length to whatever the existing page pods can accommodate.
+ * Returns 0 on success, with the mapped length stored in @length, or an
+ * error code on failure.
+ *
+ * The current implementation handles iovecs with only one entry.
+ */
+static int
+setup_uio_ppods(struct toepcb *toep, const struct uio *uio, int oft, int *length)
+{
+ int err;
+ unsigned int len;
+ struct ddp_gather_list *gl = NULL;
+ struct ddp_state *p = &toep->tp_ddp_state;
+ struct iovec *iov = uio->uio_iov;
+ vm_offset_t addr = (vm_offset_t)iov->iov_base - oft;
+
+#ifdef notyet
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+#endif
+ if (__predict_false(p->ubuf_nppods == 0)) {
+ err = alloc_buf1_ppods(toep, p, addr, iov->iov_len + oft);
+ if (err)
+ return (err);
+ }
+
+ len = (p->ubuf_nppods - NUM_SENTINEL_PPODS) * PPOD_PAGES * PAGE_SIZE;
+ len -= addr & PAGE_MASK;
+ if (len > M_TCB_RX_DDP_BUF0_LEN)
+ len = M_TCB_RX_DDP_BUF0_LEN;
+ len = min(len, toep->tp_tp->rcv_wnd - 32768);
+ len = min(len, iov->iov_len + oft);
+
+ if (len <= p->kbuf[0]->dgl_length) {
+ printf("length too short\n");
+ return (EINVAL);
+ }
+
+ err = t3_pin_pages(toep->tp_rx_dmat, toep->tp_dmamap, addr, len, &gl, p->ubuf);
+ if (err)
+ return (err);
+ if (gl) {
+ if (p->ubuf)
+ t3_free_ddp_gl(p->ubuf);
+ p->ubuf = gl;
+ t3_setup_ppods(toep, gl, pages2ppods(gl->dgl_nelem), p->ubuf_tag, len,
+ gl->dgl_offset, 0);
+ }
+ *length = len;
+ return (0);
+}
+
+/*
+ * Cancel a pending user-buffer DDP operation, waiting for any outstanding
+ * GET_TCB replies before returning.
+ */
+void
+t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv)
+{
+ struct ddp_state *p = &toep->tp_ddp_state;
+ int ubuf_pending = t3_ddp_ubuf_pending(toep);
+ int err = 0, count = 0;
+
+ if (p->ubuf == NULL)
+ return;
+
+ sockbuf_lock_assert(rcv);
+
+ p->cancel_ubuf = 1;
+ while (ubuf_pending && !(rcv->sb_state & SBS_CANTRCVMORE)) {
+ CTR3(KTR_TOM,
+ "t3_cancel_ubuf: flags0 0x%x flags1 0x%x get_tcb_count %d",
+ p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
+ p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY),
+ p->get_tcb_count);
+ if (p->get_tcb_count == 0)
+ t3_cancel_ddpbuf(toep, p->cur_buf);
+ else
+ CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p SBS_CANTRCVMORE=%d",
+ err, p->get_tcb_count, rcv->sb_timeo, rcv,
+ !!(rcv->sb_state & SBS_CANTRCVMORE));
+
+ while (p->get_tcb_count && !(rcv->sb_state & SBS_CANTRCVMORE)) {
+ if (count & 0xfffffff)
+ CTR5(KTR_TOM, "waiting err=%d get_tcb_count=%d timeo=%d rcv=%p count=%d",
+ err, p->get_tcb_count, rcv->sb_timeo, rcv, count);
+ count++;
+ err = sbwait(rcv);
+ }
+ ubuf_pending = t3_ddp_ubuf_pending(toep);
+ }
+ p->cancel_ubuf = 0;
+ p->user_ddp_pending = 0;
+
+}
+
+#define OVERLAY_MASK (V_TF_DDP_PSH_NO_INVALIDATE0(1) | \
+ V_TF_DDP_PSH_NO_INVALIDATE1(1) | \
+ V_TF_DDP_BUF1_FLUSH(1) | \
+ V_TF_DDP_BUF0_FLUSH(1) | \
+ V_TF_DDP_PUSH_DISABLE_1(1) | \
+ V_TF_DDP_PUSH_DISABLE_0(1) | \
+ V_TF_DDP_INDICATE_OUT(1))
+
+/*
+ * Post a user buffer as an overlay on top of the current kernel buffer.
+ */
+int
+t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
+ const struct uio *uio, int nonblock, int rcv_flags,
+ int modulate, int post_kbuf)
+{
+ int err, len, ubuf_idx;
+ unsigned long flags;
+ struct ddp_state *p = &toep->tp_ddp_state;
+
+ if (p->kbuf[0] == NULL) {
+ return (EINVAL);
+ }
+ sockbuf_unlock(rcv);
+ err = setup_uio_ppods(toep, uio, 0, &len);
+ sockbuf_lock(rcv);
+ if (err)
+ return (err);
+
+ if ((rcv->sb_state & SBS_CANTRCVMORE) ||
+ (toep->tp_tp->t_flags & TF_TOE) == 0)
+ return (EINVAL);
+
+ ubuf_idx = p->kbuf_idx;
+ p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP;
+ /* Use existing offset */
+ /* Don't need to update .gl, user buffer isn't copied. */
+ p->cur_buf = ubuf_idx;
+
+ flags = select_ddp_flags(toep, ubuf_idx, nonblock, rcv_flags);
+
+ if (post_kbuf) {
+ struct ddp_buf_state *dbs = &p->buf_state[ubuf_idx ^ 1];
+
+ dbs->cur_offset = 0;
+ dbs->flags = 0;
+ dbs->gl = p->kbuf[ubuf_idx ^ 1];
+ p->kbuf_idx ^= 1;
+ flags |= p->kbuf_idx ?
+ V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_PUSH_DISABLE_1(0) :
+ V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_PUSH_DISABLE_0(0);
+ }
+
+ if (ubuf_idx == 0) {
+ t3_overlay_ddpbuf(toep, 0, p->ubuf_tag << 6, p->kbuf_tag[1] << 6,
+ len);
+ t3_setup_ddpbufs(toep, 0, 0, p->kbuf[1]->dgl_length, 0,
+ flags,
+ OVERLAY_MASK | flags, 1);
+ } else {
+ t3_overlay_ddpbuf(toep, 1, p->kbuf_tag[0] << 6, p->ubuf_tag << 6,
+ len);
+ t3_setup_ddpbufs(toep, p->kbuf[0]->dgl_length, 0, 0, 0,
+ flags,
+ OVERLAY_MASK | flags, 1);
+ }
+#ifdef T3_TRACE
+ T3_TRACE5(TIDTB(so),
+ "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x ubuf_idx %d "
+ " kbuf_idx %d",
+ p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
+#endif
+ CTR3(KTR_TOM,
+ "t3_overlay_ubuf: tag %u flags 0x%x mask 0x%x",
+ p->ubuf_tag, flags, OVERLAY_MASK);
+ CTR3(KTR_TOM,
+ "t3_overlay_ubuf: ubuf_idx %d kbuf_idx %d post_kbuf %d",
+ ubuf_idx, p->kbuf_idx, post_kbuf);
+
+ return (0);
+}
+
+/*
+ * Clean up DDP state that needs to survive until socket close time, such as the
+ * DDP buffers. The buffers are already unmapped at this point as unmapping
+ * needs the PCI device and a socket may close long after the device is removed.
+ */
+void
+t3_cleanup_ddp(struct toepcb *toep)
+{
+ struct ddp_state *p = &toep->tp_ddp_state;
+ int idx;
+
+ for (idx = 0; idx < NUM_DDP_KBUF; idx++)
+ if (p->kbuf[idx]) {
+ ddp_gl_free_pages(p->kbuf[idx], 0);
+ free(p->kbuf[idx], M_DEVBUF);
+ }
+ if (p->ubuf) {
+ ddp_gl_free_pages(p->ubuf, 0);
+ free(p->ubuf, M_DEVBUF);
+ p->ubuf = NULL;
+ }
+ toep->tp_ulp_mode = 0;
+}
+
+/*
+ * This is a companion to t3_cleanup_ddp() and releases the HW resources
+ * associated with a connection's DDP state, such as the page pods.
+ * It's called when HW is done with a connection. The rest of the state
+ * remains available until both HW and the app are done with the connection.
+ */
+void
+t3_release_ddp_resources(struct toepcb *toep)
+{
+ struct ddp_state *p = &toep->tp_ddp_state;
+ struct tom_data *d = TOM_DATA(toep->tp_toedev);
+ int idx;
+
+ for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
+ t3_free_ppods(d, p->kbuf_tag[idx],
+ p->kbuf_nppods[idx]);
+ unmap_ddp_gl(p->kbuf[idx]);
+ }
+
+ if (p->ubuf_nppods) {
+ t3_free_ppods(d, p->ubuf_tag, p->ubuf_nppods);
+ p->ubuf_nppods = 0;
+ }
+ if (p->ubuf)
+ unmap_ddp_gl(p->ubuf);
+
+}
+
+void
+t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock)
+{
+ struct ddp_state *p = &toep->tp_ddp_state;
+
+ t3_set_ddp_tag(toep, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
+ t3_set_ddp_buf(toep, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
+ t3_repost_kbuf(toep, p->cur_buf, modulate, 1, nonblock);
+#ifdef T3_TRACE
+ T3_TRACE1(TIDTB(so),
+ "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
+#endif
+ CTR1(KTR_TOM,
+ "t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
+}
+
+/*
+ * Prepare a socket for DDP. Must be called when the socket is known to be
+ * open.
+ */
+int
+t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock)
+{
+ int i, err = ENOMEM;
+ static vm_pindex_t color;
+ unsigned int nppods, kbuf_pages, idx = 0;
+ struct ddp_state *p = &toep->tp_ddp_state;
+ struct tom_data *d = TOM_DATA(toep->tp_toedev);
+
+
+ if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
+ return (EINVAL);
+
+#ifdef notyet
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+#endif
+ kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ nppods = pages2ppods(kbuf_pages);
+
+ p->kbuf_noinval = !!waitall;
+ p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
+ for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
+ p->kbuf[idx] =
+ malloc(sizeof (struct ddp_gather_list) + kbuf_pages *
+ sizeof(vm_page_t *), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (p->kbuf[idx] == NULL)
+ goto err;
+ err = t3_alloc_ppods(d, nppods, &p->kbuf_tag[idx]);
+ if (err) {
+ printf("t3_alloc_ppods failed err=%d\n", err);
+ goto err;
+ }
+
+ p->kbuf_nppods[idx] = nppods;
+ p->kbuf[idx]->dgl_length = kbuf_size;
+ p->kbuf[idx]->dgl_offset = 0;
+ p->kbuf[idx]->dgl_nelem = kbuf_pages;
+
+ for (i = 0; i < kbuf_pages; ++i) {
+ p->kbuf[idx]->dgl_pages[i] = vm_page_alloc(NULL, color,
+ VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO);
+ if (p->kbuf[idx]->dgl_pages[i] == NULL) {
+ p->kbuf[idx]->dgl_nelem = i;
+ printf("failed to allocate kbuf pages\n");
+ goto err;
+ }
+ }
+#ifdef NEED_BUSDMA
+ /*
+ * XXX we'll need this for VT-d or any platform with an iommu :-/
+ *
+ */
+ for (i = 0; i < kbuf_pages; ++i)
+ p->kbuf[idx]->phys_addr[i] =
+ pci_map_page(p->pdev, p->kbuf[idx]->pages[i],
+ 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
+#endif
+ t3_setup_ppods(toep, p->kbuf[idx], nppods, p->kbuf_tag[idx],
+ p->kbuf[idx]->dgl_length, 0, 0);
+ }
+ cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
+
+ t3_set_ddp_tag(toep, 0, p->kbuf_tag[0] << 6);
+ t3_set_ddp_buf(toep, 0, 0, p->kbuf[0]->dgl_length);
+ t3_repost_kbuf(toep, 0, 0, 1, nonblock);
+
+ t3_set_rcv_coalesce_enable(toep,
+ TOM_TUNABLE(toep->tp_toedev, ddp_rcvcoalesce));
+ t3_set_dack_mss(toep, TOM_TUNABLE(toep->tp_toedev, delack)>>1);
+
+#ifdef T3_TRACE
+ T3_TRACE4(TIDTB(so),
+ "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
+ kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
+#endif
+ CTR4(KTR_TOM,
+ "t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
+ kbuf_size, waitall, p->kbuf_tag[0], p->kbuf_tag[1]);
+ cxgb_log_tcb(TOEP_T3C_DEV(toep)->adapter, toep->tp_tid);
+ return (0);
+
+err:
+ t3_release_ddp_resources(toep);
+ t3_cleanup_ddp(toep);
+ return (err);
+}
+
+int
+t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len)
+{
+ int resid_init, err;
+ struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl;
+
+ resid_init = uio->uio_resid;
+
+ if (!gl->dgl_pages)
+ panic("pages not set\n");
+
+ CTR4(KTR_TOM, "t3_ddp_copy: offset=%d dgl_offset=%d cur_offset=%d len=%d",
+ offset, gl->dgl_offset, m->m_cur_offset, len);
+ offset += gl->dgl_offset + m->m_cur_offset;
+ KASSERT(len <= gl->dgl_length,
+ ("len=%d > dgl_length=%d in ddp_copy\n", len, gl->dgl_length));
+
+
+ err = uiomove_fromphys(gl->dgl_pages, offset, len, uio);
+ return (err);
+}
+
+
+/*
+ * Allocate n page pods. Returns 0 on success, with the allocated tag stored
+ * in *ptag, or an error code on failure.
+ */
+int
+t3_alloc_ppods(struct tom_data *td, unsigned int n, int *ptag)
+{
+ unsigned int i, j;
+
+ if (__predict_false(!td->ppod_map)) {
+ printf("ppod_map not set\n");
+ return (EINVAL);
+ }
+
+ mtx_lock(&td->ppod_map_lock);
+ for (i = 0; i < td->nppods; ) {
+
+ for (j = 0; j < n; ++j) /* scan ppod_map[i..i+n-1] */
+ if (td->ppod_map[i + j]) {
+ i = i + j + 1;
+ goto next;
+ }
+ memset(&td->ppod_map[i], 1, n); /* allocate range */
+ mtx_unlock(&td->ppod_map_lock);
+ CTR2(KTR_TOM,
+ "t3_alloc_ppods: n=%u tag=%u", n, i);
+ *ptag = i;
+ return (0);
+ next: ;
+ }
+ mtx_unlock(&td->ppod_map_lock);
+	return (ENOMEM);
+}
+
+void
+t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n)
+{
+ /* No need to take ppod_lock here */
+ memset(&td->ppod_map[tag], 0, n);
+}
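+
+/*
+ * Typical usage sketch (td, nppods and the page-pod programming are supplied
+ * by the caller; see alloc_buf1_ppods() and t3_release_ddp_resources() for
+ * the real call sites):
+ *
+ *	int tag;
+ *
+ *	if (t3_alloc_ppods(td, nppods, &tag) == 0) {
+ *		... program page pods [tag, tag + nppods) ...
+ *		t3_free_ppods(td, tag, nppods);
+ *	}
+ */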
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_defs.h b/sys/dev/cxgb/ulp/tom/cxgb_defs.h
new file mode 100644
index 0000000000000..8c14f5ae89c87
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_defs.h
@@ -0,0 +1,90 @@
+
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef CXGB_DEFS_H_
+#define CXGB_DEFS_H_
+
+#define VALIDATE_TID 0
+
+#define TOEPCB(so) ((struct toepcb *)(sototcpcb((so))->t_toe))
+#define TOE_DEV(so) (TOEPCB((so))->tp_toedev)
+#define toeptoso(toep) ((toep)->tp_tp->t_inpcb->inp_socket)
+#define sototoep(so) (sototcpcb((so))->t_toe)
+
+#define TRACE_ENTER printf("%s:%s entered\n", __FUNCTION__, __FILE__)
+#define TRACE_EXIT printf("%s:%s:%d exited\n", __FUNCTION__, __FILE__, __LINE__)
+
+#define KTR_TOM KTR_SPARE2
+#define KTR_TCB KTR_SPARE3
+
+struct toepcb;
+struct listen_ctx;
+
+typedef void (*defer_handler_t)(struct toedev *dev, struct mbuf *m);
+
+void t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h);
+void t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
+void t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev);
+int t3_push_frames(struct socket *so, int req_completion);
+int t3_connect(struct toedev *tdev, struct socket *so, struct rtentry *rt,
+ struct sockaddr *nam);
+void t3_init_listen_cpl_handlers(void);
+int t3_init_cpl_io(void);
+void t3_init_wr_tab(unsigned int wr_len);
+uint32_t t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail);
+void t3_send_rx_modulate(struct toepcb *toep);
+void t3_cleanup_rbuf(struct tcpcb *tp, int copied);
+
+void t3_init_socket_ops(void);
+void t3_install_socket_ops(struct socket *so);
+
+
+void t3_disconnect_acceptq(struct socket *listen_so);
+void t3_reset_synq(struct listen_ctx *ctx);
+void t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler);
+
+struct toepcb *toepcb_alloc(void);
+void toepcb_hold(struct toepcb *);
+void toepcb_release(struct toepcb *);
+void toepcb_init(struct toepcb *);
+
+void t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off);
+void t3_set_dack_mss(struct toepcb *toep, int on);
+void t3_set_keepalive(struct toepcb *toep, int on_off);
+void t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag);
+void t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset,
+ unsigned int len);
+int t3_get_tcb(struct toepcb *toep);
+
+int t3_ctloutput(struct socket *so, struct sockopt *sopt);
+
+#endif
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_l2t.c b/sys/dev/cxgb/ulp/tom/cxgb_l2t.c
new file mode 100644
index 0000000000000..ab5fbe740114b
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_l2t.c
@@ -0,0 +1,542 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#if __FreeBSD_version > 700000
+#include <sys/rwlock.h>
+#endif
+
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <net/if_vlan_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#endif
+
+#define VLAN_NONE 0xfff
+#define SDL(s) ((struct sockaddr_dl *)s)
+#define RT_ENADDR(sa) ((u_char *)LLADDR(SDL((sa))))
+#define rt_expire rt_rmx.rmx_expire
+
+struct llinfo_arp {
+ struct callout la_timer;
+ struct rtentry *la_rt;
+ struct mbuf *la_hold; /* last packet until resolved/timeout */
+ u_short la_preempt; /* countdown for pre-expiry arps */
+ u_short la_asked; /* # requests sent */
+};
+
+/*
+ * Module locking notes: There is a RW lock protecting the L2 table as a
+ * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
+ * under the protection of the table lock, individual entry changes happen
+ * while holding that entry's spinlock. The table lock nests outside the
+ * entry locks. Allocations of new entries take the table lock as writers so
+ * no other lookups can happen while allocating new entries. Entry updates
+ * take the table lock as readers so multiple entries can be updated in
+ * parallel. An L2T entry can be dropped by decrementing its reference count
+ * and therefore can happen in parallel with entry allocation but no entry
+ * can change state or increment its ref count during allocation as both of
+ * these perform lookups.
+ */
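+
+/*
+ * Acquisition-order sketch (illustrative only; see t3_l2t_get() for the
+ * actual sequence, where the unlocks are not strictly nested):
+ *
+ *	rw_wlock(&d->lock);		table lock first
+ *	mtx_lock(&e->lock);		then the per-entry lock
+ *	...
+ *	mtx_unlock(&e->lock);
+ *	rw_wunlock(&d->lock);
+ */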
+
+static inline unsigned int
+vlan_prio(const struct l2t_entry *e)
+{
+ return e->vlan >> 13;
+}
+
+static inline unsigned int
+arp_hash(u32 key, int ifindex, const struct l2t_data *d)
+{
+ return jhash_2words(key, ifindex, 0) & (d->nentries - 1);
+}
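+
+/*
+ * Note: the '& (d->nentries - 1)' mask in arp_hash() assumes d->nentries is
+ * a power of two.
+ */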
+
+static inline void
+neigh_replace(struct l2t_entry *e, struct rtentry *rt)
+{
+ RT_LOCK(rt);
+ RT_ADDREF(rt);
+ RT_UNLOCK(rt);
+
+ if (e->neigh)
+ RTFREE(e->neigh);
+ e->neigh = rt;
+}
+
+/*
+ * Set up an L2T entry and send any packets waiting in the arp queue. The
+ * supplied mbuf is used for the CPL_L2T_WRITE_REQ. Must be called with the
+ * entry locked.
+ */
+static int
+setup_l2e_send_pending(struct t3cdev *dev, struct mbuf *m,
+ struct l2t_entry *e)
+{
+ struct cpl_l2t_write_req *req;
+
+ if (!m) {
+ if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
+ return (ENOMEM);
+ }
+ /*
+ * XXX MH_ALIGN
+ */
+ req = mtod(m, struct cpl_l2t_write_req *);
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx));
+ req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) |
+ V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) |
+ V_L2T_W_PRIO(vlan_prio(e)));
+
+ memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
+ m_set_priority(m, CPL_PRIORITY_CONTROL);
+ cxgb_ofld_send(dev, m);
+ while (e->arpq_head) {
+ m = e->arpq_head;
+ e->arpq_head = m->m_next;
+ m->m_next = NULL;
+ cxgb_ofld_send(dev, m);
+ }
+ e->arpq_tail = NULL;
+ e->state = L2T_STATE_VALID;
+
+ return 0;
+}
+
+/*
+ * Add a packet to an L2T entry's queue of packets awaiting resolution.
+ * Must be called with the entry's lock held.
+ */
+static inline void
+arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
+{
+ m->m_next = NULL;
+ if (e->arpq_head)
+ e->arpq_tail->m_next = m;
+ else
+ e->arpq_head = m;
+ e->arpq_tail = m;
+}
+
+int
+t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m, struct l2t_entry *e)
+{
+ struct rtentry *rt = e->neigh;
+ struct sockaddr_in sin;
+
+ bzero(&sin, sizeof(struct sockaddr_in));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr.s_addr = e->addr;
+
+ CTR2(KTR_CXGB, "send slow on rt=%p eaddr=0x%08x\n", rt, e->addr);
+again:
+ switch (e->state) {
+ case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
+ arpresolve(rt->rt_ifp, rt, NULL,
+ (struct sockaddr *)&sin, e->dmac);
+ mtx_lock(&e->lock);
+ if (e->state == L2T_STATE_STALE)
+ e->state = L2T_STATE_VALID;
+ mtx_unlock(&e->lock);
+ case L2T_STATE_VALID: /* fast-path, send the packet on */
+ return cxgb_ofld_send(dev, m);
+ case L2T_STATE_RESOLVING:
+ mtx_lock(&e->lock);
+ if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
+ mtx_unlock(&e->lock);
+ goto again;
+ }
+ arpq_enqueue(e, m);
+ mtx_unlock(&e->lock);
+ /*
+ * Only the first packet added to the arpq should kick off
+ * resolution. However, because the m_gethdr below can fail,
+ * we allow each packet added to the arpq to retry resolution
+ * as a way of recovering from transient memory exhaustion.
+ * A better way would be to use a work request to retry L2T
+ * entries when there's no memory.
+ */
+ if (arpresolve(rt->rt_ifp, rt, NULL,
+ (struct sockaddr *)&sin, e->dmac) == 0) {
+ CTR6(KTR_CXGB, "mac=%x:%x:%x:%x:%x:%x\n",
+ e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
+
+ if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
+ return (ENOMEM);
+
+ mtx_lock(&e->lock);
+ if (e->arpq_head)
+ setup_l2e_send_pending(dev, m, e);
+ else
+ m_freem(m);
+ mtx_unlock(&e->lock);
+ }
+ }
+ return 0;
+}
+
+void
+t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e)
+{
+ struct rtentry *rt;
+ struct mbuf *m0;
+ struct sockaddr_in sin;
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr.s_addr = e->addr;
+
+ if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
+ return;
+
+ rt = e->neigh;
+again:
+ switch (e->state) {
+ case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
+ arpresolve(rt->rt_ifp, rt, NULL,
+ (struct sockaddr *)&sin, e->dmac);
+ mtx_lock(&e->lock);
+ if (e->state == L2T_STATE_STALE) {
+ e->state = L2T_STATE_VALID;
+ }
+ mtx_unlock(&e->lock);
+ return;
+ case L2T_STATE_VALID: /* fast-path, send the packet on */
+ return;
+ case L2T_STATE_RESOLVING:
+ mtx_lock(&e->lock);
+ if (e->state != L2T_STATE_RESOLVING) { // ARP already completed
+ mtx_unlock(&e->lock);
+ goto again;
+ }
+ mtx_unlock(&e->lock);
+
+		/*
+		 * Only the first packet added to the arpq should kick off
+		 * resolution. However, because the mbuf allocation can fail,
+		 * we allow each packet added to the arpq to retry resolution
+		 * as a way of recovering from transient memory exhaustion.
+		 * A better way would be to use a work request to retry L2T
+		 * entries when there's no memory.
+		 */
+ arpresolve(rt->rt_ifp, rt, NULL,
+ (struct sockaddr *)&sin, e->dmac);
+
+ }
+ return;
+}
+/*
+ * Allocate a free L2T entry. Must be called with l2t_data.lock held.
+ */
+static struct l2t_entry *
+alloc_l2e(struct l2t_data *d)
+{
+ struct l2t_entry *end, *e, **p;
+
+ if (!atomic_load_acq_int(&d->nfree))
+ return NULL;
+
+ /* there's definitely a free entry */
+ for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e)
+ if (atomic_load_acq_int(&e->refcnt) == 0)
+ goto found;
+
+ for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ;
+found:
+ d->rover = e + 1;
+ atomic_add_int(&d->nfree, -1);
+
+ /*
+ * The entry we found may be an inactive entry that is
+ * presently in the hash table. We need to remove it.
+ */
+ if (e->state != L2T_STATE_UNUSED) {
+ int hash = arp_hash(e->addr, e->ifindex, d);
+
+ for (p = &d->l2tab[hash].first; *p; p = &(*p)->next)
+ if (*p == e) {
+ *p = e->next;
+ break;
+ }
+ e->state = L2T_STATE_UNUSED;
+ }
+
+ return e;
+}
+
+/*
+ * Called when an L2T entry has no more users. The entry is left in the hash
+ * table since it is likely to be reused but we also bump nfree to indicate
+ * that the entry can be reallocated for a different neighbor. We also drop
+ * the existing neighbor reference in case the neighbor is going away and is
+ * waiting on our reference.
+ *
+ * Because entries can be reallocated to other neighbors once their ref count
+ * drops to 0 we need to take the entry's lock to avoid races with a new
+ * incarnation.
+ */
+void
+t3_l2e_free(struct l2t_data *d, struct l2t_entry *e)
+{
+ struct rtentry *rt = NULL;
+
+ mtx_lock(&e->lock);
+ if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */
+ rt = e->neigh;
+ e->neigh = NULL;
+ }
+
+ mtx_unlock(&e->lock);
+ atomic_add_int(&d->nfree, 1);
+ if (rt)
+ RTFREE(rt);
+}
+
+
+/*
+ * Update an L2T entry that was previously used for the same next hop as neigh.
+ * Called from t3_l2t_get() with the L2 table lock held.
+ */
+static inline void
+reuse_entry(struct l2t_entry *e, struct rtentry *neigh)
+{
+ struct llinfo_arp *la;
+
+ la = (struct llinfo_arp *)neigh->rt_llinfo;
+
+ mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
+ if (neigh != e->neigh)
+ neigh_replace(e, neigh);
+
+ if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), sizeof(e->dmac)) ||
+ (neigh->rt_expire > time_uptime))
+ e->state = L2T_STATE_RESOLVING;
+ else if (la->la_hold == NULL)
+ e->state = L2T_STATE_VALID;
+ else
+ e->state = L2T_STATE_STALE;
+ mtx_unlock(&e->lock);
+}
+
+struct l2t_entry *
+t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh, struct ifnet *ifp,
+ struct sockaddr *sa)
+{
+ struct l2t_entry *e;
+ struct l2t_data *d = L2DATA(dev);
+ u32 addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
+ int ifidx = neigh->rt_ifp->if_index;
+ int hash = arp_hash(addr, ifidx, d);
+ unsigned int smt_idx = ((struct port_info *)ifp->if_softc)->port_id;
+
+ rw_wlock(&d->lock);
+ for (e = d->l2tab[hash].first; e; e = e->next)
+ if (e->addr == addr && e->ifindex == ifidx &&
+ e->smt_idx == smt_idx) {
+ l2t_hold(d, e);
+ if (atomic_load_acq_int(&e->refcnt) == 1)
+ reuse_entry(e, neigh);
+ goto done;
+ }
+
+ /* Need to allocate a new entry */
+ e = alloc_l2e(d);
+ if (e) {
+ mtx_lock(&e->lock); /* avoid race with t3_l2t_free */
+ e->next = d->l2tab[hash].first;
+ d->l2tab[hash].first = e;
+ rw_wunlock(&d->lock);
+
+ e->state = L2T_STATE_RESOLVING;
+ e->addr = addr;
+ e->ifindex = ifidx;
+ e->smt_idx = smt_idx;
+ atomic_store_rel_int(&e->refcnt, 1);
+ e->neigh = NULL;
+
+
+ neigh_replace(e, neigh);
+#ifdef notyet
+ /*
+ * XXX need to add accessor function for vlan tag
+ */
+ if (neigh->rt_ifp->if_vlantrunk)
+ e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id;
+ else
+#endif
+ e->vlan = VLAN_NONE;
+ mtx_unlock(&e->lock);
+
+ return (e);
+ }
+
+done:
+ rw_wunlock(&d->lock);
+ return e;
+}
+
+/*
+ * Called when address resolution fails for an L2T entry to handle packets
+ * on the arpq head. If a packet specifies a failure handler it is invoked,
+ * otherwise the packet is sent to the TOE.
+ *
+ * XXX: maybe we should abandon the latter behavior and just require a failure
+ * handler.
+ */
+static void
+handle_failed_resolution(struct t3cdev *dev, struct mbuf *arpq)
+{
+
+ while (arpq) {
+ struct mbuf *m = arpq;
+#ifdef notyet
+ struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m);
+#endif
+ arpq = m->m_next;
+ m->m_next = NULL;
+#ifdef notyet
+ if (cb->arp_failure_handler)
+ cb->arp_failure_handler(dev, m);
+ else
+#endif
+ cxgb_ofld_send(dev, m);
+ }
+
+}
+
+void
+t3_l2t_update(struct t3cdev *dev, struct rtentry *neigh,
+ uint8_t *enaddr, struct sockaddr *sa)
+{
+ struct l2t_entry *e;
+ struct mbuf *arpq = NULL;
+ struct l2t_data *d = L2DATA(dev);
+ u32 addr = *(u32 *) &((struct sockaddr_in *)sa)->sin_addr;
+ int ifidx = neigh->rt_ifp->if_index;
+ int hash = arp_hash(addr, ifidx, d);
+ struct llinfo_arp *la;
+
+ rw_rlock(&d->lock);
+ for (e = d->l2tab[hash].first; e; e = e->next)
+ if (e->addr == addr && e->ifindex == ifidx) {
+ mtx_lock(&e->lock);
+ goto found;
+ }
+ rw_runlock(&d->lock);
+ CTR1(KTR_CXGB, "t3_l2t_update: addr=0x%08x not found", addr);
+ return;
+
+found:
+ printf("found 0x%08x\n", addr);
+
+ rw_runlock(&d->lock);
+ memcpy(e->dmac, enaddr, ETHER_ADDR_LEN);
+ printf("mac=%x:%x:%x:%x:%x:%x\n",
+ e->dmac[0], e->dmac[1], e->dmac[2], e->dmac[3], e->dmac[4], e->dmac[5]);
+
+ if (atomic_load_acq_int(&e->refcnt)) {
+ if (neigh != e->neigh)
+ neigh_replace(e, neigh);
+
+ la = (struct llinfo_arp *)neigh->rt_llinfo;
+ if (e->state == L2T_STATE_RESOLVING) {
+
+ if (la->la_asked >= 5 /* arp_maxtries */) {
+ arpq = e->arpq_head;
+ e->arpq_head = e->arpq_tail = NULL;
+ } else
+ setup_l2e_send_pending(dev, NULL, e);
+ } else {
+ e->state = L2T_STATE_VALID;
+ if (memcmp(e->dmac, RT_ENADDR(neigh->rt_gateway), 6))
+ setup_l2e_send_pending(dev, NULL, e);
+ }
+ }
+ mtx_unlock(&e->lock);
+
+ if (arpq)
+ handle_failed_resolution(dev, arpq);
+}
+
+struct l2t_data *
+t3_init_l2t(unsigned int l2t_capacity)
+{
+ struct l2t_data *d;
+ int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
+
+ d = cxgb_alloc_mem(size);
+ if (!d)
+ return NULL;
+
+ d->nentries = l2t_capacity;
+ d->rover = &d->l2tab[1]; /* entry 0 is not used */
+ atomic_store_rel_int(&d->nfree, l2t_capacity - 1);
+ rw_init(&d->lock, "L2T");
+
+ for (i = 0; i < l2t_capacity; ++i) {
+ d->l2tab[i].idx = i;
+ d->l2tab[i].state = L2T_STATE_UNUSED;
+ mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF);
+ atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
+ }
+ return d;
+}
+
+void
+t3_free_l2t(struct l2t_data *d)
+{
+ int i;
+
+ rw_destroy(&d->lock);
+ for (i = 0; i < d->nentries; ++i)
+ mtx_destroy(&d->l2tab[i].lock);
+
+ cxgb_free_mem(d);
+}
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_l2t.h b/sys/dev/cxgb/ulp/tom/cxgb_l2t.h
new file mode 100644
index 0000000000000..3575f6fa98b14
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_l2t.h
@@ -0,0 +1,161 @@
+/**************************************************************************
+
+Copyright (c) 2007-2008, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef _CHELSIO_L2T_H
+#define _CHELSIO_L2T_H
+
+#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
+#include <sys/lock.h>
+
+#if __FreeBSD_version > 700000
+#include <sys/rwlock.h>
+#else
+#define rwlock mtx
+#define rw_wlock(x) mtx_lock((x))
+#define rw_wunlock(x) mtx_unlock((x))
+#define rw_rlock(x) mtx_lock((x))
+#define rw_runlock(x) mtx_unlock((x))
+#define rw_init(x, str) mtx_init((x), (str), NULL, MTX_DEF)
+#define rw_destroy(x) mtx_destroy((x))
+#endif
+
+enum {
+ L2T_STATE_VALID, /* entry is up to date */
+ L2T_STATE_STALE, /* entry may be used but needs revalidation */
+ L2T_STATE_RESOLVING, /* entry needs address resolution */
+ L2T_STATE_UNUSED /* entry not in use */
+};
+
+/*
+ * Each L2T entry plays multiple roles. First of all, it keeps state for the
+ * corresponding entry of the HW L2 table and maintains a queue of offload
+ * packets awaiting address resolution. Second, it is a node of a hash table
+ * chain, where the nodes of the chain are linked together through their next
+ * pointer. Finally, each node is a bucket of a hash table, pointing to the
+ * first element in its chain through its first pointer.
+ */
+struct l2t_entry {
+ uint16_t state; /* entry state */
+ uint16_t idx; /* entry index */
+ uint32_t addr; /* dest IP address */
+ int ifindex; /* neighbor's net_device's ifindex */
+ uint16_t smt_idx; /* SMT index */
+	uint16_t vlan;			/* VLAN TCI (id: bits 0-11, prio: bits 13-15) */
+ struct rtentry *neigh; /* associated neighbour */
+ struct l2t_entry *first; /* start of hash chain */
+ struct l2t_entry *next; /* next l2t_entry on chain */
+ struct mbuf *arpq_head; /* queue of packets awaiting resolution */
+ struct mbuf *arpq_tail;
+ struct mtx lock;
+ volatile uint32_t refcnt; /* entry reference count */
+ uint8_t dmac[6]; /* neighbour's MAC address */
+};
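+
+/*
+ * A sketch of how the three roles above combine in a lookup (this mirrors
+ * the walk done by t3_l2t_update() in cxgb_l2t.c; variable names are
+ * illustrative only):
+ *
+ *	hash = arp_hash(addr, ifidx, d);
+ *	rw_rlock(&d->lock);
+ *	for (e = d->l2tab[hash].first; e; e = e->next)
+ *		if (e->addr == addr && e->ifindex == ifidx)
+ *			break;
+ *	rw_runlock(&d->lock);
+ */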
+
+struct l2t_data {
+ unsigned int nentries; /* number of entries */
+ struct l2t_entry *rover; /* starting point for next allocation */
+ volatile uint32_t nfree; /* number of free entries */
+ struct rwlock lock;
+ struct l2t_entry l2tab[0];
+};
+
+typedef void (*arp_failure_handler_func)(struct t3cdev *dev,
+ struct mbuf *m);
+
+typedef void (*opaque_arp_failure_handler_func)(void *dev,
+ struct mbuf *m);
+
+/*
+ * Callback stored in an mbuf to handle address resolution failure.
+ */
+struct l2t_mbuf_cb {
+ arp_failure_handler_func arp_failure_handler;
+};
+
+/*
+ * XXX: struct mbuf has no skb-style cb[] scratch area, so this macro is a
+ * placeholder; the failure handler is stashed in m_pkthdr.header instead
+ * (see set_arp_failure_handler() below).
+ */
+#define L2T_MBUF_CB(skb) ((struct l2t_mbuf_cb *)(skb)->cb)
+
+
+static __inline void set_arp_failure_handler(struct mbuf *m,
+					     arp_failure_handler_func hnd)
+{
+	m->m_pkthdr.header = (opaque_arp_failure_handler_func)hnd;
+}
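+
+/*
+ * Usage sketch (hypothetical handler and caller, not part of this file):
+ * attach a failure handler before handing an mbuf to the L2T code, so that
+ * if address resolution fails the handler can reclaim the mbuf rather than
+ * having handle_failed_resolution() fall back to cxgb_ofld_send().  Note
+ * that the handler dispatch in cxgb_l2t.c is still under "#ifdef notyet".
+ *
+ *	static void my_arp_failure(struct t3cdev *dev, struct mbuf *m)
+ *	{
+ *		m_freem(m);	(drop; let the peer retransmit)
+ *	}
+ *
+ *	set_arp_failure_handler(m, my_arp_failure);
+ *	l2t_send(cdev, m, e);
+ */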
+
+/*
+ * Getting to the L2 data from an offload device.
+ */
+#define L2DATA(dev) ((dev)->l2opt)
+
+void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e);
+void t3_l2t_update(struct t3cdev *dev, struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa);
+struct l2t_entry *t3_l2t_get(struct t3cdev *dev, struct rtentry *neigh,
+ struct ifnet *ifp, struct sockaddr *sa);
+int t3_l2t_send_slow(struct t3cdev *dev, struct mbuf *m,
+ struct l2t_entry *e);
+void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e);
+struct l2t_data *t3_init_l2t(unsigned int l2t_capacity);
+void t3_free_l2t(struct l2t_data *d);
+
+#ifdef CONFIG_PROC_FS
+int t3_l2t_proc_setup(struct proc_dir_entry *dir, struct l2t_data *d);
+void t3_l2t_proc_free(struct proc_dir_entry *dir);
+#else
+#define l2t_proc_setup(dir, d) 0
+#define l2t_proc_free(dir)
+#endif
+
+int cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m);
+
+static inline int l2t_send(struct t3cdev *dev, struct mbuf *m,
+ struct l2t_entry *e)
+{
+ if (__predict_true(e->state == L2T_STATE_VALID)) {
+ return cxgb_ofld_send(dev, (struct mbuf *)m);
+ }
+ return t3_l2t_send_slow(dev, (struct mbuf *)m, e);
+}
+
+static inline void l2t_release(struct l2t_data *d, struct l2t_entry *e)
+{
+ if (atomic_fetchadd_int(&e->refcnt, -1) == 1)
+ t3_l2e_free(d, e);
+}
+
+static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e)
+{
+	if (atomic_fetchadd_int(&e->refcnt, 1) == 0)	/* 0 -> 1 transition */
+		atomic_add_int(&d->nfree, -1);
+}
+
+#endif
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_listen.c b/sys/dev/cxgb/ulp/tom/cxgb_listen.c
new file mode 100644
index 0000000000000..1d15cf292dcd3
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_listen.c
@@ -0,0 +1,338 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+
+
+#include <dev/cxgb/cxgb_osdep.h>
+#include <dev/cxgb/sys/mbufq.h>
+
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_fsm.h>
+
+#include <netinet/tcp_offload.h>
+#include <net/route.h>
+
+#include <dev/cxgb/t3cdev.h>
+#include <dev/cxgb/common/cxgb_firmware_exports.h>
+#include <dev/cxgb/common/cxgb_t3_cpl.h>
+#include <dev/cxgb/common/cxgb_tcb.h>
+#include <dev/cxgb/common/cxgb_ctl_defs.h>
+#include <dev/cxgb/cxgb_offload.h>
+#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
+#include <dev/cxgb/ulp/tom/cxgb_defs.h>
+#include <dev/cxgb/ulp/tom/cxgb_tom.h>
+#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+
+
+static struct listen_info *listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid);
+static int listen_hash_del(struct tom_data *d, struct socket *so);
+
+/*
+ * Process a CPL_CLOSE_LISTSRV_RPL message. If the status is good we release
+ * the STID.
+ */
+static int
+do_close_server_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct cpl_close_listserv_rpl *rpl = cplhdr(m);
+ unsigned int stid = GET_TID(rpl);
+
+ if (rpl->status != CPL_ERR_NONE)
+ log(LOG_ERR, "Unexpected CLOSE_LISTSRV_RPL status %u for "
+ "STID %u\n", rpl->status, stid);
+ else {
+ struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
+
+ cxgb_free_stid(cdev, stid);
+ free(listen_ctx, M_CXGB);
+ }
+
+ return (CPL_RET_BUF_DONE);
+}
+
+/*
+ * Process a CPL_PASS_OPEN_RPL message.  Remove the socket from the listen
+ * hash table and free the STID if there was an error; otherwise there is
+ * nothing to do.
+ */
+static int
+do_pass_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ struct cpl_pass_open_rpl *rpl = cplhdr(m);
+
+ if (rpl->status != CPL_ERR_NONE) {
+ int stid = GET_TID(rpl);
+ struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
+ struct tom_data *d = listen_ctx->tom_data;
+ struct socket *lso = listen_ctx->lso;
+
+#if VALIDATE_TID
+ if (!lso)
+ return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
+#endif
+ /*
+ * Note: It is safe to unconditionally call listen_hash_del()
+ * at this point without risking unhashing a reincarnation of
+ * an already closed socket (i.e., there is no listen, close,
+ * listen, free the sock for the second listen while processing
+ * a message for the first race) because we are still holding
+ * a reference on the socket. It is possible that the unhash
+ * will fail because the socket is already closed, but we can't
+ * unhash the wrong socket because it is impossible for the
+ * socket to which this message refers to have reincarnated.
+ */
+ listen_hash_del(d, lso);
+ cxgb_free_stid(cdev, stid);
+#ifdef notyet
+ /*
+ * XXX need to unreference the inpcb
+ * but we have no way of knowing that other TOMs aren't referencing it
+ */
+ sock_put(lso);
+#endif
+ free(listen_ctx, M_CXGB);
+ }
+ return CPL_RET_BUF_DONE;
+}
+
+void
+t3_init_listen_cpl_handlers(void)
+{
+ t3tom_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
+ t3tom_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
+}
+
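+/*
+ * Hash a listening socket into the listen_info table.  The socket pointer
+ * itself is the key: shifting right by 10 drops low address bits that carry
+ * little entropy, and the mask keeps the bucket index within
+ * LISTEN_INFO_HASH_SIZE.
+ */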
+static inline int
+listen_hashfn(const struct socket *so)
+{
+ return ((unsigned long)so >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
+}
+
+/*
+ * Create and add a listen_info entry to the listen hash table.  This and
+ * the listen hash table functions below must not be called from an
+ * interrupt context.
+ */
+static struct listen_info *
+listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid)
+{
+ struct listen_info *p;
+
+ p = malloc(sizeof(*p), M_CXGB, M_NOWAIT|M_ZERO);
+ if (p) {
+ int bucket = listen_hashfn(so);
+
+ p->so = so; /* just a key, no need to take a reference */
+ p->stid = stid;
+ mtx_lock(&d->listen_lock);
+ p->next = d->listen_hash_tab[bucket];
+ d->listen_hash_tab[bucket] = p;
+ mtx_unlock(&d->listen_lock);
+ }
+ return p;
+}
+
+/*
+ * Given a pointer to a listening socket return its server TID by consulting
+ * the socket->stid map. Returns -1 if the socket is not in the map.
+ */
+static int
+listen_hash_find(struct tom_data *d, struct socket *so)
+{
+ int stid = -1, bucket = listen_hashfn(so);
+ struct listen_info *p;
+
+ mtx_lock(&d->listen_lock);
+ for (p = d->listen_hash_tab[bucket]; p; p = p->next)
+ if (p->so == so) {
+ stid = p->stid;
+ break;
+ }
+ mtx_unlock(&d->listen_lock);
+ return stid;
+}
+
+/*
+ * Delete the listen_info structure for a listening socket. Returns the server
+ * TID for the socket if it is present in the socket->stid map, or -1.
+ */
+static int
+listen_hash_del(struct tom_data *d, struct socket *so)
+{
+ int bucket, stid = -1;
+ struct listen_info *p, **prev;
+
+ bucket = listen_hashfn(so);
+ prev = &d->listen_hash_tab[bucket];
+
+ mtx_lock(&d->listen_lock);
+ for (p = *prev; p; prev = &p->next, p = p->next)
+ if (p->so == so) {
+ stid = p->stid;
+ *prev = p->next;
+ free(p, M_CXGB);
+ break;
+ }
+ mtx_unlock(&d->listen_lock);
+
+ return (stid);
+}
+
+/*
+ * Start a listening server by sending a passive open request to HW.
+ */
+void
+t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
+{
+ int stid;
+ struct mbuf *m;
+ struct cpl_pass_open_req *req;
+ struct tom_data *d = TOM_DATA(dev);
+ struct inpcb *inp = sotoinpcb(so);
+ struct listen_ctx *ctx;
+
+ if (!TOM_TUNABLE(dev, activated))
+ return;
+
+ if (listen_hash_find(d, so) != -1)
+ return;
+
+ CTR1(KTR_TOM, "start listen on port %u", ntohs(inp->inp_lport));
+ ctx = malloc(sizeof(*ctx), M_CXGB, M_NOWAIT|M_ZERO);
+
+ if (!ctx)
+ return;
+
+ ctx->tom_data = d;
+ ctx->lso = so;
+ ctx->ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) ? ULP_MODE_TCPDDP : 0;
+ LIST_INIT(&ctx->synq_head);
+
+ stid = cxgb_alloc_stid(d->cdev, d->client, ctx);
+ if (stid < 0)
+ goto free_ctx;
+
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL)
+ goto free_stid;
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+
+ if (!listen_hash_add(d, so, stid))
+ goto free_all;
+
+ req = mtod(m, struct cpl_pass_open_req *);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
+ req->local_port = inp->inp_lport;
+ memcpy(&req->local_ip, &inp->inp_laddr, 4);
+ req->peer_port = 0;
+ req->peer_ip = 0;
+ req->peer_netmask = 0;
+ req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
+ req->opt0l = htonl(V_RCV_BUFSIZ(16));
+ req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
+
+ m_set_priority(m, CPL_PRIORITY_LISTEN);
+ cxgb_ofld_send(cdev, m);
+ return;
+
+free_all:
+ m_free(m);
+free_stid:
+ cxgb_free_stid(cdev, stid);
+#if 0
+ sock_put(sk);
+#endif
+free_ctx:
+ free(ctx, M_CXGB);
+}
+
+/*
+ * Stop a listening server by sending a close_listsvr request to HW.
+ * The server TID is freed when we get the reply.
+ */
+void
+t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
+{
+ struct mbuf *m;
+ struct cpl_close_listserv_req *req;
+ struct listen_ctx *lctx;
+ int stid = listen_hash_del(TOM_DATA(dev), so);
+
+ if (stid < 0)
+ return;
+
+ lctx = cxgb_get_lctx(cdev, stid);
+ /*
+ * Do this early so embryonic connections are marked as being aborted
+ * while the stid is still open. This ensures pass_establish messages
+ * that arrive while we are closing the server will be able to locate
+ * the listening socket.
+ */
+ t3_reset_synq(lctx);
+
+ /* Send the close ASAP to stop further passive opens */
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL) {
+ /*
+ * XXX allocate from lowmem cache
+ */
+		/* XXX for now just give up rather than dereference NULL */
+		return;
+	}
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+
+ req = mtod(m, struct cpl_close_listserv_req *);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
+ req->cpu_idx = 0;
+ m_set_priority(m, CPL_PRIORITY_LISTEN);
+ cxgb_ofld_send(cdev, m);
+
+ t3_disconnect_acceptq(so);
+}
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h b/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h
new file mode 100644
index 0000000000000..2cbfa7b38b28f
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h
@@ -0,0 +1,181 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+
+$FreeBSD$
+
+***************************************************************************/
+
+#ifndef T3_DDP_H
+#define T3_DDP_H
+
+/* Should be 1 or 2 indicating single or double kernel buffers. */
+#define NUM_DDP_KBUF 2
+
+/* min receive window for a connection to be considered for DDP */
+#define MIN_DDP_RCV_WIN (48 << 10)
+
+/* amount of Rx window not available to DDP to avoid window exhaustion */
+#define DDP_RSVD_WIN (16 << 10)
+
+/* # of sentinel invalid page pods at the end of a group of valid page pods */
+#define NUM_SENTINEL_PPODS 0
+
+/* # of pages a pagepod can hold without needing another pagepod */
+#define PPOD_PAGES 4
+
+/* page pods are allocated in groups of this size (must be power of 2) */
+#define PPOD_CLUSTER_SIZE 16
+
+/* for each TID we reserve this many page pods up front */
+#define RSVD_PPODS_PER_TID 1
+
+struct pagepod {
+ uint32_t pp_vld_tid;
+ uint32_t pp_pgsz_tag_color;
+ uint32_t pp_max_offset;
+ uint32_t pp_page_offset;
+ uint64_t pp_rsvd;
+ uint64_t pp_addr[5];
+};
+
+#define PPOD_SIZE sizeof(struct pagepod)
+
+#define S_PPOD_TID 0
+#define M_PPOD_TID 0xFFFFFF
+#define V_PPOD_TID(x) ((x) << S_PPOD_TID)
+
+#define S_PPOD_VALID 24
+#define V_PPOD_VALID(x) ((x) << S_PPOD_VALID)
+#define F_PPOD_VALID V_PPOD_VALID(1U)
+
+#define S_PPOD_COLOR 0
+#define M_PPOD_COLOR 0x3F
+#define V_PPOD_COLOR(x) ((x) << S_PPOD_COLOR)
+
+#define S_PPOD_TAG 6
+#define M_PPOD_TAG 0xFFFFFF
+#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
+
+#define S_PPOD_PGSZ 30
+#define M_PPOD_PGSZ 0x3
+#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
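+
+/*
+ * Worked example of composing the first two page pod words with the macros
+ * above (tid, tag and color values are hypothetical; htonl is shown on the
+ * assumption that, as with other structures handed to the adapter, the
+ * fields are big-endian):
+ *
+ *	p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(0x1234));
+ *	p->pp_pgsz_tag_color = htonl(V_PPOD_PGSZ(0) | V_PPOD_TAG(0x10) |
+ *	    V_PPOD_COLOR(2));
+ */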
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <machine/bus.h>
+
+/* DDP gather lists can specify an offset only for the first page. */
+struct ddp_gather_list {
+ unsigned int dgl_length;
+ unsigned int dgl_offset;
+ unsigned int dgl_nelem;
+ vm_page_t dgl_pages[0];
+};
+
+struct ddp_buf_state {
+ unsigned int cur_offset; /* offset of latest DDP notification */
+ unsigned int flags;
+ struct ddp_gather_list *gl;
+};
+
+struct ddp_state {
+ struct ddp_buf_state buf_state[2]; /* per buffer state */
+ int cur_buf;
+ unsigned short kbuf_noinval;
+ unsigned short kbuf_idx; /* which HW buffer is used for kbuf */
+ struct ddp_gather_list *ubuf;
+ int user_ddp_pending;
+ unsigned int ubuf_nppods; /* # of page pods for buffer 1 */
+ unsigned int ubuf_tag;
+ unsigned int ubuf_ddp_ready;
+ int cancel_ubuf;
+ int get_tcb_count;
+ unsigned int kbuf_posted;
+ unsigned int kbuf_nppods[NUM_DDP_KBUF];
+ unsigned int kbuf_tag[NUM_DDP_KBUF];
+ struct ddp_gather_list *kbuf[NUM_DDP_KBUF]; /* kernel buffer for DDP prefetch */
+};
+
+/* buf_state flags */
+enum {
+ DDP_BF_NOINVAL = 1 << 0, /* buffer is set to NO_INVALIDATE */
+ DDP_BF_NOCOPY = 1 << 1, /* DDP to final dest, no copy needed */
+ DDP_BF_NOFLIP = 1 << 2, /* buffer flips after GET_TCB_RPL */
+	DDP_BF_PSH = 1 << 3,	 /* set if a DDP buffer was completed with a
+				    segment having the PSH flag set */
+ DDP_BF_NODATA = 1 << 4, /* buffer completed before filling */
+};
+
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+struct sockbuf;
+
+/*
+ * Returns 1 if a UBUF DMA buffer might be active.
+ */
+static inline int
+t3_ddp_ubuf_pending(struct toepcb *toep)
+{
+ struct ddp_state *p = &toep->tp_ddp_state;
+
+ /* When the TOM_TUNABLE(ddp) is enabled, we're always in ULP_MODE DDP,
+ * but DDP_STATE() is only valid if the connection actually enabled
+ * DDP.
+ */
+ if (p->kbuf[0] == NULL)
+ return (0);
+
+ return (p->buf_state[0].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY)) ||
+ (p->buf_state[1].flags & (DDP_BF_NOFLIP | DDP_BF_NOCOPY));
+}
+
+int t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl,
+ unsigned int nppods, unsigned int tag, unsigned int maxoff,
+ unsigned int pg_off, unsigned int color);
+int t3_alloc_ppods(struct tom_data *td, unsigned int n, int *tag);
+void t3_free_ppods(struct tom_data *td, unsigned int tag, unsigned int n);
+void t3_free_ddp_gl(struct ddp_gather_list *gl);
+int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len);
+//void t3_repost_kbuf(struct socket *so, int modulate, int activate);
+void t3_post_kbuf(struct toepcb *toep, int modulate, int nonblock);
+int t3_post_ubuf(struct toepcb *toep, const struct uio *uio, int nonblock,
+ int rcv_flags, int modulate, int post_kbuf);
+void t3_cancel_ubuf(struct toepcb *toep, struct sockbuf *rcv);
+int t3_overlay_ubuf(struct toepcb *toep, struct sockbuf *rcv,
+ const struct uio *uio, int nonblock,
+ int rcv_flags, int modulate, int post_kbuf);
+int t3_enter_ddp(struct toepcb *toep, unsigned int kbuf_size, unsigned int waitall, int nonblock);
+void t3_cleanup_ddp(struct toepcb *toep);
+void t3_release_ddp_resources(struct toepcb *toep);
+void t3_cancel_ddpbuf(struct toepcb *, unsigned int bufidx);
+void t3_overlay_ddpbuf(struct toepcb *, unsigned int bufidx, unsigned int tag0,
+ unsigned int tag1, unsigned int len);
+void t3_setup_ddpbufs(struct toepcb *, unsigned int len0, unsigned int offset0,
+ unsigned int len1, unsigned int offset1,
+ uint64_t ddp_flags, uint64_t flag_mask, int modulate);
+#endif /* T3_DDP_H */
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp.h b/sys/dev/cxgb/ulp/tom/cxgb_tcp.h
new file mode 100644
index 0000000000000..3042ef00b0f1b
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_tcp.h
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2007, Chelsio Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef CXGB_TCP_H_
+#define CXGB_TCP_H_
+#ifdef TCP_USRREQS_OVERLOAD
+struct tcpcb *cxgb_tcp_drop(struct tcpcb *tp, int errno);
+#else
+#define cxgb_tcp_drop tcp_drop
+#endif
+void cxgb_tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip);
+struct tcpcb *cxgb_tcp_close(struct tcpcb *tp);
+
+extern struct pr_usrreqs cxgb_tcp_usrreqs;
+#ifdef INET6
+extern struct pr_usrreqs cxgb_tcp6_usrreqs;
+#endif
+
+#include <sys/sysctl.h>
+SYSCTL_DECL(_net_inet_tcp_cxgb);
+#endif /* CXGB_TCP_H_ */
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c
new file mode 100644
index 0000000000000..b61e1aca2c9ea
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.c
@@ -0,0 +1,95 @@
+/*-
+ * Copyright (c) 2007, Chelsio Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * grab bag of accessor routines that will either be moved to netinet
+ * or removed
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_offload.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/toedev.h>
+
+#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
+
+
+/*
+ * This file contains code as a short-term staging area before it is moved in
+ * to sys/netinet/tcp_offload.c
+ */
+
+void
+sockbuf_lock(struct sockbuf *sb)
+{
+
+ SOCKBUF_LOCK(sb);
+}
+
+void
+sockbuf_lock_assert(struct sockbuf *sb)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+}
+
+void
+sockbuf_unlock(struct sockbuf *sb)
+{
+
+ SOCKBUF_UNLOCK(sb);
+}
+
+int
+sockbuf_sbspace(struct sockbuf *sb)
+{
+
+ return (sbspace(sb));
+}
+
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h
new file mode 100644
index 0000000000000..bf0568c5e7c94
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h
@@ -0,0 +1,155 @@
+/* $FreeBSD$ */
+
+#ifndef CXGB_TCP_OFFLOAD_H_
+#define CXGB_TCP_OFFLOAD_H_
+
+struct socket;
+struct sockbuf;
+
+void sockbuf_lock(struct sockbuf *);
+void sockbuf_lock_assert(struct sockbuf *);
+void sockbuf_unlock(struct sockbuf *);
+int sockbuf_sbspace(struct sockbuf *);
+
+
+#ifndef _SYS_SOCKETVAR_H_
+#include <sys/selinfo.h>
+#include <sys/sx.h>
+
+#define SB_MAX (256*1024) /* default for max chars in sockbuf */
+/*
+ * Constants for sb_flags field of struct sockbuf.
+ */
+#define SB_WAIT 0x04 /* someone is waiting for data/space */
+#define SB_SEL 0x08 /* someone is selecting */
+#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
+#define SB_UPCALL 0x20 /* someone wants an upcall */
+#define SB_NOINTR 0x40 /* operations not interruptible */
+#define SB_AIO 0x80 /* AIO operations queued */
+#define SB_KNOTE 0x100 /* kernel note attached */
+#define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */
+#define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */
+#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */
+
+
+struct sockbuf {
+ struct selinfo sb_sel; /* process selecting read/write */
+ struct mtx sb_mtx; /* sockbuf lock */
+ struct sx sb_sx; /* prevent I/O interlacing */
+ short sb_state; /* (c/d) socket state on sockbuf */
+#define sb_startzero sb_mb
+ struct mbuf *sb_mb; /* (c/d) the mbuf chain */
+ struct mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */
+ struct mbuf *sb_lastrecord; /* (c/d) first mbuf of last
+ * record in socket buffer */
+ struct mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */
+ u_int sb_sndptroff; /* (c/d) byte offset of ptr into chain */
+ u_int sb_cc; /* (c/d) actual chars in buffer */
+ u_int sb_hiwat; /* (c/d) max actual char count */
+ u_int sb_mbcnt; /* (c/d) chars of mbufs used */
+ u_int sb_mbmax; /* (c/d) max chars of mbufs to use */
+ u_int sb_ctl; /* (c/d) non-data chars in buffer */
+ int sb_lowat; /* (c/d) low water mark */
+ int sb_timeo; /* (c/d) timeout for read/write */
+ short sb_flags; /* (c/d) flags, see below */
+};
+
+void sbappend(struct sockbuf *sb, struct mbuf *m);
+void sbappend_locked(struct sockbuf *sb, struct mbuf *m);
+void sbappendstream(struct sockbuf *sb, struct mbuf *m);
+void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m);
+void sbdrop(struct sockbuf *sb, int len);
+void sbdrop_locked(struct sockbuf *sb, int len);
+void sbdroprecord(struct sockbuf *sb);
+void sbdroprecord_locked(struct sockbuf *sb);
+void sbflush(struct sockbuf *sb);
+void sbflush_locked(struct sockbuf *sb);
+int sbwait(struct sockbuf *sb);
+int sblock(struct sockbuf *, int);
+void sbunlock(struct sockbuf *);
+
+
+
+/* adjust counters in sb reflecting allocation of m */
+#define sballoc(sb, m) { \
+ (sb)->sb_cc += (m)->m_len; \
+ if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
+ (sb)->sb_ctl += (m)->m_len; \
+ (sb)->sb_mbcnt += MSIZE; \
+ if ((m)->m_flags & M_EXT) \
+ (sb)->sb_mbcnt += (m)->m_ext.ext_size; \
+}
+
+/* adjust counters in sb reflecting freeing of m */
+#define sbfree(sb, m) { \
+ (sb)->sb_cc -= (m)->m_len; \
+ if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
+ (sb)->sb_ctl -= (m)->m_len; \
+ (sb)->sb_mbcnt -= MSIZE; \
+ if ((m)->m_flags & M_EXT) \
+ (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \
+ if ((sb)->sb_sndptr == (m)) { \
+ (sb)->sb_sndptr = NULL; \
+ (sb)->sb_sndptroff = 0; \
+ } \
+ if ((sb)->sb_sndptroff != 0) \
+ (sb)->sb_sndptroff -= (m)->m_len; \
+}
+
+#define SS_NOFDREF 0x0001 /* no file table ref any more */
+#define SS_ISCONNECTED 0x0002 /* socket connected to a peer */
+#define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */
+#define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */
+#define SS_NBIO 0x0100 /* non-blocking ops */
+#define SS_ASYNC 0x0200 /* async i/o notify */
+#define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */
+#define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */
+/*
+ * Protocols can mark a socket as SS_PROTOREF to indicate that, following
+ * pru_detach, they still want the socket to persist, and will free it
+ * themselves when they are done. Protocols should only ever call sofree()
+ * following setting this flag in pru_detach(), and never otherwise, as
+ * sofree() bypasses socket reference counting.
+ */
+#define SS_PROTOREF 0x4000 /* strong protocol reference */
+
+/*
+ * Socket state bits now stored in the socket buffer state field.
+ */
+#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
+#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
+#define SBS_RCVATMARK 0x0040 /* at mark on input */
+
+
+
+enum sopt_dir { SOPT_GET, SOPT_SET };
+struct sockopt {
+ enum sopt_dir sopt_dir; /* is this a get or a set? */
+ int sopt_level; /* second arg of [gs]etsockopt */
+ int sopt_name; /* third arg of [gs]etsockopt */
+ void *sopt_val; /* fourth arg of [gs]etsockopt */
+ size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */
+ struct thread *sopt_td; /* calling thread or null if kernel */
+};
+
+
+int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen);
+int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len);
+
+
+void soisconnected(struct socket *so);
+void soisconnecting(struct socket *so);
+void soisdisconnected(struct socket *so);
+void soisdisconnecting(struct socket *so);
+void socantrcvmore(struct socket *so);
+void socantrcvmore_locked(struct socket *so);
+void socantsendmore(struct socket *so);
+void socantsendmore_locked(struct socket *so);
+
+#endif /* !_SYS_SOCKETVAR_H_ */
+
+
+#endif /* CXGB_TCP_OFFLOAD_H_ */
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h b/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
new file mode 100644
index 0000000000000..7c4bd0c06c414
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
@@ -0,0 +1,119 @@
+/*-
+ * Copyright (c) 2007-2008, Chelsio Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef CXGB_TOEPCB_H_
+#define CXGB_TOEPCB_H_
+#include <sys/bus.h>
+#include <sys/condvar.h>
+#include <dev/cxgb/sys/mbufq.h>
+
+struct toepcb {
+ struct toedev *tp_toedev;
+ struct l2t_entry *tp_l2t;
+ unsigned int tp_tid;
+ int tp_wr_max;
+ int tp_wr_avail;
+ int tp_wr_unacked;
+ int tp_delack_mode;
+ int tp_mtu_idx;
+ int tp_ulp_mode;
+ int tp_qset_idx;
+ int tp_mss_clamp;
+ int tp_qset;
+ int tp_flags;
+ int tp_enqueued_bytes;
+ int tp_page_count;
+ int tp_state;
+
+ tcp_seq tp_iss;
+ tcp_seq tp_delack_seq;
+ tcp_seq tp_rcv_wup;
+ tcp_seq tp_copied_seq;
+ uint64_t tp_write_seq;
+
+ volatile int tp_refcount;
+ vm_page_t *tp_pages;
+
+ struct tcpcb *tp_tp;
+ struct mbuf *tp_m_last;
+ bus_dma_tag_t tp_tx_dmat;
+ bus_dma_tag_t tp_rx_dmat;
+ bus_dmamap_t tp_dmamap;
+
+ LIST_ENTRY(toepcb) synq_entry;
+ struct mbuf_head wr_list;
+ struct mbuf_head out_of_order_queue;
+ struct ddp_state tp_ddp_state;
+ struct cv tp_cv;
+
+};
+
+static inline void
+reset_wr_list(struct toepcb *toep)
+{
+
+ mbufq_init(&toep->wr_list);
+}
+
+static inline void
+purge_wr_queue(struct toepcb *toep)
+{
+ struct mbuf *m;
+
+ while ((m = mbufq_dequeue(&toep->wr_list)) != NULL)
+ m_freem(m);
+}
+
+static inline void
+enqueue_wr(struct toepcb *toep, struct mbuf *m)
+{
+
+ mbufq_tail(&toep->wr_list, m);
+}
+
+static inline struct mbuf *
+peek_wr(const struct toepcb *toep)
+{
+
+ return (mbufq_peek(&toep->wr_list));
+}
+
+static inline struct mbuf *
+dequeue_wr(struct toepcb *toep)
+{
+
+ return (mbufq_dequeue(&toep->wr_list));
+}
+
+#define wr_queue_walk(toep, m) \
+ for (m = peek_wr(toep); m; m = m->m_nextpkt)
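+
+/*
+ * Usage sketch: walk the pending work-request mbufs of a toepcb without
+ * dequeueing them (the macro follows m_nextpkt, i.e. mbufq linkage):
+ *
+ *	struct mbuf *m;
+ *	int pending = 0;
+ *
+ *	wr_queue_walk(toep, m)
+ *		pending += m->m_pkthdr.len;
+ */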
+
+#endif
+
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tom.c b/sys/dev/cxgb/ulp/tom/cxgb_tom.c
new file mode 100644
index 0000000000000..751b1cd0b051e
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_tom.c
@@ -0,0 +1,1510 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/fcntl.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/eventhandler.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/condvar.h>
+#include <sys/mutex.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/taskqueue.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+
+#include <dev/cxgb/cxgb_osdep.h>
+#include <dev/cxgb/sys/mbufq.h>
+
+#include <netinet/in_pcb.h>
+
+#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_offload.h>
+#include <netinet/tcp_fsm.h>
+
+#ifdef CONFIG_DEFINED
+#include <cxgb_include.h>
+#else
+#include <dev/cxgb/cxgb_include.h>
+#endif
+
+#include <net/if_vlan_var.h>
+#include <net/route.h>
+
+
+#include <dev/cxgb/t3cdev.h>
+#include <dev/cxgb/common/cxgb_firmware_exports.h>
+#include <dev/cxgb/common/cxgb_tcb.h>
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/common/cxgb_ctl_defs.h>
+#include <dev/cxgb/common/cxgb_t3_cpl.h>
+#include <dev/cxgb/cxgb_offload.h>
+#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
+#include <dev/cxgb/ulp/tom/cxgb_tom.h>
+#include <dev/cxgb/ulp/tom/cxgb_defs.h>
+#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
+#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
+#include <dev/cxgb/ulp/tom/cxgb_tcp.h>
+
+
+static int activated = 1;
+TUNABLE_INT("hw.t3toe.activated", &activated);
+SYSCTL_NODE(_hw, OID_AUTO, t3toe, CTLFLAG_RD, 0, "T3 toe driver parameters");
+SYSCTL_UINT(_hw_t3toe, OID_AUTO, activated, CTLFLAG_RDTUN, &activated, 0,
+ "enable TOE at init time");
+
+
+TAILQ_HEAD(, adapter) adapter_list;
+static struct rwlock adapter_list_lock;
+
+static TAILQ_HEAD(, tom_data) cxgb_list;
+static struct mtx cxgb_list_lock;
+static const unsigned int MAX_ATIDS = 64 * 1024;
+static const unsigned int ATID_BASE = 0x100000;
+
+static int t3_toe_attach(struct toedev *dev, const struct offload_id *entry);
+static void cxgb_register_listeners(void);
+static void t3c_tom_add(struct t3cdev *cdev);
+
+/*
+ * Handlers for each CPL opcode
+ */
+static cxgb_cpl_handler_func tom_cpl_handlers[256];
+
+
+static eventhandler_tag listen_tag;
+
+static struct offload_id t3_toe_id_tab[] = {
+ { TOE_ID_CHELSIO_T3, 0 },
+ { TOE_ID_CHELSIO_T3B, 0 },
+ { TOE_ID_CHELSIO_T3C, 0 },
+ { 0 }
+};
+
+static struct tom_info t3_tom_info = {
+ .ti_attach = t3_toe_attach,
+ .ti_id_table = t3_toe_id_tab,
+ .ti_name = "Chelsio-T3"
+};
+
+struct cxgb_client t3c_tom_client = {
+ .name = "tom_cxgb3",
+ .add = t3c_tom_add,
+ .remove = NULL,
+ .handlers = tom_cpl_handlers,
+ .redirect = NULL
+};
+
+/*
+ * Add an mbuf to the deferred mbuf queue for processing from process context.
+ */
+void
+t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler)
+{
+ struct tom_data *td = TOM_DATA(dev);
+
+ m_set_handler(m, handler);
+ mtx_lock(&td->deferq.lock);
+
+ mbufq_tail(&td->deferq, m);
+ if (mbufq_len(&td->deferq) == 1)
+ taskqueue_enqueue(td->tq, &td->deferq_task);
+	mtx_unlock(&td->deferq.lock);
+}
+
+struct toepcb *
+toepcb_alloc(void)
+{
+ struct toepcb *toep;
+
+ toep = malloc(sizeof(struct toepcb), M_CXGB, M_NOWAIT|M_ZERO);
+
+ if (toep == NULL)
+ return (NULL);
+
+ toepcb_init(toep);
+ return (toep);
+}
+
+void
+toepcb_init(struct toepcb *toep)
+{
+ toep->tp_refcount = 1;
+ cv_init(&toep->tp_cv, "toep cv");
+}
+
+void
+toepcb_hold(struct toepcb *toep)
+{
+ atomic_add_acq_int(&toep->tp_refcount, 1);
+}
+
+void
+toepcb_release(struct toepcb *toep)
+{
+ if (toep->tp_refcount == 1) {
+ free(toep, M_CXGB);
+ return;
+ }
+ atomic_add_acq_int(&toep->tp_refcount, -1);
+}
+
+
+/*
+ * Add a T3 offload device to the list of devices we are managing.
+ */
+static void
+t3cdev_add(struct tom_data *t)
+{
+ mtx_lock(&cxgb_list_lock);
+ TAILQ_INSERT_TAIL(&cxgb_list, t, entry);
+ mtx_unlock(&cxgb_list_lock);
+}
+
+static inline int
+cdev2type(struct t3cdev *cdev)
+{
+ int type = 0;
+
+ switch (cdev->type) {
+ case T3A:
+ type = TOE_ID_CHELSIO_T3;
+ break;
+ case T3B:
+ type = TOE_ID_CHELSIO_T3B;
+ break;
+ case T3C:
+ type = TOE_ID_CHELSIO_T3C;
+ break;
+ }
+ return (type);
+}
+
+/*
+ * Allocate and initialize the TID tables. Returns 0 on success.
+ */
+static int
+init_tid_tabs(struct tid_info *t, unsigned int ntids,
+ unsigned int natids, unsigned int nstids,
+ unsigned int atid_base, unsigned int stid_base)
+{
+ unsigned long size = ntids * sizeof(*t->tid_tab) +
+ natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab);
+
+ t->tid_tab = cxgb_alloc_mem(size);
+ if (!t->tid_tab)
+ return (ENOMEM);
+
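+	/*
+	 * The three tables share the single allocation above, laid out as
+	 * [ tid_tab: ntids ][ stid_tab: nstids ][ atid_tab: natids ].
+	 */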
+ t->stid_tab = (union listen_entry *)&t->tid_tab[ntids];
+ t->atid_tab = (union active_open_entry *)&t->stid_tab[nstids];
+ t->ntids = ntids;
+ t->nstids = nstids;
+ t->stid_base = stid_base;
+ t->sfree = NULL;
+ t->natids = natids;
+ t->atid_base = atid_base;
+ t->afree = NULL;
+ t->stids_in_use = t->atids_in_use = 0;
+ atomic_set_int(&t->tids_in_use, 0);
+ mtx_init(&t->stid_lock, "stid", NULL, MTX_DUPOK|MTX_DEF);
+ mtx_init(&t->atid_lock, "atid", NULL, MTX_DUPOK|MTX_DEF);
+
+ /*
+	 * Set up the free lists for stid_tab and atid_tab.
+ */
+ if (nstids) {
+ while (--nstids)
+ t->stid_tab[nstids - 1].next = &t->stid_tab[nstids];
+ t->sfree = t->stid_tab;
+ }
+ if (natids) {
+ while (--natids)
+ t->atid_tab[natids - 1].next = &t->atid_tab[natids];
+ t->afree = t->atid_tab;
+ }
+ return 0;
+}
+
+static void
+free_tid_maps(struct tid_info *t)
+{
+ mtx_destroy(&t->stid_lock);
+ mtx_destroy(&t->atid_lock);
+ cxgb_free_mem(t->tid_tab);
+}
+
+static inline void
+add_adapter(adapter_t *adap)
+{
+ rw_wlock(&adapter_list_lock);
+ TAILQ_INSERT_TAIL(&adapter_list, adap, adapter_entry);
+ rw_wunlock(&adapter_list_lock);
+}
+
+static inline void
+remove_adapter(adapter_t *adap)
+{
+ rw_wlock(&adapter_list_lock);
+ TAILQ_REMOVE(&adapter_list, adap, adapter_entry);
+ rw_wunlock(&adapter_list_lock);
+}
+
+/*
+ * Populate a TID_RELEASE WR. The mbuf must already be properly sized.
+ */
+static inline void
+mk_tid_release(struct mbuf *m, unsigned int tid)
+{
+ struct cpl_tid_release *req;
+
+ m_set_priority(m, CPL_PRIORITY_SETUP);
+ req = mtod(m, struct cpl_tid_release *);
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
+}
+
+static void
+t3_process_tid_release_list(void *data, int pending)
+{
+ struct mbuf *m;
+ struct t3cdev *tdev = data;
+ struct t3c_data *td = T3C_DATA (tdev);
+
+ mtx_lock(&td->tid_release_lock);
+ while (td->tid_release_list) {
+ struct toe_tid_entry *p = td->tid_release_list;
+
+ td->tid_release_list = (struct toe_tid_entry *)p->ctx;
+ mtx_unlock(&td->tid_release_lock);
+ m = m_get(M_WAIT, MT_DATA);
+ mk_tid_release(m, p - td->tid_maps.tid_tab);
+ cxgb_ofld_send(tdev, m);
+ p->ctx = NULL;
+ mtx_lock(&td->tid_release_lock);
+ }
+ mtx_unlock(&td->tid_release_lock);
+}
+
+int
+cxgb_offload_activate(struct adapter *adapter)
+{
+ struct t3cdev *dev = &adapter->tdev;
+ int natids, err;
+ struct t3c_data *t;
+ struct tid_range stid_range, tid_range;
+ struct mtutab mtutab;
+ unsigned int l2t_capacity;
+
+ t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
+ if (!t)
+ return (ENOMEM);
+ dev->adapter = adapter;
+
+ err = (EOPNOTSUPP);
+ if (dev->ctl(dev, GET_TX_MAX_CHUNK, &t->tx_max_chunk) < 0 ||
+ dev->ctl(dev, GET_MAX_OUTSTANDING_WR, &t->max_wrs) < 0 ||
+ dev->ctl(dev, GET_L2T_CAPACITY, &l2t_capacity) < 0 ||
+ dev->ctl(dev, GET_MTUS, &mtutab) < 0 ||
+ dev->ctl(dev, GET_TID_RANGE, &tid_range) < 0 ||
+ dev->ctl(dev, GET_STID_RANGE, &stid_range) < 0) {
+ device_printf(adapter->dev, "%s: dev->ctl check failed\n", __FUNCTION__);
+ goto out_free;
+ }
+
+ err = (ENOMEM);
+ L2DATA(dev) = t3_init_l2t(l2t_capacity);
+ if (!L2DATA(dev)) {
+ device_printf(adapter->dev, "%s: t3_init_l2t failed\n", __FUNCTION__);
+ goto out_free;
+ }
+ natids = min(tid_range.num / 2, MAX_ATIDS);
+ err = init_tid_tabs(&t->tid_maps, tid_range.num, natids,
+ stid_range.num, ATID_BASE, stid_range.base);
+ if (err) {
+ device_printf(adapter->dev, "%s: init_tid_tabs failed\n", __FUNCTION__);
+ goto out_free_l2t;
+ }
+
+ t->mtus = mtutab.mtus;
+ t->nmtus = mtutab.size;
+
+ TASK_INIT(&t->tid_release_task, 0 /* XXX? */, t3_process_tid_release_list, dev);
+ mtx_init(&t->tid_release_lock, "tid release", NULL, MTX_DUPOK|MTX_DEF);
+ t->dev = dev;
+
+ T3C_DATA (dev) = t;
+ dev->recv = process_rx;
+ dev->arp_update = t3_l2t_update;
+ /* Register netevent handler once */
+ if (TAILQ_EMPTY(&adapter_list)) {
+#if defined(CONFIG_CHELSIO_T3_MODULE)
+ if (prepare_arp_with_t3core())
+ log(LOG_ERR, "Unable to set offload capabilities\n");
+#endif
+ }
+ CTR1(KTR_CXGB, "adding adapter %p", adapter);
+ add_adapter(adapter);
+ device_printf(adapter->dev, "offload started\n");
+ adapter->flags |= CXGB_OFLD_INIT;
+ return (0);
+
+out_free_l2t:
+ t3_free_l2t(L2DATA(dev));
+ L2DATA(dev) = NULL;
+out_free:
+ free(t, M_CXGB);
+ return (err);
+}
+
+void
+cxgb_offload_deactivate(struct adapter *adapter)
+{
+ struct t3cdev *tdev = &adapter->tdev;
+ struct t3c_data *t = T3C_DATA(tdev);
+
+ printf("removing adapter %p\n", adapter);
+ remove_adapter(adapter);
+ if (TAILQ_EMPTY(&adapter_list)) {
+#if defined(CONFIG_CHELSIO_T3_MODULE)
+ restore_arp_sans_t3core();
+#endif
+ }
+ free_tid_maps(&t->tid_maps);
+ T3C_DATA(tdev) = NULL;
+ t3_free_l2t(L2DATA(tdev));
+ L2DATA(tdev) = NULL;
+ mtx_destroy(&t->tid_release_lock);
+ free(t, M_CXGB);
+}
+
+/*
+ * Send an mbuf to the T3C driver.
+ */
+int
+cxgb_ofld_send(struct t3cdev *dev, struct mbuf *m)
+{
+ int r;
+
+ r = dev->send(dev, m);
+ return r;
+}
+
+static struct ifnet *
+get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan)
+{
+ int i;
+
+ for_each_port(adapter, i) {
+#ifdef notyet
+ const struct vlan_group *grp;
+#endif
+ const struct port_info *p = &adapter->port[i];
+ struct ifnet *ifp = p->ifp;
+
+ if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) {
+#ifdef notyet
+
+ if (vlan && vlan != EVL_VLID_MASK) {
+ grp = p->vlan_grp;
+ dev = grp ? grp->vlan_devices[vlan] : NULL;
+ } else
+ while (dev->master)
+ dev = dev->master;
+#endif
+ return (ifp);
+ }
+ }
+ return (NULL);
+}
+
+static inline void
+failover_fixup(adapter_t *adapter, int port)
+{
+ if (adapter->params.rev == 0) {
+ struct ifnet *ifp = adapter->port[port].ifp;
+ struct cmac *mac = &adapter->port[port].mac;
+ if (!(ifp->if_flags & IFF_UP)) {
+ /* Failover triggered by the interface ifdown */
+ t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset,
+ F_TXEN);
+ t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset);
+ } else {
+ /* Failover triggered by the interface link down */
+ t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0);
+ t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset);
+ t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset,
+ F_RXEN);
+ }
+ }
+}
+
+static int
+cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data)
+{
+ int ret = 0;
+ struct ulp_iscsi_info *uiip = data;
+
+ switch (req) {
+ case ULP_ISCSI_GET_PARAMS:
+ uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT);
+ uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT);
+ uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK);
+ /*
+ * On tx, the iscsi pdu has to be <= tx page size and has to
+ * fit into the Tx PM FIFO.
+ */
+ uiip->max_txsz = min(adapter->params.tp.tx_pg_size,
+ t3_read_reg(adapter, A_PM1_TX_CFG) >> 17);
+		/*
+		 * On rx, the iscsi pdu has to be < rx page size and the
+		 * whole pdu + cpl headers have to fit into one sge buffer.
+		 * Also check the max rx data length programmed in TP.
+		 */
+ uiip->max_rxsz = min(uiip->max_rxsz,
+ ((t3_read_reg(adapter, A_TP_PARA_REG2))
+ >> S_MAXRXDATA) & M_MAXRXDATA);
+ break;
+ case ULP_ISCSI_SET_PARAMS:
+ t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask);
+ break;
+ default:
+ ret = (EOPNOTSUPP);
+ }
+ return ret;
+}
+
+/* Response queue used for RDMA events. */
+#define ASYNC_NOTIF_RSPQ 0
+
+static int
+cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data)
+{
+ int ret = 0;
+
+ switch (req) {
+ case RDMA_GET_PARAMS: {
+ struct rdma_info *req = data;
+
+ req->udbell_physbase = rman_get_start(adapter->udbs_res);
+ req->udbell_len = rman_get_size(adapter->udbs_res);
+ req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT);
+ req->tpt_top = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT);
+ req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT);
+ req->pbl_top = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT);
+ req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT);
+ req->rqt_top = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT);
+		req->kdb_addr = (void *)((unsigned long)
+		    rman_get_virtual(adapter->regs_res) + A_SG_KDOORBELL);
+		break;
+ }
+ case RDMA_CQ_OP: {
+ struct rdma_cq_op *req = data;
+
+ /* may be called in any context */
+ mtx_lock_spin(&adapter->sge.reg_lock);
+ ret = t3_sge_cqcntxt_op(adapter, req->id, req->op,
+ req->credits);
+ mtx_unlock_spin(&adapter->sge.reg_lock);
+ break;
+ }
+ case RDMA_GET_MEM: {
+ struct ch_mem_range *t = data;
+ struct mc7 *mem;
+
+ if ((t->addr & 7) || (t->len & 7))
+ return (EINVAL);
+ if (t->mem_id == MEM_CM)
+ mem = &adapter->cm;
+ else if (t->mem_id == MEM_PMRX)
+ mem = &adapter->pmrx;
+ else if (t->mem_id == MEM_PMTX)
+ mem = &adapter->pmtx;
+ else
+ return (EINVAL);
+
+ ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf);
+ if (ret)
+ return (ret);
+ break;
+ }
+ case RDMA_CQ_SETUP: {
+ struct rdma_cq_setup *req = data;
+
+ mtx_lock_spin(&adapter->sge.reg_lock);
+ ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr,
+ req->size, ASYNC_NOTIF_RSPQ,
+ req->ovfl_mode, req->credits,
+ req->credit_thres);
+ mtx_unlock_spin(&adapter->sge.reg_lock);
+ break;
+ }
+ case RDMA_CQ_DISABLE:
+ mtx_lock_spin(&adapter->sge.reg_lock);
+ ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data);
+ mtx_unlock_spin(&adapter->sge.reg_lock);
+ break;
+ case RDMA_CTRL_QP_SETUP: {
+ struct rdma_ctrlqp_setup *req = data;
+
+ mtx_lock_spin(&adapter->sge.reg_lock);
+ ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0,
+ SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ,
+ req->base_addr, req->size,
+ FW_RI_TID_START, 1, 0);
+ mtx_unlock_spin(&adapter->sge.reg_lock);
+ break;
+ }
+ default:
+ ret = EOPNOTSUPP;
+ }
+ return (ret);
+}
+
+static int
+cxgb_offload_ctl(struct t3cdev *tdev, unsigned int req, void *data)
+{
+ struct adapter *adapter = tdev2adap(tdev);
+ struct tid_range *tid;
+ struct mtutab *mtup;
+ struct iff_mac *iffmacp;
+ struct ddp_params *ddpp;
+ struct adap_ports *ports;
+ struct ofld_page_info *rx_page_info;
+ struct tp_params *tp = &adapter->params.tp;
+ int port;
+
+ switch (req) {
+ case GET_MAX_OUTSTANDING_WR:
+ *(unsigned int *)data = FW_WR_NUM;
+ break;
+ case GET_WR_LEN:
+ *(unsigned int *)data = WR_FLITS;
+ break;
+ case GET_TX_MAX_CHUNK:
+ *(unsigned int *)data = 1 << 20; /* 1MB */
+ break;
+ case GET_TID_RANGE:
+ tid = data;
+ tid->num = t3_mc5_size(&adapter->mc5) -
+ adapter->params.mc5.nroutes -
+ adapter->params.mc5.nfilters -
+ adapter->params.mc5.nservers;
+ tid->base = 0;
+ break;
+ case GET_STID_RANGE:
+ tid = data;
+ tid->num = adapter->params.mc5.nservers;
+ tid->base = t3_mc5_size(&adapter->mc5) - tid->num -
+ adapter->params.mc5.nfilters -
+ adapter->params.mc5.nroutes;
+ break;
+ case GET_L2T_CAPACITY:
+ *(unsigned int *)data = 2048;
+ break;
+ case GET_MTUS:
+ mtup = data;
+ mtup->size = NMTUS;
+ mtup->mtus = adapter->params.mtus;
+ break;
+ case GET_IFF_FROM_MAC:
+ iffmacp = data;
+ iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr,
+ iffmacp->vlan_tag & EVL_VLID_MASK);
+ break;
+ case GET_DDP_PARAMS:
+ ddpp = data;
+ ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT);
+ ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT);
+ ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK);
+ break;
+ case GET_PORTS:
+ ports = data;
+ ports->nports = adapter->params.nports;
+ for_each_port(adapter, port)
+ ports->lldevs[port] = adapter->port[port].ifp;
+ break;
+ case FAILOVER:
+ port = *(int *)data;
+ t3_port_failover(adapter, port);
+ failover_fixup(adapter, port);
+ break;
+ case FAILOVER_DONE:
+ port = *(int *)data;
+ t3_failover_done(adapter, port);
+ break;
+ case FAILOVER_CLEAR:
+ t3_failover_clear(adapter);
+ break;
+ case GET_RX_PAGE_INFO:
+ rx_page_info = data;
+ rx_page_info->page_size = tp->rx_pg_size;
+ rx_page_info->num = tp->rx_num_pgs;
+ break;
+ case ULP_ISCSI_GET_PARAMS:
+ case ULP_ISCSI_SET_PARAMS:
+ if (!offload_running(adapter))
+ return (EAGAIN);
+ return cxgb_ulp_iscsi_ctl(adapter, req, data);
+ case RDMA_GET_PARAMS:
+ case RDMA_CQ_OP:
+ case RDMA_CQ_SETUP:
+ case RDMA_CQ_DISABLE:
+ case RDMA_CTRL_QP_SETUP:
+ case RDMA_GET_MEM:
+ if (!offload_running(adapter))
+ return (EAGAIN);
+ return cxgb_rdma_ctl(adapter, req, data);
+ default:
+ return (EOPNOTSUPP);
+ }
+ return 0;
+}
+
+/*
+ * Allocate a TOM data structure, initialize its cpl_handlers, and register
+ * it as a T3C client.
+ */
+static void
+t3c_tom_add(struct t3cdev *cdev)
+{
+ int i;
+ unsigned int wr_len;
+ struct tom_data *t;
+ struct toedev *tdev;
+ struct adap_ports *port_info;
+
+ t = malloc(sizeof(*t), M_CXGB, M_NOWAIT|M_ZERO);
+ if (t == NULL)
+ return;
+
+ cdev->send = t3_offload_tx;
+ cdev->ctl = cxgb_offload_ctl;
+
+ if (cdev->ctl(cdev, GET_WR_LEN, &wr_len) < 0)
+ goto out_free_tom;
+
+ port_info = malloc(sizeof(*port_info), M_CXGB, M_NOWAIT|M_ZERO);
+ if (!port_info)
+ goto out_free_tom;
+
+ if (cdev->ctl(cdev, GET_PORTS, port_info) < 0)
+ goto out_free_all;
+
+ t3_init_wr_tab(wr_len);
+ t->cdev = cdev;
+ t->client = &t3c_tom_client;
+
+ /* Register TCP offload device */
+ tdev = &t->tdev;
+ tdev->tod_ttid = cdev2type(cdev);
+ tdev->tod_lldev = cdev->lldev;
+
+ if (register_toedev(tdev, "toe%d")) {
+ printf("unable to register offload device");
+ goto out_free_all;
+ }
+ TOM_DATA(tdev) = t;
+
+ for (i = 0; i < port_info->nports; i++) {
+ struct ifnet *ifp = port_info->lldevs[i];
+ TOEDEV(ifp) = tdev;
+
+ CTR1(KTR_TOM, "enabling toe on %p", ifp);
+ ifp->if_capabilities |= IFCAP_TOE4;
+ ifp->if_capenable |= IFCAP_TOE4;
+ }
+ t->ports = port_info;
+
+ /* Add device to the list of offload devices */
+ t3cdev_add(t);
+
+ /* Activate TCP offload device */
+ cxgb_offload_activate(TOM_DATA(tdev)->cdev->adapter);
+
+ activate_offload(tdev);
+ cxgb_register_listeners();
+ return;
+
+out_free_all:
+ printf("out_free_all fail\n");
+ free(port_info, M_CXGB);
+out_free_tom:
+ printf("out_free_tom fail\n");
+ free(t, M_CXGB);
+ return;
+}
+
+static int
+do_act_open_rpl(struct t3cdev *dev, struct mbuf *m)
+{
+ struct cpl_act_open_rpl *rpl = cplhdr(m);
+ unsigned int atid = G_TID(ntohl(rpl->atid));
+ struct toe_tid_entry *toe_tid;
+
+ toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
+ if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers &&
+ toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) {
+ return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m,
+ toe_tid->ctx);
+ } else {
+ log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
+ dev->name, CPL_ACT_OPEN_RPL);
+ return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
+ }
+}
+
+static int
+do_stid_rpl(struct t3cdev *dev, struct mbuf *m)
+{
+ union opcode_tid *p = cplhdr(m);
+ unsigned int stid = G_TID(ntohl(p->opcode_tid));
+ struct toe_tid_entry *toe_tid;
+
+ toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
+ if (toe_tid->ctx && toe_tid->client->handlers &&
+ toe_tid->client->handlers[p->opcode]) {
+ return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx);
+ } else {
+ log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
+ dev->name, p->opcode);
+ return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
+ }
+}
+
+static int
+do_hwtid_rpl(struct t3cdev *dev, struct mbuf *m)
+{
+ union opcode_tid *p = cplhdr(m);
+ unsigned int hwtid;
+ struct toe_tid_entry *toe_tid;
+
+ DPRINTF("do_hwtid_rpl opcode=0x%x\n", p->opcode);
+ hwtid = G_TID(ntohl(p->opcode_tid));
+
+ toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
+ if (toe_tid->ctx && toe_tid->client->handlers &&
+ toe_tid->client->handlers[p->opcode]) {
+ return toe_tid->client->handlers[p->opcode]
+ (dev, m, toe_tid->ctx);
+ } else {
+ log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
+ dev->name, p->opcode);
+ return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
+ }
+}
+
+static int
+do_cr(struct t3cdev *dev, struct mbuf *m)
+{
+ struct cpl_pass_accept_req *req = cplhdr(m);
+ unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
+ struct toe_tid_entry *toe_tid;
+
+ toe_tid = lookup_stid(&(T3C_DATA (dev))->tid_maps, stid);
+ if (toe_tid->ctx && toe_tid->client->handlers &&
+ toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) {
+ return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]
+ (dev, m, toe_tid->ctx);
+ } else {
+ log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
+ dev->name, CPL_PASS_ACCEPT_REQ);
+ return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
+ }
+}
+
+static int
+do_abort_req_rss(struct t3cdev *dev, struct mbuf *m)
+{
+ union opcode_tid *p = cplhdr(m);
+ unsigned int hwtid = G_TID(ntohl(p->opcode_tid));
+ struct toe_tid_entry *toe_tid;
+
+ toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
+ if (toe_tid->ctx && toe_tid->client->handlers &&
+ toe_tid->client->handlers[p->opcode]) {
+ return toe_tid->client->handlers[p->opcode]
+ (dev, m, toe_tid->ctx);
+ } else {
+ struct cpl_abort_req_rss *req = cplhdr(m);
+ struct cpl_abort_rpl *rpl;
+
+		struct mbuf *m_rpl = m_get(M_NOWAIT, MT_DATA);
+		if (!m_rpl) {
+			log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n");
+			goto out;
+		}
+
+		m_set_priority(m_rpl, CPL_PRIORITY_DATA);
+		rpl = cplhdr(m_rpl);
+		rpl->wr.wr_hi =
+			htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
+		rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req)));
+		OPCODE_TID(rpl) =
+			htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req)));
+		rpl->cmd = req->status;
+		cxgb_ofld_send(dev, m_rpl);
+ out:
+ return (CPL_RET_BUF_DONE);
+ }
+}
+
+static int
+do_act_establish(struct t3cdev *dev, struct mbuf *m)
+{
+ struct cpl_act_establish *req;
+ unsigned int atid;
+ struct toe_tid_entry *toe_tid;
+
+ req = cplhdr(m);
+ atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
+ toe_tid = lookup_atid(&(T3C_DATA (dev))->tid_maps, atid);
+ if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
+ toe_tid->client->handlers[CPL_ACT_ESTABLISH]) {
+
+ return toe_tid->client->handlers[CPL_ACT_ESTABLISH]
+ (dev, m, toe_tid->ctx);
+ } else {
+
+ log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
+		    dev->name, CPL_ACT_ESTABLISH);
+ return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
+ }
+}
+
+
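+/*
+ * Dispatch a CPL_RDMA_TERMINATE message.  The TID and opcode are recovered
+ * from the mbuf's priority and csum_data fields rather than from a CPL
+ * header.
+ */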
+static int
+do_term(struct t3cdev *dev, struct mbuf *m)
+{
+ unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff;
+ unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data));
+ struct toe_tid_entry *toe_tid;
+
+ toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
+ if (toe_tid && toe_tid->ctx && toe_tid->client->handlers &&
+ toe_tid->client->handlers[opcode]) {
+ return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx);
+ } else {
+ log(LOG_ERR, "%s: received clientless CPL command 0x%x\n",
+ dev->name, opcode);
+ return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG;
+ }
+}
+
+/*
+ * Process a received packet with an unknown/unexpected CPL opcode.
+ */
+static int
+do_bad_cpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
+{
+ log(LOG_ERR, "%s: received bad CPL command %u\n", cdev->name,
+ 0xFF & *mtod(m, unsigned int *));
+ return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG);
+}
+
+/*
+ * Add a new handler to the CPL dispatch table. A NULL handler may be supplied
+ * to unregister an existing handler.
+ */
+void
+t3tom_register_cpl_handler(unsigned int opcode, cxgb_cpl_handler_func h)
+{
+ if (opcode < UCHAR_MAX)
+ tom_cpl_handlers[opcode] = h ? h : do_bad_cpl;
+ else
+ log(LOG_ERR, "Chelsio T3 TOM: handler registration for "
+ "opcode %u failed\n", opcode);
+}
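+/*
+ * Illustrative use only (the handler name below is hypothetical):
+ *
+ *	t3tom_register_cpl_handler(CPL_RX_DATA, my_rx_data);
+ *	t3tom_register_cpl_handler(CPL_RX_DATA, NULL);
+ *
+ * The second call unregisters the handler, restoring do_bad_cpl.
+ */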
+
+/*
+ * Make a preliminary determination if a connection can be offloaded. It's OK
+ * to fail the offload later if we say we can offload here. For now this
+ * always accepts the offload request unless there are IP options.
+ */
+static int
+can_offload(struct toedev *dev, struct socket *so)
+{
+ struct tom_data *tomd = TOM_DATA(dev);
+ struct t3cdev *cdev = T3CDEV(dev->tod_lldev);
+ struct tid_info *t = &(T3C_DATA(cdev))->tid_maps;
+
+ return so_sotoinpcb(so)->inp_depend4.inp4_options == NULL &&
+ tomd->conf.activated &&
+ (tomd->conf.max_conn < 0 ||
+ atomic_load_acq_int(&t->tids_in_use) + t->atids_in_use < tomd->conf.max_conn);
+}
+
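+/*
+ * Pass a control request from the TOE layer through to the underlying
+ * t3cdev, if it exports a ctl method.
+ */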
+static int
+tom_ctl(struct toedev *dev, unsigned int req, void *data)
+{
+ struct tom_data *t = TOM_DATA(dev);
+ struct t3cdev *cdev = t->cdev;
+
+ if (cdev->ctl)
+ return cdev->ctl(cdev, req, data);
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Free an active-open TID.
+ */
+void *
+cxgb_free_atid(struct t3cdev *tdev, int atid)
+{
+ struct tid_info *t = &(T3C_DATA(tdev))->tid_maps;
+ union active_open_entry *p = atid2entry(t, atid);
+ void *ctx = p->toe_tid.ctx;
+
+ mtx_lock(&t->atid_lock);
+ p->next = t->afree;
+ t->afree = p;
+ t->atids_in_use--;
+ mtx_unlock(&t->atid_lock);
+
+ return ctx;
+}
+
+/*
+ * Free a server TID and return it to the free pool.
+ */
+void
+cxgb_free_stid(struct t3cdev *tdev, int stid)
+{
+ struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
+ union listen_entry *p = stid2entry(t, stid);
+
+ mtx_lock(&t->stid_lock);
+ p->next = t->sfree;
+ t->sfree = p;
+ t->stids_in_use--;
+ mtx_unlock(&t->stid_lock);
+}
+
+/*
+ * Return the listen context associated with a server TID.
+ */
+void *
+cxgb_get_lctx(struct t3cdev *tdev, int stid)
+{
+ struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
+ union listen_entry *p = stid2entry(t, stid);
+
+ return (p->toe_tid.ctx);
+}
+
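+/*
+ * Associate a client and context with a hardware TID and account for the
+ * TID as in use.
+ */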
+void
+cxgb_insert_tid(struct t3cdev *tdev, struct cxgb_client *client,
+ void *ctx, unsigned int tid)
+{
+ struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
+
+ t->tid_tab[tid].client = client;
+ t->tid_tab[tid].ctx = ctx;
+ atomic_add_int(&t->tids_in_use, 1);
+}
+
+/* Use ctx as the next pointer in the TID release list. */
+void
+cxgb_queue_tid_release(struct t3cdev *tdev, unsigned int tid)
+{
+ struct t3c_data *td = T3C_DATA (tdev);
+ struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid];
+
+	CTR0(KTR_TOM, "queuing tid release");
+
+ mtx_lock(&td->tid_release_lock);
+ p->ctx = td->tid_release_list;
+ td->tid_release_list = p;
+
+ if (!p->ctx)
+ taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task);
+
+ mtx_unlock(&td->tid_release_lock);
+}
+
+/*
+ * Remove a tid from the TID table. A client may defer processing its last
+ * CPL message if it is locked at the time it arrives, and while the message
+ * sits in the client's backlog the TID may be reused for another connection.
+ * To handle this we atomically switch the TID association if it still points
+ * to the original client context.
+ */
+void
+cxgb_remove_tid(struct t3cdev *tdev, void *ctx, unsigned int tid)
+{
+ struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
+
+	if (tid >= t->ntids)
+		panic("tid=%u >= t->ntids=%u", tid, t->ntids);
+
+	if (tdev->type == T3A)
+		atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx,
+		    (uintptr_t)ctx, (uintptr_t)NULL);
+ else {
+ struct mbuf *m;
+
+ m = m_get(M_NOWAIT, MT_DATA);
+ if (__predict_true(m != NULL)) {
+ mk_tid_release(m, tid);
+ CTR1(KTR_CXGB, "releasing tid=%u", tid);
+
+ cxgb_ofld_send(tdev, m);
+ t->tid_tab[tid].ctx = NULL;
+ } else
+ cxgb_queue_tid_release(tdev, tid);
+ }
+ atomic_add_int(&t->tids_in_use, -1);
+}
+
+int
+cxgb_alloc_atid(struct t3cdev *tdev, struct cxgb_client *client,
+ void *ctx)
+{
+ int atid = -1;
+ struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
+
+ mtx_lock(&t->atid_lock);
+ if (t->afree) {
+ union active_open_entry *p = t->afree;
+
+ atid = (p - t->atid_tab) + t->atid_base;
+ t->afree = p->next;
+ p->toe_tid.ctx = ctx;
+ p->toe_tid.client = client;
+ t->atids_in_use++;
+ }
+ mtx_unlock(&t->atid_lock);
+ return atid;
+}
+
+int
+cxgb_alloc_stid(struct t3cdev *tdev, struct cxgb_client *client,
+ void *ctx)
+{
+ int stid = -1;
+ struct tid_info *t = &(T3C_DATA (tdev))->tid_maps;
+
+ mtx_lock(&t->stid_lock);
+ if (t->sfree) {
+ union listen_entry *p = t->sfree;
+
+ stid = (p - t->stid_tab) + t->stid_base;
+ t->sfree = p->next;
+ p->toe_tid.ctx = ctx;
+ p->toe_tid.client = client;
+ t->stids_in_use++;
+ }
+ mtx_unlock(&t->stid_lock);
+ return stid;
+}
+
+
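+/*
+ * Return non-zero if the interface belongs to one of the adapters on the
+ * offload adapter list.
+ */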
+static int
+is_offloading(struct ifnet *ifp)
+{
+ struct adapter *adapter;
+ int port;
+
+ rw_rlock(&adapter_list_lock);
+ TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) {
+ for_each_port(adapter, port) {
+ if (ifp == adapter->port[port].ifp) {
+ rw_runlock(&adapter_list_lock);
+ return 1;
+ }
+ }
+ }
+ rw_runlock(&adapter_list_lock);
+ return 0;
+}
+
+
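+/*
+ * Event handler for ARP updates: propagate the new link-layer address to
+ * the L2 table if the route's interface is offload capable.
+ */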
+static void
+cxgb_arp_update_event(void *unused, struct rtentry *rt0,
+ uint8_t *enaddr, struct sockaddr *sa)
+{
+
+ if (!is_offloading(rt0->rt_ifp))
+ return;
+
+ RT_ADDREF(rt0);
+ RT_UNLOCK(rt0);
+ cxgb_neigh_update(rt0, enaddr, sa);
+ RT_LOCK(rt0);
+ RT_REMREF(rt0);
+}
+
+static void
+cxgb_redirect_event(void *unused, int event, struct rtentry *rt0,
+ struct rtentry *rt1, struct sockaddr *sa)
+{
+ /*
+ * ignore events on non-offloaded interfaces
+ */
+ if (!is_offloading(rt0->rt_ifp))
+ return;
+
+ /*
+ * Cannot redirect to non-offload device.
+ */
+ if (!is_offloading(rt1->rt_ifp)) {
+		log(LOG_WARNING, "%s: Redirect to non-offload "
+		    "device ignored.\n", __FUNCTION__);
+ return;
+ }
+
+	/*
+	 * Avoid LORs by dropping the route locks but keeping references.
+	 */
+ RT_ADDREF(rt0);
+ RT_UNLOCK(rt0);
+ RT_ADDREF(rt1);
+ RT_UNLOCK(rt1);
+
+ cxgb_redirect(rt0, rt1, sa);
+ cxgb_neigh_update(rt1, NULL, sa);
+
+ RT_LOCK(rt0);
+ RT_REMREF(rt0);
+ RT_LOCK(rt1);
+ RT_REMREF(rt1);
+}
+
+void
+cxgb_neigh_update(struct rtentry *rt, uint8_t *enaddr, struct sockaddr *sa)
+{
+
+	if (rt->rt_ifp && is_offloading(rt->rt_ifp) && (rt->rt_ifp->if_capenable & IFCAP_TOE)) {
+ struct t3cdev *tdev = T3CDEV(rt->rt_ifp);
+
+ PANIC_IF(!tdev);
+ t3_l2t_update(tdev, rt, enaddr, sa);
+ }
+}
+
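+/*
+ * Build and send a CPL_SET_TCB_FIELD request that points a connection's
+ * TCB at a new L2T entry.
+ */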
+static void
+set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
+{
+ struct mbuf *m;
+ struct cpl_set_tcb_field *req;
+
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (!m) {
+ log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__);
+ return;
+ }
+
+ m_set_priority(m, CPL_PRIORITY_CONTROL);
+ req = mtod(m, struct cpl_set_tcb_field *);
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+
+ req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
+ req->reply = 0;
+ req->cpu_idx = 0;
+ req->word = htons(W_TCB_L2T_IX);
+ req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX));
+ req->val = htobe64(V_TCB_L2T_IX(e->idx));
+ tdev->send(tdev, m);
+}
+
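+/*
+ * Handle a route redirect: allocate an L2T entry for the new route and let
+ * each offloaded connection's client decide whether to switch its TCB over
+ * to it.
+ */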
+void
+cxgb_redirect(struct rtentry *old, struct rtentry *new, struct sockaddr *sa)
+{
+ struct ifnet *olddev, *newdev;
+ struct tid_info *ti;
+ struct t3cdev *tdev;
+ u32 tid;
+ int update_tcb;
+ struct l2t_entry *e;
+ struct toe_tid_entry *te;
+
+ olddev = old->rt_ifp;
+ newdev = new->rt_ifp;
+ if (!is_offloading(olddev))
+ return;
+ if (!is_offloading(newdev)) {
+		log(LOG_WARNING, "%s: Redirect to non-offload "
+		    "device ignored.\n", __FUNCTION__);
+ return;
+ }
+ tdev = T3CDEV(olddev);
+ PANIC_IF(!tdev);
+ if (tdev != T3CDEV(newdev)) {
+ log(LOG_WARNING, "%s: Redirect to different "
+ "offload device ignored.\n", __FUNCTION__);
+ return;
+ }
+
+ /* Add new L2T entry */
+ e = t3_l2t_get(tdev, new, new->rt_ifp, sa);
+ if (!e) {
+ log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n",
+ __FUNCTION__);
+ return;
+ }
+
+ /* Walk tid table and notify clients of dst change. */
+ ti = &(T3C_DATA (tdev))->tid_maps;
+ for (tid=0; tid < ti->ntids; tid++) {
+ te = lookup_tid(ti, tid);
+ PANIC_IF(!te);
+ if (te->ctx && te->client && te->client->redirect) {
+ update_tcb = te->client->redirect(te->ctx, old, new,
+ e);
+ if (update_tcb) {
+ l2t_hold(L2DATA(tdev), e);
+ set_l2t_ix(tdev, tid, e);
+ }
+ }
+ }
+ l2t_release(L2DATA(tdev), e);
+}
+
+/*
+ * Initialize the CPL dispatch table.
+ */
+static void
+init_cpl_handlers(void)
+{
+ int i;
+
+ for (i = 0; i < 256; ++i)
+ tom_cpl_handlers[i] = do_bad_cpl;
+
+ t3_init_listen_cpl_handlers();
+}
+
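+/*
+ * Attach a TOM instance to a TOE device: set up tunables, install the
+ * toedev methods, and query the adapter for the DDP and RX page parameters
+ * needed for zero-copy receive.
+ */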
+static int
+t3_toe_attach(struct toedev *dev, const struct offload_id *entry)
+{
+ struct tom_data *t = TOM_DATA(dev);
+ struct t3cdev *cdev = t->cdev;
+ struct ddp_params ddp;
+ struct ofld_page_info rx_page_info;
+ int err;
+
+ t3_init_tunables(t);
+ mtx_init(&t->listen_lock, "tom data listeners", NULL, MTX_DEF);
+ CTR2(KTR_TOM, "t3_toe_attach dev=%p entry=%p", dev, entry);
+ /* Adjust TOE activation for this module */
+ t->conf.activated = activated;
+
+ dev->tod_can_offload = can_offload;
+ dev->tod_connect = t3_connect;
+ dev->tod_ctl = tom_ctl;
+#if 0
+ dev->tod_failover = t3_failover;
+#endif
+ err = cdev->ctl(cdev, GET_DDP_PARAMS, &ddp);
+ if (err)
+ return err;
+
+ err = cdev->ctl(cdev, GET_RX_PAGE_INFO, &rx_page_info);
+ if (err)
+ return err;
+
+ t->ddp_llimit = ddp.llimit;
+ t->ddp_ulimit = ddp.ulimit;
+ t->pdev = ddp.pdev;
+ t->rx_page_size = rx_page_info.page_size;
+ /* OK if this fails, we just can't do DDP */
+ t->nppods = (ddp.ulimit + 1 - ddp.llimit) / PPOD_SIZE;
+ t->ppod_map = malloc(t->nppods, M_DEVBUF, M_NOWAIT|M_ZERO);
+
+ mtx_init(&t->ppod_map_lock, "ppod map", NULL, MTX_DEF);
+
+
+ t3_sysctl_register(cdev->adapter, &t->conf);
+ return (0);
+}
+
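+/*
+ * Event handlers invoked when a socket enters or leaves the LISTEN state;
+ * start or stop offloading the listener on every registered TOM instance.
+ */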
+static void
+cxgb_toe_listen_start(void *unused, struct tcpcb *tp)
+{
+ struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
+ struct tom_data *p;
+
+ mtx_lock(&cxgb_list_lock);
+ TAILQ_FOREACH(p, &cxgb_list, entry) {
+ t3_listen_start(&p->tdev, so, p->cdev);
+ }
+ mtx_unlock(&cxgb_list_lock);
+}
+
+static void
+cxgb_toe_listen_stop(void *unused, struct tcpcb *tp)
+{
+ struct socket *so = inp_inpcbtosocket(tp->t_inpcb);
+ struct tom_data *p;
+
+ mtx_lock(&cxgb_list_lock);
+ TAILQ_FOREACH(p, &cxgb_list, entry) {
+ if (tp->t_state == TCPS_LISTEN)
+ t3_listen_stop(&p->tdev, so, p->cdev);
+ }
+ mtx_unlock(&cxgb_list_lock);
+}
+
+static void
+cxgb_toe_listen_start_handler(struct inpcb *inp, void *arg)
+{
+ struct tcpcb *tp = intotcpcb(inp);
+
+ if (tp->t_state == TCPS_LISTEN)
+ cxgb_toe_listen_start(NULL, tp);
+}
+
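+/*
+ * Walk all inpcbs and offload any sockets that are already listening when
+ * the TOM is activated.
+ */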
+static void
+cxgb_register_listeners(void)
+{
+
+ inp_apply_all(cxgb_toe_listen_start_handler, NULL);
+}
+
+static int
+t3_tom_init(void)
+{
+ init_cpl_handlers();
+ if (t3_init_cpl_io() < 0) {
+ log(LOG_ERR,
+ "Unable to initialize cpl io ops\n");
+ return -1;
+ }
+ t3_init_socket_ops();
+
+ /* Register with the TOE device layer. */
+
+ if (register_tom(&t3_tom_info) != 0) {
+ log(LOG_ERR,
+ "Unable to register Chelsio T3 TCP offload module.\n");
+ return -1;
+ }
+
+ rw_init(&adapter_list_lock, "ofld adap list");
+ TAILQ_INIT(&adapter_list);
+ EVENTHANDLER_REGISTER(route_arp_update_event, cxgb_arp_update_event,
+ NULL, EVENTHANDLER_PRI_ANY);
+ EVENTHANDLER_REGISTER(route_redirect_event, cxgb_redirect_event,
+ NULL, EVENTHANDLER_PRI_ANY);
+
+ mtx_init(&cxgb_list_lock, "cxgb tom list", NULL, MTX_DEF);
+ listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
+ cxgb_toe_listen_start, NULL, EVENTHANDLER_PRI_ANY);
+ listen_tag = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
+ cxgb_toe_listen_stop, NULL, EVENTHANDLER_PRI_ANY);
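+	/*
+	 * XXX the tag from the tcp_offload_listen_start registration above
+	 * is overwritten here, so it cannot be deregistered later.
+	 */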
+ TAILQ_INIT(&cxgb_list);
+
+
+
+ t3_register_cpl_handler(CPL_PASS_OPEN_RPL, do_stid_rpl);
+ t3_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_stid_rpl);
+ t3_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_cr);
+ t3_register_cpl_handler(CPL_PASS_ESTABLISH, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_ABORT_RPL_RSS, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_ABORT_RPL, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_RX_URG_NOTIFY, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_RX_DATA, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_TX_DATA_ACK, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_TX_DMA_ACK, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
+ t3_register_cpl_handler(CPL_PEER_CLOSE, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_CLOSE_CON_RPL, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req_rss);
+ t3_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
+ t3_register_cpl_handler(CPL_RDMA_TERMINATE, do_term);
+ t3_register_cpl_handler(CPL_RDMA_EC_STATUS, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_RX_DATA_DDP, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_ISCSI_HDR, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_GET_TCB_RPL, do_hwtid_rpl);
+ t3_register_cpl_handler(CPL_SET_TCB_RPL, do_hwtid_rpl);
+
+ /* Register to offloading devices */
+ cxgb_register_client(&t3c_tom_client);
+
+ return (0);
+}
+
+static int
+t3_tom_load(module_t mod, int cmd, void *arg)
+{
+ int err = 0;
+
+ switch (cmd) {
+ case MOD_LOAD:
+ t3_tom_init();
+ break;
+ case MOD_QUIESCE:
+ break;
+ case MOD_UNLOAD:
+		printf("t3_tom: module unload is not supported\n");
+ break;
+ case MOD_SHUTDOWN:
+ break;
+ default:
+ err = EOPNOTSUPP;
+ break;
+ }
+
+ return (err);
+}
+
+static moduledata_t mod_data = {
+ "t3_tom",
+ t3_tom_load,
+ 0
+};
+MODULE_VERSION(t3_tom, 1);
+MODULE_DEPEND(t3_tom, toecore, 1, 1, 1);
+MODULE_DEPEND(t3_tom, if_cxgb, 1, 1, 1);
+DECLARE_MODULE(t3_tom, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
+
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tom.h b/sys/dev/cxgb/ulp/tom/cxgb_tom.h
new file mode 100644
index 0000000000000..bcda2c3c57aaa
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_tom.h
@@ -0,0 +1,159 @@
+
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef CXGB_TOM_H_
+#define CXGB_TOM_H_
+#include <sys/protosw.h>
+
+#define LISTEN_INFO_HASH_SIZE 32
+
+struct listen_info {
+ struct listen_info *next; /* Link to next entry */
+ struct socket *so; /* The listening socket */
+ unsigned int stid; /* The server TID */
+};
+
+
+/*
+ * TOM tunable parameters.  They can be manipulated through sysctl(8).
+ */
+struct tom_tunables {
+	int max_host_sndbuf;	/* max host RAM consumed by a sndbuf */
+	int tx_hold_thres;	/* push/pull threshold for non-full TX mbufs */
+	int max_wrs;		/* max # of outstanding WRs per connection */
+	int rx_credit_thres;	/* min # of RX credits needed for RX_DATA_ACK */
+	int cong_alg;		/* congestion control algorithm */
+	int mss;		/* max TX_DATA WR payload size */
+	int delack;		/* delayed ACK control */
+	int max_conn;		/* maximum number of offloaded connections */
+	int soft_backlog_limit;	/* whether the listen backlog limit is soft */
+	int ddp;		/* whether to put new connections in DDP mode */
+	int ddp_thres;		/* min recvmsg size before activating DDP */
+	int ddp_copy_limit;	/* capacity of kernel DDP buffer */
+	int ddp_push_wait;	/* whether blocking DDP waits for PSH flag */
+	int ddp_rcvcoalesce;	/* whether receive coalescing is enabled */
+	int zcopy_sosend_enabled;	/* whether zero-copy sosend is enabled */
+	int zcopy_sosend_partial_thres;	/* < is never zcopied */
+	int zcopy_sosend_partial_copy;	/* bytes copied in partial zcopy */
+	int zcopy_sosend_thres;	/* >= are mostly zcopied */
+	int zcopy_sosend_copy;	/* bytes copied in zcopy sends */
+	int zcopy_sosend_ret_pending_dma;	/* may return while DMA is pending */
+	int activated;		/* TOE engine activation state */
+};
+
+struct tom_data {
+ TAILQ_ENTRY(tom_data) entry;
+
+ struct t3cdev *cdev;
+ struct pci_dev *pdev;
+ struct toedev tdev;
+
+ struct cxgb_client *client;
+ struct tom_tunables conf;
+ struct tom_sysctl_table *sysctl;
+
+ /*
+ * The next three locks listen_lock, deferq.lock, and tid_release_lock
+ * are used rarely so we let them potentially share a cacheline.
+ */
+
+ struct listen_info *listen_hash_tab[LISTEN_INFO_HASH_SIZE];
+ struct mtx listen_lock;
+
+ struct mbuf_head deferq;
+ struct task deferq_task;
+
+ struct socket **tid_release_list;
+ struct mtx tid_release_lock;
+ struct task tid_release_task;
+
+ volatile int tx_dma_pending;
+
+ unsigned int ddp_llimit;
+ unsigned int ddp_ulimit;
+
+ unsigned int rx_page_size;
+
+ u8 *ppod_map;
+ unsigned int nppods;
+ struct mtx ppod_map_lock;
+
+ struct adap_ports *ports;
+ struct taskqueue *tq;
+};
+
+
+struct listen_ctx {
+ struct socket *lso;
+ struct tom_data *tom_data;
+ int ulp_mode;
+ LIST_HEAD(, toepcb) synq_head;
+
+};
+
+#define TOM_DATA(dev) (*(struct tom_data **)&(dev)->tod_l4opt)
+#define T3C_DEV(sk) ((TOM_DATA(TOE_DEV(sk)))->cdev)
+#define TOEP_T3C_DEV(toep) (TOM_DATA(toep->tp_toedev)->cdev)
+#define TOM_TUNABLE(dev, param) (TOM_DATA(dev)->conf.param)
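+/*
+ * For example, TOM_TUNABLE(dev, ddp) expands to TOM_DATA(dev)->conf.ddp,
+ * i.e. the per-device copy of the "ddp" tunable.
+ */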
+
+#define TP_DATASENT (1 << 0)
+#define TP_TX_WAIT_IDLE (1 << 1)
+#define TP_FIN_SENT (1 << 2)
+#define TP_ABORT_RPL_PENDING (1 << 3)
+#define TP_ABORT_SHUTDOWN (1 << 4)
+#define TP_ABORT_RPL_RCVD (1 << 5)
+#define TP_ABORT_REQ_RCVD (1 << 6)
+#define TP_CLOSE_CON_REQUESTED (1 << 7)
+#define TP_SYN_RCVD (1 << 8)
+#define TP_ESTABLISHED (1 << 9)
+
+void t3_init_tunables(struct tom_data *t);
+
+void t3_sysctl_register(struct adapter *sc, const struct tom_tunables *p);
+
+static __inline struct mbuf *
+m_gethdr_nofail(int len)
+{
+ struct mbuf *m;
+
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL) {
+		panic("implement lowmem cache");
+ }
+
+ KASSERT(len < MHLEN, ("requested header size too large for mbuf"));
+ m->m_pkthdr.len = m->m_len = len;
+ return (m);
+}
+
+
+#endif
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c b/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c
new file mode 100644
index 0000000000000..1490bfbdc29bd
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_tom_sysctl.c
@@ -0,0 +1,119 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/fcntl.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/socketvar.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+
+#include <dev/cxgb/cxgb_osdep.h>
+#include <dev/cxgb/sys/mbufq.h>
+
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_fsm.h>
+#include <net/route.h>
+
+#include <dev/cxgb/t3cdev.h>
+#include <dev/cxgb/common/cxgb_firmware_exports.h>
+#include <dev/cxgb/common/cxgb_tcb.h>
+#include <dev/cxgb/common/cxgb_ctl_defs.h>
+#include <dev/cxgb/common/cxgb_t3_cpl.h>
+#include <dev/cxgb/cxgb_offload.h>
+#include <dev/cxgb/cxgb_include.h>
+#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
+#include <dev/cxgb/ulp/tom/cxgb_tom.h>
+#include <dev/cxgb/ulp/tom/cxgb_defs.h>
+#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
+
+static struct tom_tunables default_tunable_vals = {
+ .max_host_sndbuf = 32 * 1024,
+ .tx_hold_thres = 0,
+ .max_wrs = 15,
+ .rx_credit_thres = 15 * 1024,
+ .cong_alg = -1,
+ .mss = 16384,
+ .delack = 1,
+ .max_conn = -1,
+ .soft_backlog_limit = 0,
+ .ddp = 1,
+ .ddp_thres = 14 * 4096,
+ .ddp_copy_limit = 13 * 4096,
+ .ddp_push_wait = 1,
+ .ddp_rcvcoalesce = 0,
+ .zcopy_sosend_enabled = 0,
+ .zcopy_sosend_partial_thres = 40960,
+ .zcopy_sosend_partial_copy = 4096 * 3,
+ .zcopy_sosend_thres = 128 * 1024,
+ .zcopy_sosend_copy = 4096 * 2,
+ .zcopy_sosend_ret_pending_dma = 1,
+ .activated = 1,
+};
+
+void
+t3_init_tunables(struct tom_data *t)
+{
+ t->conf = default_tunable_vals;
+
+ /* Now apply device specific fixups. */
+ t->conf.mss = T3C_DATA(t->cdev)->tx_max_chunk;
+ t->conf.max_wrs = T3C_DATA(t->cdev)->max_wrs;
+}
+
+void
+t3_sysctl_register(struct adapter *sc, const struct tom_tunables *p)
+{
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid_list *children;
+
+ ctx = device_get_sysctl_ctx(sc->dev);
+ children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
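+	/*
+	 * XXX no sysctl nodes are attached yet, so the tunables in *p are
+	 * not exported; ctx and children are looked up in preparation for
+	 * that.
+	 */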
+
+}
+
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_vm.c b/sys/dev/cxgb/ulp/tom/cxgb_vm.c
new file mode 100644
index 0000000000000..7036005e93e04
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_vm.c
@@ -0,0 +1,180 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/condvar.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <dev/cxgb/ulp/tom/cxgb_vm.h>
+
+#define TRACE_ENTER printf("%s:%s entered\n", __FUNCTION__, __FILE__)
+#define TRACE_EXIT printf("%s:%s:%d exited\n", __FUNCTION__, __FILE__, __LINE__)
+
+/*
+ * This routine takes a user address range and does the following:
+ *  - validate that the user has access to those pages (flags indicate read or write); if not, fail
+ *  - validate that count is enough to hold the range's number of pages; if not, fail
+ *  - fault in any non-resident pages
+ *  - if the user is doing a read, force a write fault for any COWed pages
+ *  - if the user is doing a read, mark all pages as dirty
+ *  - hold all pages and return them via the mp array
+ */
+int
+vm_fault_hold_user_pages(vm_offset_t addr, vm_page_t *mp, int count, int flags)
+{
+
+ vm_offset_t end, va;
+ vm_paddr_t pa;
+ int faults, rv;
+
+ struct thread *td;
+ vm_map_t map;
+ pmap_t pmap;
+ vm_page_t m, *pages;
+ vm_prot_t prot;
+
+
+	/*
+	 * Check that the virtual address range is legal.  This check is
+	 * somewhat bogus as kernel and user do not share VA on some
+	 * architectures; however, all FreeBSD architectures appear to
+	 * define VM_MAXUSER_ADDRESS.
+	 */
+ end = addr + (count * PAGE_SIZE);
+ if (end > VM_MAXUSER_ADDRESS) {
+ printf("bad address passed\n");
+ return (EFAULT);
+ }
+
+ td = curthread;
+ map = &td->td_proc->p_vmspace->vm_map;
+ pmap = &td->td_proc->p_vmspace->vm_pmap;
+ pages = mp;
+
+ prot = VM_PROT_READ;
+ prot |= (flags & VM_HOLD_WRITEABLE) ? VM_PROT_WRITE : 0;
+ bzero(pages, sizeof(vm_page_t *) * count);
+retry:
+
+	/*
+	 * First, optimistically assume that all pages are resident (and R/W
+	 * if for write); if so, just mark the pages held (and dirty if for
+	 * write) and return.
+	 */
+ vm_page_lock_queues();
+ for (pages = mp, faults = 0, va = addr; va < end; va += PAGE_SIZE, pages++) {
+ /*
+		 * Ensure that we only hold the page once.
+ */
+ if (*pages == NULL) {
+			/*
+			 * The page queue mutex is recursable, so this is OK.
+			 * It would be nice to have an unlocked version of
+			 * this so we only took the pmap lock once instead of
+			 * potentially many dozens of times.
+			 */
+ m = pmap_extract_and_hold(pmap, va, prot);
+ if (m == NULL) {
+ faults++;
+ continue;
+ }
+
+ *pages = m;
+ if (flags & VM_HOLD_WRITEABLE)
+ vm_page_dirty(m);
+ }
+ }
+ vm_page_unlock_queues();
+
+ if (faults == 0) {
+ return (0);
+ }
+
+	/*
+	 * Pages either have insufficient permissions or are not present;
+	 * trigger a fault where necessary.
+	 */
+ for (va = addr; va < end; va += PAGE_SIZE) {
+ m = NULL;
+ pa = pmap_extract(pmap, va);
+ rv = 0;
+ if (pa)
+ m = PHYS_TO_VM_PAGE(pa);
+ if (flags & VM_HOLD_WRITEABLE) {
+ if (m == NULL || (m->flags & PG_WRITEABLE) == 0)
+ rv = vm_fault(map, va, VM_PROT_WRITE, VM_FAULT_DIRTY);
+ } else if (m == NULL)
+ rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_NORMAL);
+ if (rv) {
+			printf("vm_fault bad return rv=%d va=0x%jx\n", rv, (uintmax_t)va);
+
+ goto error;
+ }
+ }
+
+ goto retry;
+
+error:
+ vm_page_lock_queues();
+ for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++)
+ if (*pages)
+ vm_page_unhold(*pages);
+ vm_page_unlock_queues();
+ return (EFAULT);
+}
+
+void
+vm_fault_unhold_pages(vm_page_t *mp, int count)
+{
+
+ KASSERT(count >= 0, ("negative count %d", count));
+ vm_page_lock_queues();
+ while (count--) {
+ vm_page_unhold(*mp);
+ mp++;
+ }
+ vm_page_unlock_queues();
+}
diff --git a/sys/dev/cxgb/ulp/tom/cxgb_vm.h b/sys/dev/cxgb/ulp/tom/cxgb_vm.h
new file mode 100644
index 0000000000000..29418b616fd4f
--- /dev/null
+++ b/sys/dev/cxgb/ulp/tom/cxgb_vm.h
@@ -0,0 +1,40 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+
+$FreeBSD$
+
+***************************************************************************/
+#ifndef CXGB_VM_H_
+#define CXGB_VM_H_
+
+#define VM_HOLD_WRITEABLE 0x1
+
+int vm_fault_hold_user_pages(vm_offset_t addr, vm_page_t *mp, int count, int flags);
+void vm_fault_unhold_pages(vm_page_t *mp, int count);
+
+#endif
diff --git a/sys/modules/cxgb/Makefile b/sys/modules/cxgb/Makefile
index 6e35a6ee0d86a..85c6f4875d5a1 100644
--- a/sys/modules/cxgb/Makefile
+++ b/sys/modules/cxgb/Makefile
@@ -1,5 +1,16 @@
# $FreeBSD$
SUBDIR= cxgb
+#SUBDIR+= toecore
+#SUBDIR+= tom
+#SUBDIR+= ${_iw_cxgb}
SUBDIR+= cxgb_t3fw
+.if ${MACHINE_ARCH} == "i386"
+_iw_cxgb = iw_cxgb
+.endif
+
+.if ${MACHINE_ARCH} == "amd64"
+_iw_cxgb = iw_cxgb
+.endif
+
.include <bsd.subdir.mk>
diff --git a/sys/modules/cxgb/cxgb/Makefile b/sys/modules/cxgb/cxgb/Makefile
index 039032da8f6a7..64044e86e4d00 100644
--- a/sys/modules/cxgb/cxgb/Makefile
+++ b/sys/modules/cxgb/cxgb/Makefile
@@ -4,15 +4,21 @@ CXGB = ${.CURDIR}/../../../dev/cxgb
.PATH: ${CXGB} ${CXGB}/common ${CXGB}/sys
KMOD= if_cxgb
-SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxgb_mv88e1xxx.c
+SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxgb_mv88e1xxx.c
SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c
-SRCS+= cxgb_sge.c cxgb_lro.c cxgb_offload.c
-SRCS+= device_if.h bus_if.h pci_if.h opt_zero.h
-SRCS+= uipc_mvec.c cxgb_support.c cxgb_multiq.c
-
-CFLAGS+= -DCONFIG_CHELSIO_T3_CORE -g -DCONFIG_DEFINED -DDEFAULT_JUMBO -I${CXGB}
+SRCS+= cxgb_sge.c cxgb_lro.c cxgb_offload.c cxgb_tn1010.c
+SRCS+= device_if.h bus_if.h pci_if.h opt_zero.h opt_sched.h
+SRCS+= uipc_mvec.c cxgb_support.c cxgb_multiq.c
+CFLAGS+= -DCONFIG_CHELSIO_T3_CORE -g -DCONFIG_DEFINED -DDEFAULT_JUMBO -I${CXGB} -DSMP
+CFLAGS+= -DDISABLE_MBUF_IOVEC
+#CFLAGS+= -DIFNET_MULTIQUEUE
+#CFLAGS+= -DDISABLE_MBUF_IOVEC
#CFLAGS+= -DDEBUG -DDEBUG_PRINT
+#CFLAGS+= -DINVARIANT_SUPPORT -DINVARIANTS
+#CFLAGS+= -DWITNESS
+#CFLAGS += -DLOCK_PROFILING
#CFLAGS+= -DINVARIANT_SUPPORT -DINVARIANTS -DWITNESS
-.include <bsd.kmod.mk>
\ No newline at end of file
+.include <bsd.kmod.mk>
diff --git a/sys/modules/cxgb/cxgb_t3fw/Makefile b/sys/modules/cxgb/cxgb_t3fw/Makefile
index 787c9d41fbcc2..c35d73a41f025 100644
--- a/sys/modules/cxgb/cxgb_t3fw/Makefile
+++ b/sys/modules/cxgb/cxgb_t3fw/Makefile
@@ -3,6 +3,7 @@
CXGB = ${.CURDIR}/../../../dev/cxgb
.PATH: ${CXGB}
+KMOD= cxgb_t3fw
SRCS+= cxgb_t3fw.c
.include <bsd.kmod.mk>
diff --git a/sys/modules/cxgb/iw_cxgb/Makefile b/sys/modules/cxgb/iw_cxgb/Makefile
new file mode 100644
index 0000000000000..e1123bba3faff
--- /dev/null
+++ b/sys/modules/cxgb/iw_cxgb/Makefile
@@ -0,0 +1,14 @@
+# $FreeBSD$
+
+CXGB = ${.CURDIR}/../../../dev/cxgb
+.PATH: ${IW_CXGB} ${CXGB}/common ${CXGB}/ulp/iw_cxgb
+
+KMOD= iw_cxgb
+SRCS= iw_cxgb.c iw_cxgb_cm.c iw_cxgb_hal.c
+SRCS+= iw_cxgb_provider.c iw_cxgb_qp.c iw_cxgb_resource.c
+SRCS+= iw_cxgb_ev.c iw_cxgb_mem.c iw_cxgb_dbg.c iw_cxgb_cq.c
+SRCS+= bus_if.h device_if.h opt_sched.h pci_if.h pcib_if.h opt_ktr.h
+CFLAGS+= -DCONFIG_CHELSIO_T3_CORE -g -DCONFIG_DEFINED -I${CXGB} -DSMP
+#CFLAGS+= -DDEBUG
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/cxgb/toecore/Makefile b/sys/modules/cxgb/toecore/Makefile
new file mode 100644
index 0000000000000..1c05d799a5f88
--- /dev/null
+++ b/sys/modules/cxgb/toecore/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+TOECORE = ${.CURDIR}/../../../dev/cxgb/ulp/toecore
+.PATH: ${TOECORE}
+
+KMOD= toecore
+SRCS= toedev.c
+SRCS+= device_if.h bus_if.h pci_if.h opt_sched.h
+.include <bsd.kmod.mk>
\ No newline at end of file
diff --git a/sys/modules/cxgb/tom/Makefile b/sys/modules/cxgb/tom/Makefile
new file mode 100644
index 0000000000000..2417edf1fc40e
--- /dev/null
+++ b/sys/modules/cxgb/tom/Makefile
@@ -0,0 +1,14 @@
+# $FreeBSD$
+
+TOM = ${.CURDIR}/../../../dev/cxgb/ulp/tom
+.PATH: ${TOM}
+
+KMOD= tom
+SRCS= cxgb_tom.c cxgb_cpl_io.c cxgb_listen.c cxgb_tom_sysctl.c cxgb_cpl_socket.c
+SRCS+= cxgb_ddp.c cxgb_vm.c cxgb_l2t.c cxgb_tcp_offload.c
+SRCS+= opt_compat.h opt_inet.h opt_inet6.h opt_ipsec.h opt_mac.h
+SRCS+= opt_tcpdebug.h opt_ddb.h opt_sched.h opt_global.h opt_ktr.h
+SRCS+= device_if.h bus_if.h pci_if.h
+
+#CFLAGS+= -DDEBUG_PRINT -DDEBUG
+.include <bsd.kmod.mk>