summary refs log tree commit diff
path: root/sys
diff options
context:
space:
mode:
authorNavdeep Parhar <np@FreeBSD.org>2018-04-14 19:07:56 +0000
committerNavdeep Parhar <np@FreeBSD.org>2018-04-14 19:07:56 +0000
commit1131c927c4c388c75c728d443cab7ff0482a1c50 (patch)
tree61235e95df1bd31f983d62f3d73498251bb80c05 /sys
parent23084818ff0f840888490236a1a95bacd11a2a81 (diff)
downloadsrc-test2-1131c927c4c388c75c728d443cab7ff0482a1c50.tar.gz
src-test2-1131c927c4c388c75c728d443cab7ff0482a1c50.zip
Notes
Diffstat (limited to 'sys')
-rw-r--r--sys/dev/cxgbe/adapter.h5
-rw-r--r--sys/dev/cxgbe/offload.h1
-rw-r--r--sys/dev/cxgbe/t4_ioctl.h40
-rw-r--r--sys/dev/cxgbe/t4_main.c137
-rw-r--r--sys/dev/cxgbe/t4_sge.c6
-rw-r--r--sys/dev/cxgbe/tom/t4_connect.c110
-rw-r--r--sys/dev/cxgbe/tom/t4_cpl_io.c17
-rw-r--r--sys/dev/cxgbe/tom/t4_listen.c138
-rw-r--r--sys/dev/cxgbe/tom/t4_tom.c244
-rw-r--r--sys/dev/cxgbe/tom/t4_tom.h11
10 files changed, 605 insertions, 104 deletions
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index bf0dda5a0f43..34db6e8218ac 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -804,8 +804,11 @@ struct adapter {
void *tom_softc; /* (struct tom_data *) */
struct tom_tunables tt;
- struct iw_tunables iwt;
+ struct t4_offload_policy *policy;
+ struct rwlock policy_lock;
+
void *iwarp_softc; /* (struct c4iw_dev *) */
+ struct iw_tunables iwt;
void *iscsi_ulp_softc; /* (struct cxgbei_data *) */
void *ccr_softc; /* (struct ccr_softc *) */
struct l2t_data *l2t; /* L2 table */
diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h
index 431e486e2afa..cd27d7de6e24 100644
--- a/sys/dev/cxgbe/offload.h
+++ b/sys/dev/cxgbe/offload.h
@@ -156,6 +156,7 @@ struct tom_tunables {
int num_tls_rx_ports;
int tx_align;
int tx_zcopy;
+ int cop_managed_offloading;
};
/* iWARP driver tunables */
struct iw_tunables {
diff --git a/sys/dev/cxgbe/t4_ioctl.h b/sys/dev/cxgbe/t4_ioctl.h
index 8fa92935c62b..b7b8ce5fbe24 100644
--- a/sys/dev/cxgbe/t4_ioctl.h
+++ b/sys/dev/cxgbe/t4_ioctl.h
@@ -35,6 +35,7 @@
#include <sys/types.h>
#include <net/ethernet.h>
+#include <net/bpf.h>
/*
* Ioctl commands specific to this driver.
@@ -344,6 +345,44 @@ struct t4_cudbg_dump {
uint8_t *data;
};
+enum {
+ OPEN_TYPE_LISTEN = 'L',
+ OPEN_TYPE_ACTIVE = 'A',
+ OPEN_TYPE_PASSIVE = 'P',
+ OPEN_TYPE_DONTCARE = 'D',
+};
+
+struct offload_settings {
+ int8_t offload;
+ int8_t rx_coalesce;
+ int8_t cong_algo;
+ int8_t sched_class;
+ int8_t tstamp;
+ int8_t sack;
+ int8_t nagle;
+ int8_t ecn;
+ int8_t ddp;
+ int8_t tls;
+ int16_t txq;
+ int16_t rxq;
+ int16_t mss;
+};
+
+struct offload_rule {
+ char open_type;
+ struct offload_settings settings;
+ struct bpf_program bpf_prog; /* compiled program/filter */
+};
+
+/*
+ * An offload policy consists of a set of rules matched in sequence. The
+ * settings of the first rule that matches are applied to that connection.
+ */
+struct t4_offload_policy {
+ uint32_t nrules;
+ struct offload_rule *rule;
+};
+
#define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg)
#define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg)
#define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump)
@@ -368,4 +407,5 @@ struct t4_cudbg_dump {
#define CHELSIO_T4_LOAD_BOOT _IOW('f', T4_LOAD_BOOT, struct t4_bootrom)
#define CHELSIO_T4_LOAD_BOOTCFG _IOW('f', T4_LOAD_BOOTCFG, struct t4_data)
#define CHELSIO_T4_CUDBG_DUMP _IOWR('f', T4_CUDBG_DUMP, struct t4_cudbg_dump)
+#define CHELSIO_T4_SET_OFLD_POLICY _IOW('f', T4_SET_OFLD_POLICY, struct t4_offload_policy)
#endif
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 9fa4e9efc20c..7ab5c4d76e27 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -470,6 +470,14 @@ static int pcie_relaxed_ordering = -1;
TUNABLE_INT("hw.cxgbe.pcie_relaxed_ordering", &pcie_relaxed_ordering);
+#ifdef TCP_OFFLOAD
+/*
+ * TOE tunables.
+ */
+static int t4_cop_managed_offloading = 0;
+TUNABLE_INT("hw.cxgbe.cop_managed_offloading", &t4_cop_managed_offloading);
+#endif
+
/* Functions used by VIs to obtain unique MAC addresses for each VI. */
static int vi_mac_funcs[] = {
FW_VI_FUNC_ETH,
@@ -617,6 +625,8 @@ static int load_cfg(struct adapter *, struct t4_data *);
static int load_boot(struct adapter *, struct t4_bootrom *);
static int load_bootcfg(struct adapter *, struct t4_data *);
static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
+static void free_offload_policy(struct t4_offload_policy *);
+static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
#ifdef TCP_OFFLOAD
@@ -897,6 +907,9 @@ t4_attach(device_t dev)
mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);
+ sc->policy = NULL;
+ rw_init(&sc->policy_lock, "connection offload policy");
+
rc = t4_map_bars_0_and_4(sc);
if (rc != 0)
goto done; /* error message displayed already */
@@ -1405,6 +1418,14 @@ t4_detach_common(device_t dev)
if (mtx_initialized(&sc->reg_lock))
mtx_destroy(&sc->reg_lock);
+ if (rw_initialized(&sc->policy_lock)) {
+ rw_destroy(&sc->policy_lock);
+#ifdef TCP_OFFLOAD
+ if (sc->policy != NULL)
+ free_offload_policy(sc->policy);
+#endif
+ }
+
for (i = 0; i < NUM_MEMWIN; i++) {
struct memwin *mw = &sc->memwin[i];
@@ -5440,6 +5461,12 @@ t4_sysctls(struct adapter *sc)
CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
"Enable zero-copy aio_write(2)");
+ sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "cop_managed_offloading", CTLFLAG_RW,
+ &sc->tt.cop_managed_offloading, 0,
+ "COP (Connection Offload Policy) controls all TOE offload");
+
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
"TP timer tick (us)");
@@ -9385,6 +9412,113 @@ done:
return (rc);
}
+static void
+free_offload_policy(struct t4_offload_policy *op)
+{
+ struct offload_rule *r;
+ int i;
+
+ if (op == NULL)
+ return;
+
+ r = &op->rule[0];
+ for (i = 0; i < op->nrules; i++, r++) {
+ free(r->bpf_prog.bf_insns, M_CXGBE);
+ }
+ free(op->rule, M_CXGBE);
+ free(op, M_CXGBE);
+}
+
+static int
+set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
+{
+ int i, rc, len;
+ struct t4_offload_policy *op, *old;
+ struct bpf_program *bf;
+ const struct offload_settings *s;
+ struct offload_rule *r;
+ void *u;
+
+ if (!is_offload(sc))
+ return (ENODEV);
+
+ if (uop->nrules == 0) {
+ /* Delete installed policies. */
+ op = NULL;
+ goto set_policy;
+ } if (uop->nrules > 256) { /* arbitrary */
+ return (E2BIG);
+ }
+
+ /* Copy userspace offload policy to kernel */
+ op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
+ op->nrules = uop->nrules;
+ len = op->nrules * sizeof(struct offload_rule);
+ op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
+ rc = copyin(uop->rule, op->rule, len);
+ if (rc) {
+ free(op->rule, M_CXGBE);
+ free(op, M_CXGBE);
+ return (rc);
+ }
+
+ r = &op->rule[0];
+ for (i = 0; i < op->nrules; i++, r++) {
+
+ /* Validate open_type */
+ if (r->open_type != OPEN_TYPE_LISTEN &&
+ r->open_type != OPEN_TYPE_ACTIVE &&
+ r->open_type != OPEN_TYPE_PASSIVE &&
+ r->open_type != OPEN_TYPE_DONTCARE) {
+error:
+ /*
+ * Rules 0 to i have malloc'd filters that need to be
+ * freed. Rules i+1 to nrules have userspace pointers
+ * and should be left alone.
+ */
+ op->nrules = i;
+ free_offload_policy(op);
+ return (rc);
+ }
+
+ /* Validate settings */
+ s = &r->settings;
+ if ((s->offload != 0 && s->offload != 1) ||
+ s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
+ s->sched_class < -1 ||
+ s->sched_class >= sc->chip_params->nsched_cls) {
+ rc = EINVAL;
+ goto error;
+ }
+
+ bf = &r->bpf_prog;
+ u = bf->bf_insns; /* userspace ptr */
+ bf->bf_insns = NULL;
+ if (bf->bf_len == 0) {
+ /* legal, matches everything */
+ continue;
+ }
+ len = bf->bf_len * sizeof(*bf->bf_insns);
+ bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
+ rc = copyin(u, bf->bf_insns, len);
+ if (rc != 0)
+ goto error;
+
+ if (!bpf_validate(bf->bf_insns, bf->bf_len)) {
+ rc = EINVAL;
+ goto error;
+ }
+ }
+set_policy:
+ rw_wlock(&sc->policy_lock);
+ old = sc->policy;
+ sc->policy = op;
+ rw_wunlock(&sc->policy_lock);
+ free_offload_policy(old);
+
+ return (0);
+}
+
#define MAX_READ_BUF_SIZE (128 * 1024)
static int
read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
@@ -9743,6 +9877,9 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
case CHELSIO_T4_CUDBG_DUMP:
rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
break;
+ case CHELSIO_T4_SET_OFLD_POLICY:
+ rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
+ break;
default:
rc = ENOTTY;
}
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 7edd17f6df69..b2fbfe381918 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -963,8 +963,10 @@ mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
#ifdef TCP_OFFLOAD
if (toe) {
- payload = sc->tt.rx_coalesce ?
- G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu;
+ int rxcs = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
+
+ /* Note that COP can set rx_coalesce on/off per connection. */
+ payload = max(mtu, rxcs);
} else {
#endif
/* large enough even when hw VLAN extraction is disabled */
diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c
index 7b0267007923..9d1b6add679f 100644
--- a/sys/dev/cxgbe/tom/t4_connect.c
+++ b/sys/dev/cxgbe/tom/t4_connect.c
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -55,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>
+#include <netinet/cc/cc.h>
#include "common/common.h"
#include "common/t4_msg.h"
@@ -233,47 +235,85 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
* Options2 for active open.
*/
static uint32_t
-calc_opt2a(struct socket *so, struct toepcb *toep)
+calc_opt2a(struct socket *so, struct toepcb *toep,
+ const struct offload_settings *s)
{
struct tcpcb *tp = so_sototcpcb(so);
struct port_info *pi = toep->vi->pi;
struct adapter *sc = pi->adapter;
- uint32_t opt2;
+ uint32_t opt2 = 0;
- opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) |
- F_RSS_QUEUE_VALID | V_RSS_QUEUE(toep->ofld_rxq->iq.abs_id);
+ /*
+ * rx flow control, rx coalesce, congestion control, and tx pace are all
+ * explicitly set by the driver. On T5+ the ISS is also set by the
+ * driver to the value picked by the kernel.
+ */
+ if (is_t4(sc)) {
+ opt2 |= F_RX_FC_VALID | F_RX_COALESCE_VALID;
+ opt2 |= F_CONG_CNTRL_VALID | F_PACE_VALID;
+ } else {
+ opt2 |= F_T5_OPT_2_VALID; /* all 4 valid */
+ opt2 |= F_T5_ISS; /* ISS provided in CPL */
+ }
- if (tp->t_flags & TF_SACK_PERMIT)
+ if (s->sack > 0 || (s->sack < 0 && (tp->t_flags & TF_SACK_PERMIT)))
opt2 |= F_SACK_EN;
- if (tp->t_flags & TF_REQ_TSTMP)
+ if (s->tstamp > 0 || (s->tstamp < 0 && (tp->t_flags & TF_REQ_TSTMP)))
opt2 |= F_TSTAMPS_EN;
if (tp->t_flags & TF_REQ_SCALE)
opt2 |= F_WND_SCALE_EN;
- if (V_tcp_do_ecn)
+ if (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn == 1))
opt2 |= F_CCTRL_ECN;
- /* RX_COALESCE is always a valid value (M_RX_COALESCE). */
- if (is_t4(sc))
- opt2 |= F_RX_COALESCE_VALID;
+ /* XXX: F_RX_CHANNEL for multiple rx c-chan support goes here. */
+
+ opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]);
+
+ /* These defaults are subject to ULP specific fixups later. */
+ opt2 |= V_RX_FC_DDP(0) | V_RX_FC_DISABLE(0);
+
+ opt2 |= V_PACE(0);
+
+ if (s->cong_algo >= 0)
+ opt2 |= V_CONG_CNTRL(s->cong_algo);
+ else if (sc->tt.cong_algorithm >= 0)
+ opt2 |= V_CONG_CNTRL(sc->tt.cong_algorithm & M_CONG_CNTRL);
else {
- opt2 |= F_T5_OPT_2_VALID;
- opt2 |= F_T5_ISS;
+ struct cc_algo *cc = CC_ALGO(tp);
+
+ if (strcasecmp(cc->name, "reno") == 0)
+ opt2 |= V_CONG_CNTRL(CONG_ALG_RENO);
+ else if (strcasecmp(cc->name, "tahoe") == 0)
+ opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ if (strcasecmp(cc->name, "newreno") == 0)
+ opt2 |= V_CONG_CNTRL(CONG_ALG_NEWRENO);
+ if (strcasecmp(cc->name, "highspeed") == 0)
+ opt2 |= V_CONG_CNTRL(CONG_ALG_HIGHSPEED);
+ else {
+ /*
+ * Use newreno in case the algorithm selected by the
+ * host stack is not supported by the hardware.
+ */
+ opt2 |= V_CONG_CNTRL(CONG_ALG_NEWRENO);
+ }
}
- if (sc->tt.rx_coalesce)
+
+ if (s->rx_coalesce > 0 || (s->rx_coalesce < 0 && sc->tt.rx_coalesce))
opt2 |= V_RX_COALESCE(M_RX_COALESCE);
- if (sc->tt.cong_algorithm != -1)
- opt2 |= V_CONG_CNTRL(sc->tt.cong_algorithm & M_CONG_CNTRL);
+ /* Note that ofld_rxq is already set according to s->rxq. */
+ opt2 |= F_RSS_QUEUE_VALID;
+ opt2 |= V_RSS_QUEUE(toep->ofld_rxq->iq.abs_id);
#ifdef USE_DDP_RX_FLOW_CONTROL
if (toep->ulp_mode == ULP_MODE_TCPDDP)
- opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
+ opt2 |= F_RX_FC_DDP;
#endif
+
if (toep->ulp_mode == ULP_MODE_TLS) {
- opt2 |= F_RX_FC_VALID;
opt2 &= ~V_RX_COALESCE(M_RX_COALESCE);
opt2 |= F_RX_FC_DISABLE;
}
@@ -348,10 +388,12 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
struct wrqe *wr = NULL;
struct ifnet *rt_ifp = rt->rt_ifp;
struct vi_info *vi;
- int mtu_idx, rscale, qid_atid, rc, isipv6;
+ int mtu_idx, rscale, qid_atid, rc, isipv6, txqid, rxqid;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = intotcpcb(inp);
int reason;
+ struct offload_settings settings;
+ uint16_t vid = 0xffff;
INP_WLOCK_ASSERT(inp);
KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
@@ -363,12 +405,30 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
struct ifnet *ifp = VLAN_COOKIE(rt_ifp);
vi = ifp->if_softc;
+ VLAN_TAG(ifp, &vid);
} else if (rt_ifp->if_type == IFT_IEEE8023ADLAG)
DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */
else
DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
- toep = alloc_toepcb(vi, -1, -1, M_NOWAIT | M_ZERO);
+ rw_rlock(&sc->policy_lock);
+ settings = *lookup_offload_policy(sc, OPEN_TYPE_ACTIVE, NULL, vid, inp);
+ rw_runlock(&sc->policy_lock);
+ if (!settings.offload)
+ DONT_OFFLOAD_ACTIVE_OPEN(EPERM);
+
+ if (settings.txq >= 0 && settings.txq < vi->nofldtxq)
+ txqid = settings.txq;
+ else
+ txqid = arc4random() % vi->nofldtxq;
+ txqid += vi->first_ofld_txq;
+ if (settings.rxq >= 0 && settings.rxq < vi->nofldrxq)
+ rxqid = settings.rxq;
+ else
+ rxqid = arc4random() % vi->nofldrxq;
+ rxqid += vi->first_ofld_rxq;
+
+ toep = alloc_toepcb(vi, txqid, rxqid, M_NOWAIT | M_ZERO);
if (toep == NULL)
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
@@ -387,7 +447,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
toep->vnet = so->so_vnet;
- set_ulp_mode(toep, select_ulp_mode(so, sc));
+ set_ulp_mode(toep, select_ulp_mode(so, sc, &settings));
SOCKBUF_LOCK(&so->so_rcv);
/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
@@ -402,7 +462,7 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
rscale = tp->request_r_scale = select_rcv_wscale();
else
rscale = 0;
- mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
+ mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, &settings);
qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | toep->tid;
if (isipv6) {
@@ -443,8 +503,8 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0];
cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8];
cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale,
- toep->rx_credits, toep->ulp_mode);
- cpl->opt2 = calc_opt2a(so, toep);
+ toep->rx_credits, toep->ulp_mode, &settings);
+ cpl->opt2 = calc_opt2a(so, toep, &settings);
} else {
struct cpl_act_open_req *cpl = wrtod(wr);
struct cpl_t5_act_open_req *cpl5 = (void *)cpl;
@@ -472,8 +532,8 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port,
&cpl->peer_ip, &cpl->peer_port);
cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale,
- toep->rx_credits, toep->ulp_mode);
- cpl->opt2 = calc_opt2a(so, toep);
+ toep->rx_credits, toep->ulp_mode, &settings);
+ cpl->opt2 = calc_opt2a(so, toep, &settings);
}
CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__,
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 351a2e3c42d8..11693371c17b 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -121,6 +121,11 @@ send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
nparams++;
if (toep->tls.fcplenmax != 0)
nparams++;
+ if (toep->tc_idx != -1) {
+ MPASS(toep->tc_idx >= 0 &&
+ toep->tc_idx < sc->chip_params->nsched_cls);
+ nparams++;
+ }
flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
@@ -172,6 +177,8 @@ send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
FLOWC_PARAM(ULP_MODE, toep->ulp_mode);
if (toep->tls.fcplenmax != 0)
FLOWC_PARAM(TXDATAPLEN_MAX, toep->tls.fcplenmax);
+ if (toep->tc_idx != -1)
+ FLOWC_PARAM(SCHEDCLASS, toep->tc_idx);
#undef FLOWC_PARAM
KASSERT(paramidx == nparams, ("nparams mismatch"));
@@ -333,19 +340,19 @@ assign_rxopt(struct tcpcb *tp, unsigned int opt)
n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
else
n = sizeof(struct ip) + sizeof(struct tcphdr);
- if (V_tcp_do_rfc1323)
- n += TCPOLEN_TSTAMP_APPA;
tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;
- CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid,
- G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]);
-
if (G_TCPOPT_TSTAMP(opt)) {
tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */
tp->ts_recent = 0; /* hmmm */
tp->ts_recent_age = tcp_ts_getticks();
+ tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
}
+ CTR5(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u), mss %u", __func__,
+ toep->tid, G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)],
+ tp->t_maxseg);
+
if (G_TCPOPT_SACK(opt))
tp->t_flags |= TF_SACK_PERMIT; /* should already be set */
else
diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c
index 7b7428c9bcb3..7571c353dc3b 100644
--- a/sys/dev/cxgbe/tom/t4_listen.c
+++ b/sys/dev/cxgbe/tom/t4_listen.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/fnv_hash.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>
+#include <netinet/cc/cc.h>
#include "common/common.h"
#include "common/t4_msg.h"
@@ -84,7 +86,8 @@ static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
-static inline void save_qids_in_mbuf(struct mbuf *, struct vi_info *);
+static inline void save_qids_in_mbuf(struct mbuf *, struct vi_info *,
+ struct offload_settings *);
static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
static void send_reset_synqe(struct toedev *, struct synq_entry *);
@@ -513,9 +516,17 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
struct inpcb *inp = tp->t_inpcb;
struct listen_ctx *lctx;
int i, rc, v;
+ struct offload_settings settings;
INP_WLOCK_ASSERT(inp);
+ rw_rlock(&sc->policy_lock);
+ settings = *lookup_offload_policy(sc, OPEN_TYPE_LISTEN, NULL, 0xffff,
+ inp);
+ rw_runlock(&sc->policy_lock);
+ if (!settings.offload)
+ return (0);
+
/* Don't start a hardware listener for any loopback address. */
if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
return (0);
@@ -948,12 +959,22 @@ t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
}
static inline void
-save_qids_in_mbuf(struct mbuf *m, struct vi_info *vi)
+save_qids_in_mbuf(struct mbuf *m, struct vi_info *vi,
+ struct offload_settings *s)
{
uint32_t txqid, rxqid;
- txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq;
- rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq;
+ if (s->txq >= 0 && s->txq < vi->nofldtxq)
+ txqid = s->txq;
+ else
+ txqid = arc4random() % vi->nofldtxq;
+ txqid += vi->first_ofld_txq;
+
+ if (s->rxq >= 0 && s->rxq < vi->nofldrxq)
+ rxqid = s->rxq;
+ else
+ rxqid = arc4random() % vi->nofldrxq;
+ rxqid += vi->first_ofld_rxq;
m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff);
}
@@ -1019,50 +1040,88 @@ t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
*/
static uint32_t
calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
- const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
+ const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode,
+ struct cc_algo *cc, const struct offload_settings *s)
{
struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid];
- uint32_t opt2;
-
- opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) |
- F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);
-
- if (V_tcp_do_rfc1323) {
- if (tcpopt->tstamp)
- opt2 |= F_TSTAMPS_EN;
- if (tcpopt->sack)
- opt2 |= F_SACK_EN;
- if (tcpopt->wsf <= 14)
- opt2 |= F_WND_SCALE_EN;
+ uint32_t opt2 = 0;
+
+ /*
+ * rx flow control, rx coalesce, congestion control, and tx pace are all
+ * explicitly set by the driver. On T5+ the ISS is also set by the
+ * driver to the value picked by the kernel.
+ */
+ if (is_t4(sc)) {
+ opt2 |= F_RX_FC_VALID | F_RX_COALESCE_VALID;
+ opt2 |= F_CONG_CNTRL_VALID | F_PACE_VALID;
+ } else {
+ opt2 |= F_T5_OPT_2_VALID; /* all 4 valid */
+ opt2 |= F_T5_ISS; /* ISS provided in CPL */
}
- if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR))
+ if (tcpopt->sack && (s->sack > 0 || (s->sack < 0 && V_tcp_do_rfc1323)))
+ opt2 |= F_SACK_EN;
+
+ if (tcpopt->tstamp &&
+ (s->tstamp > 0 || (s->tstamp < 0 && V_tcp_do_rfc1323)))
+ opt2 |= F_TSTAMPS_EN;
+
+ if (tcpopt->wsf < 15 && V_tcp_do_rfc1323)
+ opt2 |= F_WND_SCALE_EN;
+
+ if (th->th_flags & (TH_ECE | TH_CWR) &&
+ (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn)))
opt2 |= F_CCTRL_ECN;
- /* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */
- if (is_t4(sc))
- opt2 |= F_RX_COALESCE_VALID;
+ /* XXX: F_RX_CHANNEL for multiple rx c-chan support goes here. */
+
+ opt2 |= V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]);
+
+ /* These defaults are subject to ULP specific fixups later. */
+ opt2 |= V_RX_FC_DDP(0) | V_RX_FC_DISABLE(0);
+
+ opt2 |= V_PACE(0);
+
+ if (s->cong_algo >= 0)
+ opt2 |= V_CONG_CNTRL(s->cong_algo);
+ else if (sc->tt.cong_algorithm >= 0)
+ opt2 |= V_CONG_CNTRL(sc->tt.cong_algorithm & M_CONG_CNTRL);
else {
- opt2 |= F_T5_OPT_2_VALID;
- opt2 |= F_T5_ISS;
+ if (strcasecmp(cc->name, "reno") == 0)
+ opt2 |= V_CONG_CNTRL(CONG_ALG_RENO);
+ else if (strcasecmp(cc->name, "tahoe") == 0)
+ opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ if (strcasecmp(cc->name, "newreno") == 0)
+ opt2 |= V_CONG_CNTRL(CONG_ALG_NEWRENO);
+ if (strcasecmp(cc->name, "highspeed") == 0)
+ opt2 |= V_CONG_CNTRL(CONG_ALG_HIGHSPEED);
+ else {
+ /*
+ * Use newreno in case the algorithm selected by the
+ * host stack is not supported by the hardware.
+ */
+ opt2 |= V_CONG_CNTRL(CONG_ALG_NEWRENO);
+ }
}
- if (sc->tt.rx_coalesce)
+
+ if (s->rx_coalesce > 0 || (s->rx_coalesce < 0 && sc->tt.rx_coalesce))
opt2 |= V_RX_COALESCE(M_RX_COALESCE);
- if (sc->tt.cong_algorithm != -1)
- opt2 |= V_CONG_CNTRL(sc->tt.cong_algorithm & M_CONG_CNTRL);
+ /* Note that ofld_rxq is already set according to s->rxq. */
+ opt2 |= F_RSS_QUEUE_VALID;
+ opt2 |= V_RSS_QUEUE(ofld_rxq->iq.abs_id);
#ifdef USE_DDP_RX_FLOW_CONTROL
if (ulp_mode == ULP_MODE_TCPDDP)
- opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
+ opt2 |= F_RX_FC_DDP;
#endif
+
if (ulp_mode == ULP_MODE_TLS) {
- opt2 |= F_RX_FC_VALID;
opt2 &= ~V_RX_COALESCE(M_RX_COALESCE);
opt2 |= F_RX_FC_DISABLE;
}
- return htobe32(opt2);
+ return (htobe32(opt2));
}
static void
@@ -1199,6 +1258,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
+ struct offload_settings settings;
KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
("%s: unexpected opcode 0x%x", __func__, opcode));
@@ -1334,15 +1394,23 @@ found:
REJECT_PASS_ACCEPT();
}
so = inp->inp_socket;
+ rw_rlock(&sc->policy_lock);
+ settings = *lookup_offload_policy(sc, OPEN_TYPE_PASSIVE, m, 0xffff, inp);
+ rw_runlock(&sc->policy_lock);
+ if (!settings.offload) {
+ INP_WUNLOCK(inp);
+ free(wr, M_CXGBE);
+ REJECT_PASS_ACCEPT();
+ }
- mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
+ mtu_idx = find_best_mtu_idx(sc, &inc, &settings);
rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
wnd = max(so->sol_sbrcv_hiwat, MIN_RCV_WND);
wnd = min(wnd, MAX_RCV_WND);
rx_credits = min(wnd >> 10, M_RCV_BUFSIZ);
- save_qids_in_mbuf(m, vi);
+ save_qids_in_mbuf(m, vi, &settings);
get_qids_from_mbuf(m, NULL, &rxqid);
if (is_t4(sc))
@@ -1352,7 +1420,7 @@ found:
INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
}
- ulp_mode = select_ulp_mode(so, sc);
+ ulp_mode = select_ulp_mode(so, sc, &settings);
switch (ulp_mode) {
case ULP_MODE_TCPDDP:
synqe->flags |= TPF_SYNQE_TCPDDP;
@@ -1361,8 +1429,10 @@ found:
synqe->flags |= TPF_SYNQE_TLS;
break;
}
- rpl->opt0 = calc_opt0(so, vi, e, mtu_idx, rscale, rx_credits, ulp_mode);
- rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);
+ rpl->opt0 = calc_opt0(so, vi, e, mtu_idx, rscale, rx_credits, ulp_mode,
+ &settings);
+ rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode,
+ CC_ALGO(intotcpcb(inp)), &settings);
synqe->tid = tid;
synqe->lctx = lctx;
diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c
index f36d07461305..ce714e14e381 100644
--- a/sys/dev/cxgbe/tom/t4_tom.c
+++ b/sys/dev/cxgbe/tom/t4_tom.c
@@ -51,6 +51,8 @@ __FBSDID("$FreeBSD$");
#include <sys/taskqueue.h>
#include <net/if.h>
#include <net/if_var.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
@@ -137,15 +139,11 @@ alloc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags)
txsd_total = tx_credits /
howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16);
- if (txqid < 0)
- txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq;
KASSERT(txqid >= vi->first_ofld_txq &&
txqid < vi->first_ofld_txq + vi->nofldtxq,
("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi,
vi->first_ofld_txq, vi->nofldtxq));
- if (rxqid < 0)
- rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq;
KASSERT(rxqid >= vi->first_ofld_rxq &&
rxqid < vi->first_ofld_rxq + vi->nofldrxq,
("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi,
@@ -569,27 +567,28 @@ queue_tid_release(struct adapter *sc, int tid)
}
/*
- * What mtu_idx to use, given a 4-tuple and/or an MSS cap
+ * What mtu_idx to use, given a 4-tuple. Note that both s->mss and tcp_mssopt
+ * have the MSS that we should advertise in our SYN. Advertised MSS doesn't
+ * account for any TCP options so the effective MSS (only payload, no headers or
+ * options) could be different. We fill up tp->t_maxseg with the effective MSS
+ * at the end of the 3-way handshake.
*/
int
-find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
+find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc,
+ struct offload_settings *s)
{
unsigned short *mtus = &sc->params.mtus[0];
- int i, mss, n;
+ int i, mss, mtu;
- KASSERT(inc != NULL || pmss > 0,
- ("%s: at least one of inc/pmss must be specified", __func__));
-
- mss = inc ? tcp_mssopt(inc) : pmss;
- if (pmss > 0 && mss > pmss)
- mss = pmss;
+ MPASS(inc != NULL);
+ mss = s->mss > 0 ? s->mss : tcp_mssopt(inc);
if (inc->inc_flags & INC_ISIPV6)
- n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+ mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
else
- n = sizeof(struct ip) + sizeof(struct tcphdr);
+ mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr);
- for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++)
+ for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++)
continue;
return (i);
@@ -632,33 +631,32 @@ select_rcv_wscale(void)
*/
uint64_t
calc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e,
- int mtu_idx, int rscale, int rx_credits, int ulp_mode)
+ int mtu_idx, int rscale, int rx_credits, int ulp_mode,
+ struct offload_settings *s)
{
+ int keepalive;
uint64_t opt0;
+ MPASS(so != NULL);
+ MPASS(vi != NULL);
KASSERT(rx_credits <= M_RCV_BUFSIZ,
("%s: rcv_bufsiz too high", __func__));
opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) |
- V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits);
+ V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits) |
+ V_L2T_IDX(e->idx) | V_SMAC_SEL(vi->smt_idx) |
+ V_TX_CHAN(vi->pi->tx_chan);
+
+ keepalive = tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE;
+ opt0 |= V_KEEP_ALIVE(keepalive != 0);
- if (so != NULL) {
+ if (s->nagle < 0) {
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = intotcpcb(inp);
- int keepalive = tcp_always_keepalive ||
- so_options_get(so) & SO_KEEPALIVE;
opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
- opt0 |= V_KEEP_ALIVE(keepalive != 0);
- }
-
- if (e != NULL)
- opt0 |= V_L2T_IDX(e->idx);
-
- if (vi != NULL) {
- opt0 |= V_SMAC_SEL(vi->smt_idx);
- opt0 |= V_TX_CHAN(vi->pi->tx_chan);
- }
+ } else
+ opt0 |= V_NAGLE(s->nagle != 0);
return htobe64(opt0);
}
@@ -720,12 +718,15 @@ is_tls_sock(struct socket *so, struct adapter *sc)
}
int
-select_ulp_mode(struct socket *so, struct adapter *sc)
+select_ulp_mode(struct socket *so, struct adapter *sc,
+ struct offload_settings *s)
{
- if (can_tls_offload(sc) && is_tls_sock(so, sc))
+ if (can_tls_offload(sc) &&
+ (s->tls > 0 || (s->tls < 0 && is_tls_sock(so, sc))))
return (ULP_MODE_TLS);
- else if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
+ else if (s->ddp > 0 ||
+ (s->ddp < 0 && sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0))
return (ULP_MODE_TCPDDP);
else
return (ULP_MODE_NONE);
@@ -1093,6 +1094,181 @@ free_tom_data(struct adapter *sc, struct tom_data *td)
free(td, M_CXGBE);
}
+static char *
+prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen,
+    int *buflen)
+{
+	char *pkt;
+	struct tcphdr *th;
+	int ipv6, len;
+	const int maxlen =
+	    max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) +
+	    max(sizeof(struct ip), sizeof(struct ip6_hdr)) +
+	    sizeof(struct tcphdr);
+
+	MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN);
+
+	pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT);
+	if (pkt == NULL)
+		return (NULL);
+
+	ipv6 = inp->inp_vflag & INP_IPV6;
+	len = 0;
+
+	if (vtag == 0xffff) {
+		struct ether_header *eh = (void *)pkt;
+
+		if (ipv6)
+			eh->ether_type = htons(ETHERTYPE_IPV6);
+		else
+			eh->ether_type = htons(ETHERTYPE_IP);
+
+		len += sizeof(*eh);
+	} else {
+		struct ether_vlan_header *evh = (void *)pkt;
+
+		evh->evl_encap_proto = htons(ETHERTYPE_VLAN);
+		evh->evl_tag = htons(vtag);
+		if (ipv6)
+			evh->evl_proto = htons(ETHERTYPE_IPV6);
+		else
+			evh->evl_proto = htons(ETHERTYPE_IP);
+
+		len += sizeof(*evh);
+	}
+
+	if (ipv6) {
+		struct ip6_hdr *ip6 = (void *)&pkt[len];
+
+		ip6->ip6_vfc = IPV6_VERSION;
+		ip6->ip6_plen = htons(sizeof(struct tcphdr));
+		ip6->ip6_nxt = IPPROTO_TCP;
+		if (open_type == OPEN_TYPE_ACTIVE) {
+			ip6->ip6_src = inp->in6p_laddr;
+			ip6->ip6_dst = inp->in6p_faddr;
+		} else if (open_type == OPEN_TYPE_LISTEN) {
+			ip6->ip6_src = inp->in6p_laddr;
+			ip6->ip6_dst = ip6->ip6_src;
+		}
+
+		len += sizeof(*ip6);
+	} else {
+		struct ip *ip = (void *)&pkt[len];
+
+		ip->ip_v = IPVERSION;
+		ip->ip_hl = sizeof(*ip) >> 2;
+		ip->ip_tos = inp->inp_ip_tos;
+		ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr));
+		ip->ip_ttl = inp->inp_ip_ttl;
+		ip->ip_p = IPPROTO_TCP;
+		if (open_type == OPEN_TYPE_ACTIVE) {
+			ip->ip_src = inp->inp_laddr;
+			ip->ip_dst = inp->inp_faddr;
+		} else if (open_type == OPEN_TYPE_LISTEN) {
+			ip->ip_src = inp->inp_laddr;
+			ip->ip_dst = ip->ip_src;
+		}
+
+		len += sizeof(*ip);
+	}
+
+	th = (void *)&pkt[len];
+	if (open_type == OPEN_TYPE_ACTIVE) {
+		th->th_sport = inp->inp_lport;	/* network byte order already */
+		th->th_dport = inp->inp_fport;	/* ditto */
+	} else if (open_type == OPEN_TYPE_LISTEN) {
+		th->th_sport = inp->inp_lport;	/* network byte order already */
+		th->th_dport = th->th_sport;
+	}
+	len += sizeof(*th);	/* sizeof(th) would count the pointer, not the 20B TCP header */
+
+	*pktlen = *buflen = len;
+	return (pkt);
+}
+
+const struct offload_settings *
+lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m,
+    uint16_t vtag, struct inpcb *inp)
+{
+	const struct t4_offload_policy *op;
+	char *pkt;
+	struct offload_rule *r;
+	int i, matched = 0, pktlen, buflen;	/* matched must not be read uninitialized when no rule fires */
+	static const struct offload_settings allow_offloading_settings = {
+		.offload = 1,
+		.rx_coalesce = -1,
+		.cong_algo = -1,
+		.sched_class = -1,
+		.tstamp = -1,
+		.sack = -1,
+		.nagle = -1,
+		.ecn = -1,
+		.ddp = -1,
+		.tls = -1,
+		.txq = -1,
+		.rxq = -1,
+		.mss = -1,
+	};
+	static const struct offload_settings disallow_offloading_settings = {
+		.offload = 0,
+		/* rest is irrelevant when offload is off. */
+	};
+
+	rw_assert(&sc->policy_lock, RA_LOCKED);
+
+	/*
+	 * If there's no Connection Offloading Policy attached to the device
+	 * then we need to return a default static policy.  If
+	 * "cop_managed_offloading" is true, then we need to disallow
+	 * offloading until a COP is attached to the device.  Otherwise we
+	 * allow offloading ...
+	 */
+	op = sc->policy;
+	if (op == NULL) {
+		if (sc->tt.cop_managed_offloading)
+			return (&disallow_offloading_settings);
+		else
+			return (&allow_offloading_settings);
+	}
+
+	switch (open_type) {
+	case OPEN_TYPE_ACTIVE:
+	case OPEN_TYPE_LISTEN:
+		pkt = prepare_pkt(open_type, 0xffff, inp, &pktlen, &buflen);
+		break;
+	case OPEN_TYPE_PASSIVE:
+		MPASS(m != NULL);
+		pkt = mtod(m, char *);
+		MPASS(*pkt == CPL_PASS_ACCEPT_REQ);
+		pkt += sizeof(struct cpl_pass_accept_req);
+		pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req);
+		buflen = m->m_len - sizeof(struct cpl_pass_accept_req);
+		break;
+	default:
+		MPASS(0);
+		return (&disallow_offloading_settings);
+	}
+
+	if (pkt == NULL || pktlen == 0 || buflen == 0)
+		return (&disallow_offloading_settings);
+
+	r = &op->rule[0];
+	for (i = 0; i < op->nrules; i++, r++) {
+		if (r->open_type != open_type &&
+		    r->open_type != OPEN_TYPE_DONTCARE) {
+			continue;
+		}
+		matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen);
+		if (matched)
+			break;
+	}
+
+	if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN)
+		free(pkt, M_CXGBE);
+
+	return (matched ? &r->settings : &disallow_offloading_settings);
+}
+
static void
reclaim_wr_resources(void *arg, int count)
{
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index 0192238c05cc..b875b0bcf93e 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -87,6 +87,7 @@ enum {
};
struct sockopt;
+struct offload_settings;
struct ofld_tx_sdesc {
uint32_t plen; /* payload length */
@@ -333,13 +334,15 @@ void *lookup_tid(struct adapter *, int);
void update_tid(struct adapter *, int, void *);
void remove_tid(struct adapter *, int, int);
void release_tid(struct adapter *, int, struct sge_wrq *);
-int find_best_mtu_idx(struct adapter *, struct in_conninfo *, int);
+int find_best_mtu_idx(struct adapter *, struct in_conninfo *,
+ struct offload_settings *);
u_long select_rcv_wnd(struct socket *);
int select_rcv_wscale(void);
uint64_t calc_opt0(struct socket *, struct vi_info *, struct l2t_entry *,
- int, int, int, int);
+ int, int, int, int, struct offload_settings *);
uint64_t select_ntuple(struct vi_info *, struct l2t_entry *);
-int select_ulp_mode(struct socket *, struct adapter *);
+int select_ulp_mode(struct socket *, struct adapter *,
+ struct offload_settings *);
void set_ulp_mode(struct toepcb *, int);
int negative_advice(int);
struct clip_entry *hold_lip(struct tom_data *, struct in6_addr *,
@@ -416,6 +419,8 @@ void handle_ddp_close(struct toepcb *, struct tcpcb *, uint32_t);
void handle_ddp_indicate(struct toepcb *);
void handle_ddp_tcb_rpl(struct toepcb *, const struct cpl_set_tcb_rpl *);
void insert_ddp_data(struct toepcb *, uint32_t);
+const struct offload_settings *lookup_offload_policy(struct adapter *, int,
+ struct mbuf *, uint16_t, struct inpcb *);
/* t4_tls.c */
bool can_tls_offload(struct adapter *);