author    | Kip Macy <kmacy@FreeBSD.org> | 2008-05-05 18:35:55 +0000
committer | Kip Macy <kmacy@FreeBSD.org> | 2008-05-05 18:35:55 +0000
commit    | e68ff398875b17e19f8a55cd0f2d3d502c45d821 (patch)
tree      | 5b86b050e1ca019fffc7e10f98517cb5f92d4686
parent    | 143b946188bf3f298f12bee29e7adcbca3215f33 (diff)
download  | src-test2-e68ff398875b17e19f8a55cd0f2d3d502c45d821.tar.gz, src-test2-e68ff398875b17e19f8a55cd0f2d3d502c45d821.zip
32 files changed, 15395 insertions, 0 deletions
diff --git a/sys/contrib/rdma/core_priv.h b/sys/contrib/rdma/core_priv.h new file mode 100644 index 000000000000..5d6c9d805e34 --- /dev/null +++ b/sys/contrib/rdma/core_priv.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: core_priv.h 1349 2004-12-16 21:09:43Z roland $ + * + * $FreeBSD$ + */ + +#ifndef _CORE_PRIV_H +#define _CORE_PRIV_H +#include <sys/mutex.h> +#include <sys/lock.h> + +#include <contrib/rdma/ib_verbs.h> + +#ifdef notyet +int ib_device_register_sysfs(struct ib_device *device); +void ib_device_unregister_sysfs(struct ib_device *device); + +int ib_sysfs_setup(void); +void ib_sysfs_cleanup(void); +#endif + +int ib_cache_setup(void); +void ib_cache_cleanup(void); + +#endif /* _CORE_PRIV_H */ diff --git a/sys/contrib/rdma/ib_addr.h b/sys/contrib/rdma/ib_addr.h new file mode 100644 index 000000000000..3df9949ad28d --- /dev/null +++ b/sys/contrib/rdma/ib_addr.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ * + * $FreeBSD$ + * + */ + +#if !defined(IB_ADDR_H) +#define IB_ADDR_H + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/condvar.h> + +#include <net/if.h> +#include <net/ethernet.h> + +#include <contrib/rdma/ib_verbs.h> + + +#define MAX_ADDR_LEN ETHER_ADDR_LEN /* XXX doesn't support IB! */ + +struct rdma_addr_client { + int refcount; + struct cv comp; + struct mtx lock; +}; + +/** + * rdma_addr_register_client - Register an address client. + */ +void rdma_addr_register_client(struct rdma_addr_client *client); + +/** + * rdma_addr_unregister_client - Deregister an address client. + * @client: Client object to deregister. + */ +void rdma_addr_unregister_client(struct rdma_addr_client *client); + +struct rdma_dev_addr { + unsigned char src_dev_addr[MAX_ADDR_LEN]; + unsigned char dst_dev_addr[MAX_ADDR_LEN]; + unsigned char broadcast[MAX_ADDR_LEN]; + enum rdma_node_type dev_type; +}; + +/** + * rdma_translate_ip - Translate a local IP address to an RDMA hardware + * address. + */ +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr); + +/** + * rdma_resolve_ip - Resolve source and destination IP addresses to + * RDMA hardware addresses. + * @client: Address client associated with request. + * @src_addr: An optional source address to use in the resolution. If a + * source address is not provided, a usable address will be returned via + * the callback. + * @dst_addr: The destination address to resolve. + * @addr: A reference to a data location that will receive the resolved + * addresses. The data location must remain valid until the callback has + * been invoked. + * @timeout_ms: Amount of time to wait for the address resolution to complete. + * @callback: Call invoked once address resolution has completed, timed out, + * or been canceled. A status of 0 indicates success. + * @context: User-specified context associated with the call. + */ +int rdma_resolve_ip(struct rdma_addr_client *client, + struct sockaddr *src_addr, struct sockaddr *dst_addr, + struct rdma_dev_addr *addr, int timeout_ms, + void (*callback)(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context), + void *context); + +void rdma_addr_cancel(struct rdma_dev_addr *addr); + +int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, + const unsigned char *dst_dev_addr); + +static inline int ip_addr_size(struct sockaddr *addr) +{ + return addr->sa_family == AF_INET6 ? 
+ sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); +} + +static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) +{ + return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; +} + +static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) +{ + dev_addr->broadcast[8] = pkey >> 8; + dev_addr->broadcast[9] = (unsigned char) pkey; +} + +static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); +} + +static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid); +} + +static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); +} + +static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid); +} + +static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); +} + +static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(gid, dev_addr->src_dev_addr, sizeof *gid); +} + +static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid); +} + +#endif /* IB_ADDR_H */ diff --git a/sys/contrib/rdma/ib_cache.h b/sys/contrib/rdma/ib_cache.h new file mode 100644 index 000000000000..419bea2f252b --- /dev/null +++ b/sys/contrib/rdma/ib_cache.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $ + * + * $FreeBSD$ + */ + +#ifndef _IB_CACHE_H +#define _IB_CACHE_H + +#include <contrib/rdma/ib_verbs.h> + +/** + * ib_get_cached_gid - Returns a cached GID table entry + * @device: The device to query. + * @port_num: The port number of the device to query. 
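The rdma_resolve_ip() interface above is callback driven. A minimal consumer sketch, not part of this commit: the my_* names and the 2000 ms timeout are illustrative assumptions.

#include <contrib/rdma/ib_addr.h>

static struct rdma_addr_client my_client;
static struct rdma_dev_addr my_dev_addr;

/* Invoked once resolution completes, times out, or is canceled. */
static void
my_resolve_done(int status, struct sockaddr *src_addr,
    struct rdma_dev_addr *addr, void *context)
{
	/* status == 0: addr->dst_dev_addr now holds the hardware address. */
}

static int
my_resolve(struct sockaddr *src, struct sockaddr *dst)
{
	rdma_addr_register_client(&my_client);
	return (rdma_resolve_ip(&my_client, src, dst, &my_dev_addr,
	    2000, my_resolve_done, NULL));
}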
+ * @index: The index into the cached GID table to query. + * @gid: The GID value found at the specified index. + * + * ib_get_cached_gid() fetches the specified GID table entry stored in + * the local software cache. + */ +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid); + +/** + * ib_find_cached_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_cached_gid() searches for the specified GID value in + * the local software cache. + */ +int ib_find_cached_gid(struct ib_device *device, + union ib_gid *gid, + u8 *port_num, + u16 *index); + +/** + * ib_get_cached_pkey - Returns a cached PKey table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached PKey table to query. + * @pkey: The PKey value found at the specified index. + * + * ib_get_cached_pkey() fetches the specified PKey table entry stored in + * the local software cache. + */ +int ib_get_cached_pkey(struct ib_device *device_handle, + u8 port_num, + int index, + u16 *pkey); + +/** + * ib_find_cached_pkey - Returns the PKey table index where a specified + * PKey value occurs. + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the cached PKey table where the PKey was found. + * + * ib_find_cached_pkey() searches the specified PKey table in + * the local software cache. + */ +int ib_find_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index); + +/** + * ib_get_cached_lmc - Returns a cached lmc table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @lmc: The lmc value for the specified port for that device. + * + * ib_get_cached_lmc() fetches the specified lmc table entry stored in + * the local software cache. + */ +int ib_get_cached_lmc(struct ib_device *device, + u8 port_num, + u8 *lmc); + +#endif /* _IB_CACHE_H */ diff --git a/sys/contrib/rdma/ib_cm.h b/sys/contrib/rdma/ib_cm.h new file mode 100644 index 000000000000..5fa918a949ec --- /dev/null +++ b/sys/contrib/rdma/ib_cm.h @@ -0,0 +1,593 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_cm.h 4311 2005-12-05 18:42:01Z sean.hefty $ + * + * $FreeBSD$ + */ + + +#if !defined(IB_CM_H) +#define IB_CM_H + +#include <contrib/rdma/ib_mad.h> +#include <contrib/rdma/ib_sa.h> + +enum ib_cm_state { + IB_CM_IDLE, + IB_CM_LISTEN, + IB_CM_REQ_SENT, + IB_CM_REQ_RCVD, + IB_CM_MRA_REQ_SENT, + IB_CM_MRA_REQ_RCVD, + IB_CM_REP_SENT, + IB_CM_REP_RCVD, + IB_CM_MRA_REP_SENT, + IB_CM_MRA_REP_RCVD, + IB_CM_ESTABLISHED, + IB_CM_DREQ_SENT, + IB_CM_DREQ_RCVD, + IB_CM_TIMEWAIT, + IB_CM_SIDR_REQ_SENT, + IB_CM_SIDR_REQ_RCVD +}; + +enum ib_cm_lap_state { + IB_CM_LAP_UNINIT, + IB_CM_LAP_IDLE, + IB_CM_LAP_SENT, + IB_CM_LAP_RCVD, + IB_CM_MRA_LAP_SENT, + IB_CM_MRA_LAP_RCVD, +}; + +enum ib_cm_event_type { + IB_CM_REQ_ERROR, + IB_CM_REQ_RECEIVED, + IB_CM_REP_ERROR, + IB_CM_REP_RECEIVED, + IB_CM_RTU_RECEIVED, + IB_CM_USER_ESTABLISHED, + IB_CM_DREQ_ERROR, + IB_CM_DREQ_RECEIVED, + IB_CM_DREP_RECEIVED, + IB_CM_TIMEWAIT_EXIT, + IB_CM_MRA_RECEIVED, + IB_CM_REJ_RECEIVED, + IB_CM_LAP_ERROR, + IB_CM_LAP_RECEIVED, + IB_CM_APR_RECEIVED, + IB_CM_SIDR_REQ_ERROR, + IB_CM_SIDR_REQ_RECEIVED, + IB_CM_SIDR_REP_RECEIVED +}; + +enum ib_cm_data_size { + IB_CM_REQ_PRIVATE_DATA_SIZE = 92, + IB_CM_MRA_PRIVATE_DATA_SIZE = 222, + IB_CM_REJ_PRIVATE_DATA_SIZE = 148, + IB_CM_REP_PRIVATE_DATA_SIZE = 196, + IB_CM_RTU_PRIVATE_DATA_SIZE = 224, + IB_CM_DREQ_PRIVATE_DATA_SIZE = 220, + IB_CM_DREP_PRIVATE_DATA_SIZE = 224, + IB_CM_REJ_ARI_LENGTH = 72, + IB_CM_LAP_PRIVATE_DATA_SIZE = 168, + IB_CM_APR_PRIVATE_DATA_SIZE = 148, + IB_CM_APR_INFO_LENGTH = 72, + IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, + IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, + IB_CM_SIDR_REP_INFO_LENGTH = 72, + IB_CM_COMPARE_SIZE = 64 +}; + +struct ib_cm_id; + +struct ib_cm_req_event_param { + struct ib_cm_id *listen_id; + u8 port; + + struct ib_sa_path_rec *primary_path; + struct ib_sa_path_rec *alternate_path; + + __be64 remote_ca_guid; + u32 remote_qkey; + u32 remote_qpn; + enum ib_qp_type qp_type; + + u32 starting_psn; + u8 responder_resources; + u8 initiator_depth; + unsigned int local_cm_response_timeout:5; + unsigned int flow_control:1; + unsigned int remote_cm_response_timeout:5; + unsigned int retry_count:3; + unsigned int rnr_retry_count:3; + unsigned int srq:1; +}; + +struct ib_cm_rep_event_param { + __be64 remote_ca_guid; + u32 remote_qkey; + u32 remote_qpn; + u32 starting_psn; + u8 responder_resources; + u8 initiator_depth; + unsigned int target_ack_delay:5; + unsigned int failover_accepted:2; + unsigned int flow_control:1; + unsigned int rnr_retry_count:3; + unsigned int srq:1; +}; + +enum ib_cm_rej_reason { + IB_CM_REJ_NO_QP = 1, + IB_CM_REJ_NO_EEC = 2, + IB_CM_REJ_NO_RESOURCES = 3, + IB_CM_REJ_TIMEOUT = 4, + IB_CM_REJ_UNSUPPORTED = 5, + IB_CM_REJ_INVALID_COMM_ID = 6, + IB_CM_REJ_INVALID_COMM_INSTANCE = 7, + IB_CM_REJ_INVALID_SERVICE_ID = 8, + 
IB_CM_REJ_INVALID_TRANSPORT_TYPE = 9, + IB_CM_REJ_STALE_CONN = 10, + IB_CM_REJ_RDC_NOT_EXIST = 11, + IB_CM_REJ_INVALID_GID = 12, + IB_CM_REJ_INVALID_LID = 13, + IB_CM_REJ_INVALID_SL = 14, + IB_CM_REJ_INVALID_TRAFFIC_CLASS = 15, + IB_CM_REJ_INVALID_HOP_LIMIT = 16, + IB_CM_REJ_INVALID_PACKET_RATE = 17, + IB_CM_REJ_INVALID_ALT_GID = 18, + IB_CM_REJ_INVALID_ALT_LID = 19, + IB_CM_REJ_INVALID_ALT_SL = 20, + IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = 21, + IB_CM_REJ_INVALID_ALT_HOP_LIMIT = 22, + IB_CM_REJ_INVALID_ALT_PACKET_RATE = 23, + IB_CM_REJ_PORT_CM_REDIRECT = 24, + IB_CM_REJ_PORT_REDIRECT = 25, + IB_CM_REJ_INVALID_MTU = 26, + IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = 27, + IB_CM_REJ_CONSUMER_DEFINED = 28, + IB_CM_REJ_INVALID_RNR_RETRY = 29, + IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30, + IB_CM_REJ_INVALID_CLASS_VERSION = 31, + IB_CM_REJ_INVALID_FLOW_LABEL = 32, + IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33 +}; + +struct ib_cm_rej_event_param { + enum ib_cm_rej_reason reason; + void *ari; + u8 ari_length; +}; + +struct ib_cm_mra_event_param { + u8 service_timeout; +}; + +struct ib_cm_lap_event_param { + struct ib_sa_path_rec *alternate_path; +}; + +enum ib_cm_apr_status { + IB_CM_APR_SUCCESS, + IB_CM_APR_INVALID_COMM_ID, + IB_CM_APR_UNSUPPORTED, + IB_CM_APR_REJECT, + IB_CM_APR_REDIRECT, + IB_CM_APR_IS_CURRENT, + IB_CM_APR_INVALID_QPN_EECN, + IB_CM_APR_INVALID_LID, + IB_CM_APR_INVALID_GID, + IB_CM_APR_INVALID_FLOW_LABEL, + IB_CM_APR_INVALID_TCLASS, + IB_CM_APR_INVALID_HOP_LIMIT, + IB_CM_APR_INVALID_PACKET_RATE, + IB_CM_APR_INVALID_SL +}; + +struct ib_cm_apr_event_param { + enum ib_cm_apr_status ap_status; + void *apr_info; + u8 info_len; +}; + +struct ib_cm_sidr_req_event_param { + struct ib_cm_id *listen_id; + u8 port; + u16 pkey; +}; + +enum ib_cm_sidr_status { + IB_SIDR_SUCCESS, + IB_SIDR_UNSUPPORTED, + IB_SIDR_REJECT, + IB_SIDR_NO_QP, + IB_SIDR_REDIRECT, + IB_SIDR_UNSUPPORTED_VERSION +}; + +struct ib_cm_sidr_rep_event_param { + enum ib_cm_sidr_status status; + u32 qkey; + u32 qpn; + void *info; + u8 info_len; +}; + +struct ib_cm_event { + enum ib_cm_event_type event; + union { + struct ib_cm_req_event_param req_rcvd; + struct ib_cm_rep_event_param rep_rcvd; + /* No data for RTU received events. */ + struct ib_cm_rej_event_param rej_rcvd; + struct ib_cm_mra_event_param mra_rcvd; + struct ib_cm_lap_event_param lap_rcvd; + struct ib_cm_apr_event_param apr_rcvd; + /* No data for DREQ/DREP received events. */ + struct ib_cm_sidr_req_event_param sidr_req_rcvd; + struct ib_cm_sidr_rep_event_param sidr_rep_rcvd; + enum ib_wc_status send_status; + } param; + + void *private_data; +}; + +/** + * ib_cm_handler - User-defined callback to process communication events. + * @cm_id: Communication identifier associated with the reported event. + * @event: Information about the communication event. + * + * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events + * generated as a result of listen requests result in the allocation of a + * new @cm_id. The new @cm_id is returned to the user through this callback. + * Clients are responsible for destroying the new @cm_id. For peer-to-peer + * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds + * to a user's existing communication identifier. + * + * Users may not call ib_destroy_cm_id while in the context of this callback; + * however, returning a non-zero value instructs the communication manager to + * destroy the @cm_id after the callback completes. 
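For illustration, a minimal handler of the shape the ib_cm_handler typedef (declared next) describes; the event cases shown are a hypothetical subset.

static int
my_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		/* cm_id is a newly allocated identifier we now own. */
		return (0);
	case IB_CM_REJ_RECEIVED:
		/* Non-zero tells the CM to destroy cm_id afterwards. */
		return (1);
	default:
		return (0);
	}
}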
+ */ +typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, + struct ib_cm_event *event); + +struct ib_cm_id { + ib_cm_handler cm_handler; + void *context; + struct ib_device *device; + __be64 service_id; + __be64 service_mask; + enum ib_cm_state state; /* internal CM/debug use */ + enum ib_cm_lap_state lap_state; /* internal CM/debug use */ + __be32 local_id; + __be32 remote_id; + u32 remote_cm_qpn; /* 1 unless redirected */ +}; + +/** + * ib_create_cm_id - Allocate a communication identifier. + * @device: Device associated with the cm_id. All related communication will + * be associated with the specified device. + * @cm_handler: Callback invoked to notify the user of CM events. + * @context: User specified context associated with the communication + * identifier. + * + * Communication identifiers are used to track connection states, service + * ID resolution requests, and listen requests. + */ +struct ib_cm_id *ib_create_cm_id(struct ib_device *device, + ib_cm_handler cm_handler, + void *context); + +/** + * ib_destroy_cm_id - Destroy a connection identifier. + * @cm_id: Connection identifier to destroy. + * + * This call blocks until the connection identifier is destroyed. + */ +void ib_destroy_cm_id(struct ib_cm_id *cm_id); + +#define IB_SERVICE_ID_AGN_MASK __constant_cpu_to_be64(0xFF00000000000000ULL) +#define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL) +#define IB_CMA_SERVICE_ID __constant_cpu_to_be64(0x0000000001000000ULL) +#define IB_CMA_SERVICE_ID_MASK __constant_cpu_to_be64(0xFFFFFFFFFF000000ULL) +#define IB_SDP_SERVICE_ID __constant_cpu_to_be64(0x0000000000010000ULL) +#define IB_SDP_SERVICE_ID_MASK __constant_cpu_to_be64(0xFFFFFFFFFFFF0000ULL) + +struct ib_cm_compare_data { + u8 data[IB_CM_COMPARE_SIZE]; + u8 mask[IB_CM_COMPARE_SIZE]; +}; + +/** + * ib_cm_listen - Initiates listening on the specified service ID for + * connection and service ID resolution requests. + * @cm_id: Connection identifier associated with the listen request. + * @service_id: Service identifier matched against incoming connection + * and service ID resolution requests. The service ID should be specified + * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will + * assign a service ID to the caller. + * @service_mask: Mask applied to service ID used to listen across a + * range of service IDs. If set to 0, the service ID is matched + * exactly. This parameter is ignored if %service_id is set to + * IB_CM_ASSIGN_SERVICE_ID. + * @compare_data: This parameter is optional. It specifies data that must + * appear in the private data of a connection request for the specified + * listen request. + */ +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask, + struct ib_cm_compare_data *compare_data); + +struct ib_cm_req_param { + struct ib_sa_path_rec *primary_path; + struct ib_sa_path_rec *alternate_path; + __be64 service_id; + u32 qp_num; + enum ib_qp_type qp_type; + u32 starting_psn; + const void *private_data; + u8 private_data_len; + u8 peer_to_peer; + u8 responder_resources; + u8 initiator_depth; + u8 remote_cm_response_timeout; + u8 flow_control; + u8 local_cm_response_timeout; + u8 retry_count; + u8 rnr_retry_count; + u8 max_cm_retries; + u8 srq; +}; + +/** + * ib_send_cm_req - Sends a connection request to the remote node. + * @cm_id: Connection identifier that will be associated with the + * connection request. + * @param: Connection request information needed to establish the + * connection. 
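A hedged active-side sketch using the request parameters above; the helper name and the timeout/retry values are illustrative only, not recommendations.

static int
my_connect(struct ib_cm_id *cm_id, struct ib_sa_path_rec *path,
    __be64 service_id, u32 qpn)
{
	struct ib_cm_req_param req;

	bzero(&req, sizeof(req));
	req.primary_path = path;
	req.service_id = service_id;
	req.qp_num = qpn;
	req.qp_type = IB_QPT_RC;	/* reliable connected */
	req.responder_resources = 1;
	req.initiator_depth = 1;
	req.retry_count = 7;		/* example values only */
	req.rnr_retry_count = 7;
	req.max_cm_retries = 15;
	return (ib_send_cm_req(cm_id, &req));
}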
+ */ +int ib_send_cm_req(struct ib_cm_id *cm_id, + struct ib_cm_req_param *param); + +struct ib_cm_rep_param { + u32 qp_num; + u32 starting_psn; + const void *private_data; + u8 private_data_len; + u8 responder_resources; + u8 initiator_depth; + u8 target_ack_delay; + u8 failover_accepted; + u8 flow_control; + u8 rnr_retry_count; + u8 srq; +}; + +/** + * ib_send_cm_rep - Sends a connection reply in response to a connection + * request. + * @cm_id: Connection identifier that will be associated with the + * connection request. + * @param: Connection reply information needed to establish the + * connection. + */ +int ib_send_cm_rep(struct ib_cm_id *cm_id, + struct ib_cm_rep_param *param); + +/** + * ib_send_cm_rtu - Sends a connection ready to use message in response + * to a connection reply message. + * @cm_id: Connection identifier associated with the connection request. + * @private_data: Optional user-defined private data sent with the + * ready to use message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_rtu(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_dreq - Sends a disconnection request for an existing + * connection. + * @cm_id: Connection identifier associated with the connection being + * released. + * @private_data: Optional user-defined private data sent with the + * disconnection request message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_dreq(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_drep - Sends a disconnection reply to a disconnection request. + * @cm_id: Connection identifier associated with the connection being + * released. + * @private_data: Optional user-defined private data sent with the + * disconnection reply message. + * @private_data_len: Size of the private data buffer, in bytes. + * + * If the cm_id is in the correct state, the CM will transition the connection + * to the timewait state, even if an error occurs sending the DREP message. + */ +int ib_send_cm_drep(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_cm_notify - Notifies the CM of an event reported to the consumer. + * @cm_id: Connection identifier to transition to established. + * @event: Type of event. + * + * This routine should be invoked by users to notify the CM of relevant + * communication events. Events that should be reported to the CM and + * when to report them are: + * + * IB_EVENT_COMM_EST - Used when a message is received on a connected + * QP before an RTU has been received. + * IB_EVENT_PATH_MIG - Notifies the CM that the connection has failed over + * to the alternate path. + */ +int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event); + +/** + * ib_send_cm_rej - Sends a connection rejection message to the + * remote node. + * @cm_id: Connection identifier associated with the connection being + * rejected. + * @reason: Reason for the connection request rejection. + * @ari: Optional additional rejection information. + * @ari_length: Size of the additional rejection information, in bytes. + * @private_data: Optional user-defined private data sent with the + * rejection message. + * @private_data_len: Size of the private data buffer, in bytes. 
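A corresponding passive-side sketch (hypothetical helper): accepting from within the IB_CM_REQ_RECEIVED callback by sending a REP; the peer then answers with an RTU, reported back as IB_CM_RTU_RECEIVED.

static int
my_accept(struct ib_cm_id *cm_id, u32 qpn)
{
	struct ib_cm_rep_param rep;

	bzero(&rep, sizeof(rep));
	rep.qp_num = qpn;
	rep.responder_resources = 1;
	rep.initiator_depth = 1;
	rep.rnr_retry_count = 7;	/* example value */
	return (ib_send_cm_rep(cm_id, &rep));
}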
+ */ +int ib_send_cm_rej(struct ib_cm_id *cm_id, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection + * message. + * @cm_id: Connection identifier associated with the connection message. + * @service_timeout: The maximum time required for the sender to reply to + * to the connection message. + * @private_data: Optional user-defined private data sent with the + * message receipt acknowledgement. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_mra(struct ib_cm_id *cm_id, + u8 service_timeout, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_lap - Sends a load alternate path request. + * @cm_id: Connection identifier associated with the load alternate path + * message. + * @alternate_path: A path record that identifies the alternate path to + * load. + * @private_data: Optional user-defined private data sent with the + * load alternate path message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_lap(struct ib_cm_id *cm_id, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len); + +/** + * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning + * to a specified QP state. + * @cm_id: Communication identifier associated with the QP attributes to + * initialize. + * @qp_attr: On input, specifies the desired QP state. On output, the + * mandatory and desired optional attributes will be set in order to + * modify the QP to the specified state. + * @qp_attr_mask: The QP attribute mask that may be used to transition the + * QP to the specified state. + * + * Users must set the @qp_attr->qp_state to the desired QP state. This call + * will set all required attributes for the given transition, along with + * known optional attributes. Users may override the attributes returned from + * this call before calling ib_modify_qp. + */ +int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask); + +/** + * ib_send_cm_apr - Sends an alternate path response message in response to + * a load alternate path request. + * @cm_id: Connection identifier associated with the alternate path response. + * @status: Reply status sent with the alternate path response. + * @info: Optional additional information sent with the alternate path + * response. + * @info_length: Size of the additional information, in bytes. + * @private_data: Optional user-defined private data sent with the + * alternate path response message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_apr(struct ib_cm_id *cm_id, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len); + +struct ib_cm_sidr_req_param { + struct ib_sa_path_rec *path; + __be64 service_id; + int timeout_ms; + const void *private_data; + u8 private_data_len; + u8 max_cm_retries; +}; + +/** + * ib_send_cm_sidr_req - Sends a service ID resolution request to the + * remote node. + * @cm_id: Communication identifier that will be associated with the + * service ID resolution request. + * @param: Service ID resolution request information. 
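The documented ib_cm_init_qp_attr() pattern in sketch form, assuming ib_modify_qp() from ib_verbs.h; error handling is trimmed.

static int
my_qp_to_rtr(struct ib_cm_id *cm_id, struct ib_qp *qp)
{
	struct ib_qp_attr attr;
	int mask, err;

	attr.qp_state = IB_QPS_RTR;	/* desired state on input */
	err = ib_cm_init_qp_attr(cm_id, &attr, &mask);
	if (err)
		return (err);
	return (ib_modify_qp(qp, &attr, mask));
}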
+ */ +int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, + struct ib_cm_sidr_req_param *param); + +struct ib_cm_sidr_rep_param { + u32 qp_num; + u32 qkey; + enum ib_cm_sidr_status status; + const void *info; + u8 info_length; + const void *private_data; + u8 private_data_len; +}; + +/** + * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the + * remote node. + * @cm_id: Communication identifier associated with the received service ID + * resolution request. + * @param: Service ID resolution reply information. + */ +int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, + struct ib_cm_sidr_rep_param *param); + +#endif /* IB_CM_H */ diff --git a/sys/contrib/rdma/ib_fmr_pool.h b/sys/contrib/rdma/ib_fmr_pool.h new file mode 100644 index 000000000000..55996b8228f5 --- /dev/null +++ b/sys/contrib/rdma/ib_fmr_pool.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $ + * + * $FreeBSD$ + */ + +#if !defined(IB_FMR_POOL_H) +#define IB_FMR_POOL_H + +#include <rdma/ib_verbs.h> + +struct ib_fmr_pool; + +/** + * struct ib_fmr_pool_param - Parameters for creating FMR pool + * @max_pages_per_fmr:Maximum number of pages per map request. + * @page_shift: Log2 of sizeof "pages" mapped by this fmr + * @access:Access flags for FMRs in pool. + * @pool_size:Number of FMRs to allocate for pool. + * @dirty_watermark:Flush is triggered when @dirty_watermark dirty + * FMRs are present. + * @flush_function:Callback called when unmapped FMRs are flushed and + * more FMRs are possibly available for mapping + * @flush_arg:Context passed to user's flush function. + * @cache:If set, FMRs may be reused after unmapping for identical map + * requests. 
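A usage sketch for the FMR pool parameters documented above (the structure itself is defined just below); the sizes and access flags are illustrative assumptions.

static struct ib_fmr_pool *
my_fmr_pool_create(struct ib_pd *pd)
{
	struct ib_fmr_pool_param param;

	bzero(&param, sizeof(param));
	param.max_pages_per_fmr = 64;
	param.page_shift = 12;		/* 4KB pages */
	param.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
	param.pool_size = 32;
	param.dirty_watermark = 8;	/* flush after 8 dirty FMRs */
	param.cache = 1;		/* reuse identical mappings */
	return (ib_create_fmr_pool(pd, &param));
}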
+ */ +struct ib_fmr_pool_param { + int max_pages_per_fmr; + int page_shift; + enum ib_access_flags access; + int pool_size; + int dirty_watermark; + void (*flush_function)(struct ib_fmr_pool *pool, + void * arg); + void *flush_arg; + unsigned cache:1; +}; + +struct ib_pool_fmr { + struct ib_fmr *fmr; + struct ib_fmr_pool *pool; + struct list_head list; + struct hlist_node cache_node; + int ref_count; + int remap_count; + u64 io_virtual_address; + int page_list_len; + u64 page_list[0]; +}; + +struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, + struct ib_fmr_pool_param *params); + +void ib_destroy_fmr_pool(struct ib_fmr_pool *pool); + +int ib_flush_fmr_pool(struct ib_fmr_pool *pool); + +struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, + u64 *page_list, + int list_len, + u64 io_virtual_address); + +int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); + +#endif /* IB_FMR_POOL_H */ diff --git a/sys/contrib/rdma/ib_mad.h b/sys/contrib/rdma/ib_mad.h new file mode 100644 index 000000000000..7fabc97bc083 --- /dev/null +++ b/sys/contrib/rdma/ib_mad.h @@ -0,0 +1,656 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $ + * + * $FreeBSD$ + */ + +#if !defined( IB_MAD_H ) +#define IB_MAD_H + + +#include <contrib/rdma/ib_verbs.h> + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_DEVICE_ADM 0x10 +#define IB_MGMT_CLASS_BOOT_MGMT 0x11 +#define IB_MGMT_CLASS_BIS 0x12 +#define IB_MGMT_CLASS_CONG_MGMT 0x21 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +#define IB_OPENIB_OUI (0x001405) + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 + +#define IB_MGMT_METHOD_RESP 0x80 +#define IB_BM_ATTR_MOD_RESP cpu_to_be32(1) + +#define IB_MGMT_MAX_METHODS 128 + +/* RMPP information */ +#define IB_MGMT_RMPP_VERSION 1 + +#define IB_MGMT_RMPP_TYPE_DATA 1 +#define IB_MGMT_RMPP_TYPE_ACK 2 +#define IB_MGMT_RMPP_TYPE_STOP 3 +#define IB_MGMT_RMPP_TYPE_ABORT 4 + +#define IB_MGMT_RMPP_FLAG_ACTIVE 1 +#define IB_MGMT_RMPP_FLAG_FIRST (1<<1) +#define IB_MGMT_RMPP_FLAG_LAST (1<<2) + +#define IB_MGMT_RMPP_NO_RESPTIME 0x1F + +#define IB_MGMT_RMPP_STATUS_SUCCESS 0 +#define IB_MGMT_RMPP_STATUS_RESX 1 +#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118 +#define IB_MGMT_RMPP_STATUS_T2L 118 +#define IB_MGMT_RMPP_STATUS_BAD_LEN 119 +#define IB_MGMT_RMPP_STATUS_BAD_SEG 120 +#define IB_MGMT_RMPP_STATUS_BADT 121 +#define IB_MGMT_RMPP_STATUS_W2S 122 +#define IB_MGMT_RMPP_STATUS_S2B 123 +#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124 +#define IB_MGMT_RMPP_STATUS_UNV 125 +#define IB_MGMT_RMPP_STATUS_TMR 126 +#define IB_MGMT_RMPP_STATUS_UNSPEC 127 +#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 + +#define IB_QP0 0 +#define IB_QP1 __constant_htonl(1) +#define IB_QP1_QKEY 0x80010000 +#define IB_QP_SET_QKEY 0x80000000 + +enum { + IB_MGMT_MAD_HDR = 24, + IB_MGMT_MAD_DATA = 232, + IB_MGMT_RMPP_HDR = 36, + IB_MGMT_RMPP_DATA = 220, + IB_MGMT_VENDOR_HDR = 40, + IB_MGMT_VENDOR_DATA = 216, + IB_MGMT_SA_HDR = 56, + IB_MGMT_SA_DATA = 200, + IB_MGMT_DEVICE_HDR = 64, + IB_MGMT_DEVICE_DATA = 192, +}; + +struct ib_mad_hdr { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; +}; + +struct ib_rmpp_hdr { + u8 rmpp_version; + u8 rmpp_type; + u8 rmpp_rtime_flags; + u8 rmpp_status; + __be32 seg_num; + __be32 paylen_newwin; +}; + +typedef u64 ib_sa_comp_mask; + +#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n)) + +/* + * ib_sa_hdr and ib_sa_mad structures must be packed because they have + * 64-bit fields that are only 32-bit aligned. 64-bit architectures will + * lay them out wrong otherwise. 
(And unfortunately they are sent on + * the wire so we can't change the layout) + */ +struct ib_sa_hdr { + __be64 sm_key; + __be16 attr_offset; + __be16 reserved; + ib_sa_comp_mask comp_mask; +} __attribute__ ((packed)); + +struct ib_mad { + struct ib_mad_hdr mad_hdr; + u8 data[IB_MGMT_MAD_DATA]; +}; + +struct ib_rmpp_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 data[IB_MGMT_RMPP_DATA]; +}; + +struct ib_sa_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + u8 data[IB_MGMT_SA_DATA]; +} __attribute__ ((packed)); + +struct ib_vendor_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + u8 data[IB_MGMT_VENDOR_DATA]; +}; + +struct ib_class_port_info +{ + u8 base_version; + u8 class_version; + __be16 capability_mask; + u8 reserved[3]; + u8 resp_time_value; + u8 redirect_gid[16]; + __be32 redirect_tcslfl; + __be16 redirect_lid; + __be16 redirect_pkey; + __be32 redirect_qp; + __be32 redirect_qkey; + u8 trap_gid[16]; + __be32 trap_tcslfl; + __be16 trap_lid; + __be16 trap_pkey; + __be32 trap_hlqp; + __be32 trap_qkey; +}; + +/** + * ib_mad_send_buf - MAD data buffer and work request for sends. + * @next: A pointer used to chain together MADs for posting. + * @mad: References an allocated MAD data buffer for MADs that do not have + * RMPP active. For MADs using RMPP, references the common and management + * class specific headers. + * @mad_agent: MAD agent that allocated the buffer. + * @ah: The address handle to use when sending the MAD. + * @context: User-controlled context fields. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * includes the common MAD, RMPP, and class specific headers. + * @data_len: Indicates the total size of user-transferred data. + * @seg_count: The number of RMPP segments allocated for this send. + * @seg_size: Size of each RMPP segment. + * @timeout_ms: Time to wait for a response. + * @retries: Number of times to retry a request for a response. + * + * Users are responsible for initializing the MAD buffer itself, with the + * exception of any RMPP header. Additional segment buffer space allocated + * beyond data_len is padding. + */ +struct ib_mad_send_buf { + struct ib_mad_send_buf *next; + void *mad; + struct ib_mad_agent *mad_agent; + struct ib_ah *ah; + void *context[2]; + int hdr_len; + int data_len; + int seg_count; + int seg_size; + int timeout_ms; + int retries; +}; + +/** + * ib_response_mad - Returns if the specified MAD has been generated in + * response to a sent request or trap. + */ +int ib_response_mad(struct ib_mad *mad); + +/** + * ib_get_rmpp_resptime - Returns the RMPP response time. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags >> 3; +} + +/** + * ib_get_rmpp_flags - Returns the RMPP flags. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags & 0x7; +} + +/** + * ib_set_rmpp_resptime - Sets the response time in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @rtime: The response time to set. + */ +static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) +{ + rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3); +} + +/** + * ib_set_rmpp_flags - Sets the flags in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @flags: The flags to set. 
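A worked example of the accessors above, assuming a header zeroed by the allocator: rmpp_rtime_flags packs the 5-bit response time in the upper bits and the 3-bit flags in the lower bits.

static void
my_rmpp_first_seg(struct ib_rmpp_hdr *hdr)
{
	ib_set_rmpp_flags(hdr, IB_MGMT_RMPP_FLAG_ACTIVE |
	    IB_MGMT_RMPP_FLAG_FIRST);
	ib_set_rmpp_resptime(hdr, IB_MGMT_RMPP_NO_RESPTIME);
	/* flags 0x3 with rtime 0x1f give rmpp_rtime_flags == 0xfb */
}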
+ */ +static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags) +{ + rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF1) | + (flags & 0x7); +} + +struct ib_mad_agent; +struct ib_mad_send_wc; +struct ib_mad_recv_wc; + +/** + * ib_mad_send_handler - callback handler for a sent MAD. + * @mad_agent: MAD agent that sent the MAD. + * @mad_send_wc: Send work completion information on the sent MAD. + */ +typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc); + +/** + * ib_mad_snoop_handler - Callback handler for snooping sent MADs. + * @mad_agent: MAD agent that snooped the MAD. + * @send_wr: Work request information on the sent MAD. + * @mad_send_wc: Work completion information on the sent MAD. Valid + * only for snooping that occurs on a send completion. + * + * Clients snooping MADs should not modify data referenced by the @send_wr + * or @mad_send_wc. + */ +typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, + struct ib_mad_send_wc *mad_send_wc); + +/** + * ib_mad_recv_handler - callback handler for a received MAD. + * @mad_agent: MAD agent requesting the received MAD. + * @mad_recv_wc: Received work completion information on the received MAD. + * + * MADs received in response to a send request operation will be handed to + * the user before the send operation completes. All data buffers given + * to registered agents through this routine are owned by the receiving + * client, except for snooping agents. Clients snooping MADs should not + * modify the data referenced by @mad_recv_wc. + */ +typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_mad_agent - Used to track MAD registration with the access layer. + * @device: Reference to device registration is on. + * @qp: Reference to QP used for sending and receiving MADs. + * @mr: Memory region for system memory usable for DMA. + * @recv_handler: Callback handler for a received MAD. + * @send_handler: Callback handler for a sent MAD. + * @snoop_handler: Callback handler for snooped sent MADs. + * @context: User-specified context associated with this registration. + * @hi_tid: Access layer assigned transaction ID for this client. + * Unsolicited MADs sent by this client will have the upper 32-bits + * of their TID set to this value. + * @port_num: Port number on which QP is registered + * @rmpp_version: If set, indicates the RMPP version used by this agent. + */ +struct ib_mad_agent { + struct ib_device *device; + struct ib_qp *qp; + struct ib_mr *mr; + ib_mad_recv_handler recv_handler; + ib_mad_send_handler send_handler; + ib_mad_snoop_handler snoop_handler; + void *context; + u32 hi_tid; + u8 port_num; + u8 rmpp_version; +}; + +/** + * ib_mad_send_wc - MAD send completion information. + * @send_buf: Send MAD data buffer associated with the send MAD request. + * @status: Completion status. + * @vendor_err: Optional vendor error information returned with a failed + * request. + */ +struct ib_mad_send_wc { + struct ib_mad_send_buf *send_buf; + enum ib_wc_status status; + u32 vendor_err; +}; + +/** + * ib_mad_recv_buf - received MAD buffer information. + * @list: Reference to next data buffer for a received RMPP MAD. + * @grh: References a data buffer containing the global route header. + * The data refereced by this buffer is only valid if the GRH is + * valid. + * @mad: References the start of the received MAD. 
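An illustrative receive handler of the shape the ib_mad_recv_handler typedef above describes; per the ownership rule there, the buffers are returned with ib_free_recv_mad(), declared later in this header.

static void
my_recv_handler(struct ib_mad_agent *mad_agent,
    struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_mad *mad = mad_recv_wc->recv_buf.mad;

	if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
		/* ... consume the response ... */
	}
	/* The receiving client owns the buffers and must free them. */
	ib_free_recv_mad(mad_recv_wc);
}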
+ */ +struct ib_mad_recv_buf { + TAILQ_ENTRY(ib_mad_recv_buf) entry; + struct ib_grh *grh; + struct ib_mad *mad; +}; + +/** + * ib_mad_recv_wc - received MAD information. + * @wc: Completion information for the received data. + * @recv_buf: Specifies the location of the received data buffer(s). + * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. + * @mad_len: The length of the received MAD, without duplicated headers. + * + * For received response, the wr_id contains a pointer to the ib_mad_send_buf + * for the corresponding send request. + */ +struct ib_mad_recv_wc { + struct ib_wc *wc; + struct ib_mad_recv_buf recv_buf; + TAILQ_ENTRY(ib_mad_recv_wc) entry; + int mad_len; +}; + +/** + * ib_mad_reg_req - MAD registration request + * @mgmt_class: Indicates which management class of MADs should be receive + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. + * @mgmt_class_version: Indicates which version of MADs for the given + * management class to receive. + * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @method_mask: The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + */ +struct ib_mad_reg_req { + u8 mgmt_class; + u8 mgmt_class_version; + u8 oui[3]; +#ifdef notyet + DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS); +#endif +}; + +/** + * ib_register_mad_agent - Register to send/receive MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP to access. Must be either + * IB_QPT_SMI or IB_QPT_GSI. + * @mad_reg_req: Specifies which unsolicited MADs should be received + * by the caller. This parameter may be NULL if the caller only + * wishes to receive solicited responses. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + struct ib_mad_reg_req *mad_reg_req, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +enum ib_mad_snoop_flags { + /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ + /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ + IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), + /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ + IB_MAD_SNOOP_RECVS = (1<<4) + /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ + /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ +}; + +/** + * ib_register_mad_snoop - Register to snoop sent and received MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP traffic to snoop. Must be either + * IB_QPT_SMI or IB_QPT_GSI. + * @mad_snoop_flags: Specifies information where snooping occurs. + * @send_handler: The callback routine invoked for a snooped send. + * @recv_handler: The callback routine invoked for a snooped receive. + * @context: User specified context associated with the registration. 
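A hypothetical registration sketch: attach an agent to the GSI (QP1) on port 1, receiving only solicited responses (NULL registration request, no RMPP). my_send_handler and my_recv_handler are assumed callbacks of the handler types above.

static struct ib_mad_agent *
my_agent_create(struct ib_device *device)
{
	return (ib_register_mad_agent(device, 1 /* port */, IB_QPT_GSI,
	    NULL /* solicited responses only */, 0 /* no RMPP */,
	    my_send_handler, my_recv_handler, NULL));
}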
+ */ +struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + int mad_snoop_flags, + ib_mad_snoop_handler snoop_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_unregister_mad_agent - Unregisters a client from using MAD services. + * @mad_agent: Corresponding MAD registration request to deregister. + * + * After invoking this routine, MAD services are no longer usable by the + * client on the associated QP. + */ +int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); + +/** + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client. + * @send_buf: Specifies the information needed to send the MAD(s). + * @bad_send_buf: Specifies the MAD on which an error was encountered. This + * parameter is optional if only a single MAD is posted. + * + * Sent MADs are not guaranteed to complete in the order that they were posted. + * + * If the MAD requires RMPP, the data buffer should contain a single copy + * of the common MAD, RMPP, and class specific headers, followed by the class + * defined data. If the class defined data would not divide evenly into + * RMPP segments, then space must be allocated at the end of the referenced + * buffer for any required padding. To indicate the amount of class defined + * data being transferred, the paylen_newwin field in the RMPP header should + * be set to the size of the class specific header plus the amount of class + * defined data being transferred. The paylen_newwin field should be + * specified in network-byte order. + */ +int ib_post_send_mad(struct ib_mad_send_buf *send_buf, + struct ib_mad_send_buf **bad_send_buf); + + +/** + * ib_free_recv_mad - Returns data buffers used to receive a MAD. + * @mad_recv_wc: Work completion information for a received MAD. + * + * Clients receiving MADs through their ib_mad_recv_handler must call this + * routine to return the work completion buffers to the access layer. + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @send_buf: Indicates the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. + */ +void ib_cancel_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf); + +/** + * ib_modify_mad - Modifies an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @send_buf: Indicates the MAD to modify. + * @timeout_ms: New timeout value for sent MAD. + * + * This call will reset the timeout value for a sent MAD to the specified + * value. + */ +int ib_modify_mad(struct ib_mad_agent *mad_agent, + struct ib_mad_send_buf *send_buf, u32 timeout_ms); + +/** + * ib_redirect_mad_qp - Registers a QP for MAD services. + * @qp: Reference to a QP that requires MAD services. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + * + * Use of this call allows clients to use MAD services, such as RMPP, + * on user-owned QPs. 
After calling this routine, users may send + * MADs on the specified QP by calling ib_mad_post_send. + */ +struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_process_mad_wc - Processes a work completion associated with a + * MAD sent or received on a redirected QP. + * @mad_agent: Specifies the registered MAD service using the redirected QP. + * @wc: References a work completion associated with a sent or received + * MAD segment. + * + * This routine is used to complete or continue processing on a MAD request. + * If the work completion is associated with a send operation, calling + * this routine is required to continue an RMPP transfer or to wait for a + * corresponding response, if it is a request. If the work completion is + * associated with a receive operation, calling this routine is required to + * process an inbound or outbound RMPP transfer, or to match a response MAD + * with its corresponding request. + */ +int ib_process_mad_wc(struct ib_mad_agent *mad_agent, + struct ib_wc *wc); + +/** + * ib_create_send_mad - Allocate and initialize a data buffer and work request + * for sending a MAD. + * @mad_agent: Specifies the registered MAD service to associate with the MAD. + * @remote_qpn: Specifies the QPN of the receiving node. + * @pkey_index: Specifies which PKey the MAD will be sent using. This field + * is valid only if the remote_qpn is QP 1. + * @rmpp_active: Indicates if the send will enable RMPP. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * should include the common MAD header, RMPP header, plus any class + * specific header. + * @data_len: Indicates the size of any user-transferred data. The call will + * automatically adjust the allocated buffer size to account for any + * additional padding that may be necessary. + * @gfp_mask: GFP mask used for the memory allocation. + * + * This routine allocates a MAD for sending. The returned MAD send buffer + * will reference a data buffer usable for sending a MAD, along + * with an initialized work request structure. Users may modify the returned + * MAD data buffer before posting the send. + * + * The returned MAD header, class specific headers, and any padding will be + * cleared. Users are responsible for initializing the common MAD header, + * any class specific header, and MAD data area. + * If @rmpp_active is set, the RMPP header will be initialized for sending. + */ +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + int rmpp_active, + int hdr_len, int data_len, + int gfp_mask); + +/** + * ib_is_mad_class_rmpp - returns whether given management class + * supports RMPP. + * @mgmt_class: management class + * + * This routine returns whether the management class supports RMPP. + */ +int ib_is_mad_class_rmpp(u8 mgmt_class); + +/** + * ib_get_mad_data_offset - returns the data offset for a given + * management class. + * @mgmt_class: management class + * + * This routine returns the data offset in the MAD for the management + * class requested. + */ +int ib_get_mad_data_offset(u8 mgmt_class); + +/** + * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment. + * @send_buf: Previously allocated send data buffer. + * @seg_num: number of segment to return + * + * This routine returns a pointer to the data buffer of an RMPP MAD. 
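A hedged send-side sketch combining ib_create_send_mad() above with ib_post_send_mad(): the M_NOWAIT allocation flag and the NULL-on-failure convention are assumptions about this port, and ah is an address handle created elsewhere.

static int
my_send_get(struct ib_mad_agent *agent, struct ib_ah *ah)
{
	struct ib_mad_send_buf *msg;
	struct ib_mad *mad;

	msg = ib_create_send_mad(agent, 1 /* QP1 */, 0 /* pkey index */,
	    0 /* no RMPP */, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, M_NOWAIT);
	if (msg == NULL)
		return (ENOMEM);
	msg->ah = ah;
	msg->timeout_ms = 1000;		/* example values */
	msg->retries = 3;
	mad = msg->mad;
	mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
	mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
	mad->mad_hdr.class_version = 1;
	mad->mad_hdr.method = IB_MGMT_METHOD_GET;
	return (ib_post_send_mad(msg, NULL));
}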
+ * Users must provide synchronization to @send_buf around this call. + */ +void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num); + +/** + * ib_free_send_mad - Returns data buffers used to send a MAD. + * @send_buf: Previously allocated send data buffer. + */ +void ib_free_send_mad(struct ib_mad_send_buf *send_buf); + +#endif /* IB_MAD_H */ diff --git a/sys/contrib/rdma/ib_marshall.h b/sys/contrib/rdma/ib_marshall.h new file mode 100644 index 000000000000..60cd219efd04 --- /dev/null +++ b/sys/contrib/rdma/ib_marshall.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $FreeBSD$ + */ + +#if !defined(IB_USER_MARSHALL_H) +#define IB_USER_MARSHALL_H + +#include <rdma/ib_verbs.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_user_sa.h> + +void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, + struct ib_qp_attr *src); + +void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, + struct ib_ah_attr *src); + +void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, + struct ib_sa_path_rec *src); + +void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, + struct ib_user_path_rec *src); + +#endif /* IB_USER_MARSHALL_H */ diff --git a/sys/contrib/rdma/ib_pack.h b/sys/contrib/rdma/ib_pack.h new file mode 100644 index 000000000000..206d1f1c0a63 --- /dev/null +++ b/sys/contrib/rdma/ib_pack.h @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $ + * + * $FreeBSD$ + */ + +#ifndef IB_PACK_H +#define IB_PACK_H + +#include <rdma/ib_verbs.h> + +enum { + IB_LRH_BYTES = 8, + IB_GRH_BYTES = 40, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8 +}; + +struct ib_field { + size_t struct_offset_bytes; + size_t struct_size_bytes; + int offset_words; + int offset_bits; + int size_bits; + char *field_name; +}; + +#define RESERVED \ + .field_name = "reserved" + +/* + * This macro cleans up the definitions of constants for BTH opcodes. + * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, + * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives + * the correct value. + * + * In short, user code should use the constants defined using the + * macro rather than worrying about adding together other constants. +*/ +#define IB_OPCODE(transport, op) \ + IB_OPCODE_ ## transport ## _ ## op = \ + IB_OPCODE_ ## transport + IB_OPCODE_ ## op + +enum { + /* transport types -- just used to define real constants */ + IB_OPCODE_RC = 0x00, + IB_OPCODE_UC = 0x20, + IB_OPCODE_RD = 0x40, + IB_OPCODE_UD = 0x60, + + /* operations -- just used to define real constants */ + IB_OPCODE_SEND_FIRST = 0x00, + IB_OPCODE_SEND_MIDDLE = 0x01, + IB_OPCODE_SEND_LAST = 0x02, + IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, + IB_OPCODE_SEND_ONLY = 0x04, + IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, + IB_OPCODE_RDMA_WRITE_FIRST = 0x06, + IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, + IB_OPCODE_RDMA_WRITE_LAST = 0x08, + IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, + IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, + IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, + IB_OPCODE_RDMA_READ_REQUEST = 0x0c, + IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, + IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, + IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, + IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, + IB_OPCODE_ACKNOWLEDGE = 0x11, + IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, + IB_OPCODE_COMPARE_SWAP = 0x13, + IB_OPCODE_FETCH_ADD = 0x14, + + /* real constants follow -- see comment about above IB_OPCODE() + macro for more details */ + + /* RC */ + IB_OPCODE(RC, SEND_FIRST), + IB_OPCODE(RC, SEND_MIDDLE), + IB_OPCODE(RC, SEND_LAST), + IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, SEND_ONLY), + IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_FIRST), + IB_OPCODE(RC, RDMA_WRITE_MIDDLE), + IB_OPCODE(RC, RDMA_WRITE_LAST), + IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_ONLY), + IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_READ_REQUEST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RC, ACKNOWLEDGE), + IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RC, 
COMPARE_SWAP), + IB_OPCODE(RC, FETCH_ADD), + + /* UC */ + IB_OPCODE(UC, SEND_FIRST), + IB_OPCODE(UC, SEND_MIDDLE), + IB_OPCODE(UC, SEND_LAST), + IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, SEND_ONLY), + IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_FIRST), + IB_OPCODE(UC, RDMA_WRITE_MIDDLE), + IB_OPCODE(UC, RDMA_WRITE_LAST), + IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_ONLY), + IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + + /* RD */ + IB_OPCODE(RD, SEND_FIRST), + IB_OPCODE(RD, SEND_MIDDLE), + IB_OPCODE(RD, SEND_LAST), + IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, SEND_ONLY), + IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_FIRST), + IB_OPCODE(RD, RDMA_WRITE_MIDDLE), + IB_OPCODE(RD, RDMA_WRITE_LAST), + IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_ONLY), + IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_READ_REQUEST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RD, ACKNOWLEDGE), + IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RD, COMPARE_SWAP), + IB_OPCODE(RD, FETCH_ADD), + + /* UD */ + IB_OPCODE(UD, SEND_ONLY), + IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) +}; + +enum { + IB_LNH_RAW = 0, + IB_LNH_IP = 1, + IB_LNH_IBA_LOCAL = 2, + IB_LNH_IBA_GLOBAL = 3 +}; + +struct ib_unpacked_lrh { + u8 virtual_lane; + u8 link_version; + u8 service_level; + u8 link_next_header; + __be16 destination_lid; + __be16 packet_length; + __be16 source_lid; +}; + +struct ib_unpacked_grh { + u8 ip_version; + u8 traffic_class; + __be32 flow_label; + __be16 payload_length; + u8 next_header; + u8 hop_limit; + union ib_gid source_gid; + union ib_gid destination_gid; +}; + +struct ib_unpacked_bth { + u8 opcode; + u8 solicited_event; + u8 mig_req; + u8 pad_count; + u8 transport_header_version; + __be16 pkey; + __be32 destination_qpn; + u8 ack_req; + __be32 psn; +}; + +struct ib_unpacked_deth { + __be32 qkey; + __be32 source_qpn; +}; + +struct ib_ud_header { + struct ib_unpacked_lrh lrh; + int grh_present; + struct ib_unpacked_grh grh; + struct ib_unpacked_bth bth; + struct ib_unpacked_deth deth; + int immediate_present; + __be32 immediate_data; +}; + +void ib_pack(const struct ib_field *desc, + int desc_len, + void *structure, + void *buf); + +void ib_unpack(const struct ib_field *desc, + int desc_len, + void *buf, + void *structure); + +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header); + +int ib_ud_header_pack(struct ib_ud_header *header, + void *buf); + +int ib_ud_header_unpack(void *buf, + struct ib_ud_header *header); + +#endif /* IB_PACK_H */ diff --git a/sys/contrib/rdma/ib_sa.h b/sys/contrib/rdma/ib_sa.h new file mode 100644 index 000000000000..bf6a28a1c616 --- /dev/null +++ b/sys/contrib/rdma/ib_sa.h @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $ + * + * $FreeBSD$ + */ + +#ifndef IB_SA_H +#define IB_SA_H + +#include <machine/atomic.h> + +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_mad.h> + +enum { + IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ + + IB_SA_METHOD_GET_TABLE = 0x12, + IB_SA_METHOD_GET_TABLE_RESP = 0x92, + IB_SA_METHOD_DELETE = 0x15, + IB_SA_METHOD_DELETE_RESP = 0x95, + IB_SA_METHOD_GET_MULTI = 0x14, + IB_SA_METHOD_GET_MULTI_RESP = 0x94, + IB_SA_METHOD_GET_TRACE_TBL = 0x13 +}; + +enum { + IB_SA_ATTR_CLASS_PORTINFO = 0x01, + IB_SA_ATTR_NOTICE = 0x02, + IB_SA_ATTR_INFORM_INFO = 0x03, + IB_SA_ATTR_NODE_REC = 0x11, + IB_SA_ATTR_PORT_INFO_REC = 0x12, + IB_SA_ATTR_SL2VL_REC = 0x13, + IB_SA_ATTR_SWITCH_REC = 0x14, + IB_SA_ATTR_LINEAR_FDB_REC = 0x15, + IB_SA_ATTR_RANDOM_FDB_REC = 0x16, + IB_SA_ATTR_MCAST_FDB_REC = 0x17, + IB_SA_ATTR_SM_INFO_REC = 0x18, + IB_SA_ATTR_LINK_REC = 0x20, + IB_SA_ATTR_GUID_INFO_REC = 0x30, + IB_SA_ATTR_SERVICE_REC = 0x31, + IB_SA_ATTR_PARTITION_REC = 0x33, + IB_SA_ATTR_PATH_REC = 0x35, + IB_SA_ATTR_VL_ARB_REC = 0x36, + IB_SA_ATTR_MC_MEMBER_REC = 0x38, + IB_SA_ATTR_TRACE_REC = 0x39, + IB_SA_ATTR_MULTI_PATH_REC = 0x3a, + IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b, + IB_SA_ATTR_INFORM_INFO_REC = 0xf3 +}; + +enum ib_sa_selector { + IB_SA_GT = 0, + IB_SA_LT = 1, + IB_SA_EQ = 2, + /* + * The meaning of "best" depends on the attribute: for + * example, for MTU best will return the largest available + * MTU, while for packet life time, best will return the + * smallest available life time. + */ + IB_SA_BEST = 3 +}; + +/* + * Structures for SA records are named "struct ib_sa_xxx_rec." No + * attempt is made to pack structures to match the physical layout of + * SA records in SA MADs; all packing and unpacking is handled by the + * SA query code. + * + * For a record with structure ib_sa_xxx_rec, the naming convention + * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we + * never use different abbreviations or otherwise change the spelling + * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY). + * + * Reserved rows are indicated with comments to help maintainability. 
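+ *
+ * For example, a path record query that matches on the source and
+ * destination GIDs would fill in those fields and pass the
+ * corresponding mask bits (a sketch; the GID variables are
+ * illustrative):
+ *
+ *	struct ib_sa_path_rec rec;
+ *	ib_sa_comp_mask mask;
+ *
+ *	memset(&rec, 0, sizeof rec);
+ *	rec.dgid = remote_gid;
+ *	rec.sgid = local_gid;
+ *	mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID;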
+ */ + +/* reserved: 0 */ +/* reserved: 1 */ +#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) +#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) +#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) +#define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5) +#define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6) +/* reserved: 7 */ +#define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8) +#define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9) +#define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10) +#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) +#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) +#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) +/* reserved: 14 */ +#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) +#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) +#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) +#define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18) +#define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19) +#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20) +#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21) +#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) + +struct ib_sa_path_rec { + /* reserved */ + /* reserved */ + union ib_gid dgid; + union ib_gid sgid; + __be16 dlid; + __be16 slid; + int raw_traffic; + /* reserved */ + __be32 flow_label; + u8 hop_limit; + u8 traffic_class; + int reversible; + u8 numb_path; + __be16 pkey; + /* reserved */ + u8 sl; + u8 mtu_selector; + u8 mtu; + u8 rate_selector; + u8 rate; + u8 packet_life_time_selector; + u8 packet_life_time; + u8 preference; +}; + +#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) +#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) +#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) +#define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3) +#define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4) +#define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5) +#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6) +#define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7) +#define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8) +#define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11) +#define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12) +#define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13) +#define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14) +#define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15) +#define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16) +#define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17) + +struct ib_sa_mcmember_rec { + union ib_gid mgid; + union ib_gid port_gid; + __be32 qkey; + __be16 mlid; + u8 mtu_selector; + u8 mtu; + u8 traffic_class; + __be16 pkey; + u8 rate_selector; + u8 rate; + u8 packet_life_time_selector; + u8 packet_life_time; + u8 sl; + __be32 flow_label; + u8 hop_limit; + u8 scope; + u8 join_state; + int proxy_join; +}; + +/* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ +#define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0) +#define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1) +#define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2) +/* reserved: 3 */ +#define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4) +#define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5) +#define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_2 
IB_SA_COMP_MASK( 9) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_9 IB_SA_COMP_MASK(16) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34) +#define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35) +#define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36) + +#define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF + +struct ib_sa_service_rec { + u64 id; + union ib_gid gid; + __be16 pkey; + /* reserved */ + u32 lease; + u8 key[16]; + u8 name[64]; + u8 data8[16]; + u16 data16[8]; + u32 data32[4]; + u64 data64[2]; +}; + +struct ib_sa_client { + volatile int users; +#ifdef notyet + struct completion comp; +#endif +}; + +/** + * ib_sa_register_client - Register an SA client. + */ +void ib_sa_register_client(struct ib_sa_client *client); + +/** + * ib_sa_unregister_client - Deregister an SA client. + * @client: Client object to deregister. + */ +void ib_sa_unregister_client(struct ib_sa_client *client); + +struct ib_sa_query; + +void ib_sa_cancel_query(int id, struct ib_sa_query *query); + +int ib_sa_path_rec_get(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_path_rec *resp, + void *context), + void *context, + struct ib_sa_query **query); + +int ib_sa_service_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, int gfp_mask, + void (*callback)(int status, + struct ib_sa_service_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query); + +struct ib_sa_multicast { + struct ib_sa_mcmember_rec rec; + ib_sa_comp_mask comp_mask; + int (*callback)(int status, + struct ib_sa_multicast *multicast); + void *context; +}; + +/** + * ib_sa_join_multicast - Initiates a join request to the specified multicast + * group. + * @client: SA client + * @device: Device associated with the multicast group. 
+ * @port_num: Port on the specified device to associate with the multicast
+ * group.
+ * @rec: SA multicast member record specifying group attributes.
+ * @comp_mask: Component mask indicating which group attributes of %rec are
+ * valid.
+ * @gfp_mask: GFP mask for memory allocations.
+ * @callback: User callback invoked once the join operation completes.
+ * @context: User specified context stored with the ib_sa_multicast structure.
+ *
+ * This call initiates a multicast join request with the SA for the specified
+ * multicast group. If the join operation is started successfully, it returns
+ * an ib_sa_multicast structure that is used to track the multicast operation.
+ * Users must free this structure by calling ib_sa_free_multicast, even if the
+ * join operation later fails (that is, if the callback status is non-zero).
+ *
+ * If the join operation fails, status will be non-zero, with the following
+ * failures possible:
+ * ETIMEDOUT: The request timed out.
+ * EIO: An error occurred sending the query.
+ * EINVAL: The MCMemberRecord values differed from the existing group's.
+ * ENETRESET: Indicates that a fatal error has occurred on the multicast
+ * group, and the user must rejoin the group to continue using it.
+ */
+struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client,
+					     struct ib_device *device, u8 port_num,
+					     struct ib_sa_mcmember_rec *rec,
+					     ib_sa_comp_mask comp_mask, int gfp_mask,
+					     int (*callback)(int status,
+							     struct ib_sa_multicast
+							     *multicast),
+					     void *context);
+
+/**
+ * ib_sa_free_multicast - Frees the multicast tracking structure, and releases
+ * any reference on the multicast group.
+ * @multicast: Multicast tracking structure allocated by ib_sa_join_multicast.
+ *
+ * This call blocks until the multicast identifier is destroyed. It may
+ * not be called from within the multicast callback; however, returning a
+ * non-zero value from the callback will result in destroying the multicast
+ * tracking structure.
+ */
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast);
+
+/**
+ * ib_sa_get_mcmember_rec - Looks up a multicast member record by its MGID and
+ * returns it if found.
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ * group.
+ * @mgid: MGID of multicast group.
+ * @rec: Location to copy SA multicast member record.
+ */
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec);
+
+/**
+ * ib_init_ah_from_mcmember - Initialize address handle attributes based on
+ * an SA multicast member record.
+ */
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+			     struct ib_sa_mcmember_rec *rec,
+			     struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_init_ah_from_path - Initialize address handle attributes based on an SA
+ * path record.
+ */
+int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
+			 struct ib_sa_path_rec *rec,
+			 struct ib_ah_attr *ah_attr);
+
+#endif /* IB_SA_H */
diff --git a/sys/contrib/rdma/ib_smi.h b/sys/contrib/rdma/ib_smi.h
new file mode 100644
index 000000000000..0e4b1e940d8e
--- /dev/null
+++ b/sys/contrib/rdma/ib_smi.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved.
+ * Copyright (c) 2004 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $ + * + * $FreeBSD$ + */ + +#if !defined( IB_SMI_H ) +#define IB_SMI_H + +#include <rdma/ib_mad.h> + +#define IB_SMP_DATA_SIZE 64 +#define IB_SMP_MAX_PATH_HOPS 64 + +struct ib_smp { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + u8 hop_ptr; + u8 hop_cnt; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 mkey; + __be16 dr_slid; + __be16 dr_dlid; + u8 reserved[28]; + u8 data[IB_SMP_DATA_SIZE]; + u8 initial_path[IB_SMP_MAX_PATH_HOPS]; + u8 return_path[IB_SMP_MAX_PATH_HOPS]; +} __attribute__ ((packed)); + +#define IB_SMP_DIRECTION __constant_htons(0x8000) + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE __constant_htons(0x0002) +#define IB_SMP_ATTR_NODE_DESC __constant_htons(0x0010) +#define IB_SMP_ATTR_NODE_INFO __constant_htons(0x0011) +#define IB_SMP_ATTR_SWITCH_INFO __constant_htons(0x0012) +#define IB_SMP_ATTR_GUID_INFO __constant_htons(0x0014) +#define IB_SMP_ATTR_PORT_INFO __constant_htons(0x0015) +#define IB_SMP_ATTR_PKEY_TABLE __constant_htons(0x0016) +#define IB_SMP_ATTR_SL_TO_VL_TABLE __constant_htons(0x0017) +#define IB_SMP_ATTR_VL_ARB_TABLE __constant_htons(0x0018) +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE __constant_htons(0x0019) +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE __constant_htons(0x001A) +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE __constant_htons(0x001B) +#define IB_SMP_ATTR_SM_INFO __constant_htons(0x0020) +#define IB_SMP_ATTR_VENDOR_DIAG __constant_htons(0x0030) +#define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031) +#define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00) + +struct ib_port_info { + __be64 mkey; + __be64 gid_prefix; + __be16 lid; + __be16 sm_lid; + __be32 cap_mask; + __be16 diag_code; + __be16 mkey_lease_period; + u8 local_port_num; + u8 link_width_enabled; + u8 link_width_supported; + u8 link_width_active; + u8 linkspeed_portstate; /* 4 bits, 4 bits */ + u8 portphysstate_linkdown; /* 4 bits, 4 bits */ + u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */ + u8 linkspeedactive_enabled; /* 4 bits, 4 bits */ + u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */ + u8 vlcap_inittype; /* 4 bits, 4 bits */ + u8 vl_high_limit; + u8 
vl_arb_high_cap; + u8 vl_arb_low_cap; + u8 inittypereply_mtucap; /* 4 bits, 4 bits */ + u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */ + u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */ + __be16 mkey_violations; + __be16 pkey_violations; + __be16 qkey_violations; + u8 guid_cap; + u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */ + u8 resv_resptimevalue; /* 3 bits, 5 bits */ + u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */ + __be16 max_credit_hint; + u8 resv; + u8 link_roundtrip_latency[3]; +}; + +static inline u8 +ib_get_smp_direction(struct ib_smp *smp) +{ + return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); +} + +#endif /* IB_SMI_H */ diff --git a/sys/contrib/rdma/ib_umem.h b/sys/contrib/rdma/ib_umem.h new file mode 100644 index 000000000000..50dd5dac867d --- /dev/null +++ b/sys/contrib/rdma/ib_umem.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $FreeBSD$ + */ + +#ifndef IB_UMEM_H +#define IB_UMEM_H + +struct ib_ucontext; + +struct ib_umem_chunk { + TAILQ_ENTRY(ib_umem_chunk) entry; + int nents; + int nmap; + struct rdma_scatterlist page_list[0]; +}; + +struct ib_umem { + struct ib_ucontext *context; + size_t length; + int offset; + int page_size; + int writable; + TAILQ_HEAD(, ib_umem_chunk) chunk_list; +#ifdef notyet + struct work_struct work; + struct mm_struct *mm; +#endif + unsigned long diff; +}; + +#ifdef CONFIG_INFINIBAND_USER_MEM + +struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, + size_t size, int access); +void ib_umem_release(struct ib_umem *umem); +int ib_umem_page_count(struct ib_umem *umem); + +#else /* CONFIG_INFINIBAND_USER_MEM */ + + +static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context, + unsigned long addr, size_t size, + int access) { + return ERR_PTR(EINVAL); +} +static inline void ib_umem_release(struct ib_umem *umem) { } +static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; } + +#endif /* CONFIG_INFINIBAND_USER_MEM */ + +#endif /* IB_UMEM_H */ diff --git a/sys/contrib/rdma/ib_user_cm.h b/sys/contrib/rdma/ib_user_cm.h new file mode 100644 index 000000000000..e8815d51cf5b --- /dev/null +++ b/sys/contrib/rdma/ib_user_cm.h @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_cm.h 4019 2005-11-11 00:33:09Z sean.hefty $ + * + * $FreeBSD$ + */ + +#ifndef IB_USER_CM_H +#define IB_USER_CM_H + +#include <rdma/ib_user_sa.h> + +#define IB_USER_CM_ABI_VERSION 5 + +enum { + IB_USER_CM_CMD_CREATE_ID, + IB_USER_CM_CMD_DESTROY_ID, + IB_USER_CM_CMD_ATTR_ID, + + IB_USER_CM_CMD_LISTEN, + IB_USER_CM_CMD_NOTIFY, + + IB_USER_CM_CMD_SEND_REQ, + IB_USER_CM_CMD_SEND_REP, + IB_USER_CM_CMD_SEND_RTU, + IB_USER_CM_CMD_SEND_DREQ, + IB_USER_CM_CMD_SEND_DREP, + IB_USER_CM_CMD_SEND_REJ, + IB_USER_CM_CMD_SEND_MRA, + IB_USER_CM_CMD_SEND_LAP, + IB_USER_CM_CMD_SEND_APR, + IB_USER_CM_CMD_SEND_SIDR_REQ, + IB_USER_CM_CMD_SEND_SIDR_REP, + + IB_USER_CM_CMD_EVENT, + IB_USER_CM_CMD_INIT_QP_ATTR, +}; +/* + * command ABI structures. 
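+ *
+ * Userspace issues a command as a single write() of an ib_ucm_cmd_hdr
+ * followed immediately by the command body (a sketch; treating @in and
+ * @out as the sizes of the command body and response buffer is an
+ * assumption, as is the response-pointer convention):
+ *
+ *	struct {
+ *		struct ib_ucm_cmd_hdr	hdr;
+ *		struct ib_ucm_create_id	cmd;
+ *	} msg;
+ *	struct ib_ucm_create_id_resp resp;
+ *
+ *	msg.hdr.cmd = IB_USER_CM_CMD_CREATE_ID;
+ *	msg.hdr.in = sizeof msg.cmd;
+ *	msg.hdr.out = sizeof resp;
+ *	msg.cmd.response = (__u64)(uintptr_t)&resp;
+ *	write(fd, &msg, sizeof msg);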
+ */ +struct ib_ucm_cmd_hdr { + __u32 cmd; + __u16 in; + __u16 out; +}; + +struct ib_ucm_create_id { + __u64 uid; + __u64 response; +}; + +struct ib_ucm_create_id_resp { + __u32 id; +}; + +struct ib_ucm_destroy_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_destroy_id_resp { + __u32 events_reported; +}; + +struct ib_ucm_attr_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_attr_id_resp { + __be64 service_id; + __be64 service_mask; + __be32 local_id; + __be32 remote_id; +}; + +struct ib_ucm_init_qp_attr { + __u64 response; + __u32 id; + __u32 qp_state; +}; + +struct ib_ucm_listen { + __be64 service_id; + __be64 service_mask; + __u32 id; + __u32 reserved; +}; + +struct ib_ucm_notify { + __u32 id; + __u32 event; +}; + +struct ib_ucm_private_data { + __u64 data; + __u32 id; + __u8 len; + __u8 reserved[3]; +}; + +struct ib_ucm_req { + __u32 id; + __u32 qpn; + __u32 qp_type; + __u32 psn; + __be64 sid; + __u64 data; + __u64 primary_path; + __u64 alternate_path; + __u8 len; + __u8 peer_to_peer; + __u8 responder_resources; + __u8 initiator_depth; + __u8 remote_cm_response_timeout; + __u8 flow_control; + __u8 local_cm_response_timeout; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 max_cm_retries; + __u8 srq; + __u8 reserved[5]; +}; + +struct ib_ucm_rep { + __u64 uid; + __u64 data; + __u32 id; + __u32 qpn; + __u32 psn; + __u8 len; + __u8 responder_resources; + __u8 initiator_depth; + __u8 target_ack_delay; + __u8 failover_accepted; + __u8 flow_control; + __u8 rnr_retry_count; + __u8 srq; + __u8 reserved[4]; +}; + +struct ib_ucm_info { + __u32 id; + __u32 status; + __u64 info; + __u64 data; + __u8 info_len; + __u8 data_len; + __u8 reserved[6]; +}; + +struct ib_ucm_mra { + __u64 data; + __u32 id; + __u8 len; + __u8 timeout; + __u8 reserved[2]; +}; + +struct ib_ucm_lap { + __u64 path; + __u64 data; + __u32 id; + __u8 len; + __u8 reserved[3]; +}; + +struct ib_ucm_sidr_req { + __u32 id; + __u32 timeout; + __be64 sid; + __u64 data; + __u64 path; + __u16 reserved_pkey; + __u8 len; + __u8 max_cm_retries; + __u8 reserved[4]; +}; + +struct ib_ucm_sidr_rep { + __u32 id; + __u32 qpn; + __u32 qkey; + __u32 status; + __u64 info; + __u64 data; + __u8 info_len; + __u8 data_len; + __u8 reserved[6]; +}; +/* + * event notification ABI structures. + */ +struct ib_ucm_event_get { + __u64 response; + __u64 data; + __u64 info; + __u8 data_len; + __u8 info_len; + __u8 reserved[6]; +}; + +struct ib_ucm_req_event_resp { + struct ib_user_path_rec primary_path; + struct ib_user_path_rec alternate_path; + __be64 remote_ca_guid; + __u32 remote_qkey; + __u32 remote_qpn; + __u32 qp_type; + __u32 starting_psn; + __u8 responder_resources; + __u8 initiator_depth; + __u8 local_cm_response_timeout; + __u8 flow_control; + __u8 remote_cm_response_timeout; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 srq; + __u8 port; + __u8 reserved[7]; +}; + +struct ib_ucm_rep_event_resp { + __be64 remote_ca_guid; + __u32 remote_qkey; + __u32 remote_qpn; + __u32 starting_psn; + __u8 responder_resources; + __u8 initiator_depth; + __u8 target_ack_delay; + __u8 failover_accepted; + __u8 flow_control; + __u8 rnr_retry_count; + __u8 srq; + __u8 reserved[5]; +}; + +struct ib_ucm_rej_event_resp { + __u32 reason; + /* ari in ib_ucm_event_get info field. */ +}; + +struct ib_ucm_mra_event_resp { + __u8 timeout; + __u8 reserved[3]; +}; + +struct ib_ucm_lap_event_resp { + struct ib_user_path_rec path; +}; + +struct ib_ucm_apr_event_resp { + __u32 status; + /* apr info in ib_ucm_event_get info field. 
*/ +}; + +struct ib_ucm_sidr_req_event_resp { + __u16 pkey; + __u8 port; + __u8 reserved; +}; + +struct ib_ucm_sidr_rep_event_resp { + __u32 status; + __u32 qkey; + __u32 qpn; + /* info in ib_ucm_event_get info field. */ +}; + +#define IB_UCM_PRES_DATA 0x01 +#define IB_UCM_PRES_INFO 0x02 +#define IB_UCM_PRES_PRIMARY 0x04 +#define IB_UCM_PRES_ALTERNATE 0x08 + +struct ib_ucm_event_resp { + __u64 uid; + __u32 id; + __u32 event; + __u32 present; + __u32 reserved; + union { + struct ib_ucm_req_event_resp req_resp; + struct ib_ucm_rep_event_resp rep_resp; + struct ib_ucm_rej_event_resp rej_resp; + struct ib_ucm_mra_event_resp mra_resp; + struct ib_ucm_lap_event_resp lap_resp; + struct ib_ucm_apr_event_resp apr_resp; + + struct ib_ucm_sidr_req_event_resp sidr_req_resp; + struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; + + __u32 send_status; + } u; +}; + +#endif /* IB_USER_CM_H */ diff --git a/sys/contrib/rdma/ib_user_mad.h b/sys/contrib/rdma/ib_user_mad.h new file mode 100644 index 000000000000..ec1d50987c88 --- /dev/null +++ b/sys/contrib/rdma/ib_user_mad.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_mad.h 2814 2005-07-06 19:14:09Z halr $ + * + * $FreeBSD$ + */ + +#ifndef IB_USER_MAD_H +#define IB_USER_MAD_H + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_MAD_ABI_VERSION 5 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). 
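+ *
+ * Concretely, that means no pointer types (pointers are passed as
+ * __u64) and explicit padding of any struct larger than 4 bytes to a
+ * multiple of 8 bytes, e.g. (illustrative only):
+ *
+ *	struct example {
+ *		__u64	buf;		(a pointer, passed as __u64)
+ *		__u32	len;
+ *		__u32	reserved;	(explicit pad to an 8-byte multiple)
+ *	};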
+ */
+
+/**
+ * ib_user_mad_hdr - MAD packet header
+ * @id - ID of agent MAD received with/to be sent with
+ * @status - 0 on successful receive, ETIMEDOUT if no response
+ * received (transaction ID in data[] will be set to TID of original
+ * request) (ignored on send)
+ * @timeout_ms - Milliseconds to wait for response (unset on receive)
+ * @retries - Number of automatic retries to attempt
+ * @qpn - Remote QP number received from/to be sent to
+ * @qkey - Remote Q_Key to be sent with (unset on receive)
+ * @lid - Remote lid received from/to be sent to
+ * @sl - Service level received with/to be sent with
+ * @path_bits - Local path bits received with/to be sent with
+ * @grh_present - If set, GRH was received/should be sent
+ * @gid_index - Local GID index to send with (unset on receive)
+ * @hop_limit - Hop limit in GRH
+ * @traffic_class - Traffic class in GRH
+ * @gid - Remote GID in GRH
+ * @flow_label - Flow label in GRH
+ */
+struct ib_user_mad_hdr {
+	__u32	id;
+	__u32	status;
+	__u32	timeout_ms;
+	__u32	retries;
+	__u32	length;
+	__be32	qpn;
+	__be32	qkey;
+	__be16	lid;
+	__u8	sl;
+	__u8	path_bits;
+	__u8	grh_present;
+	__u8	gid_index;
+	__u8	hop_limit;
+	__u8	traffic_class;
+	__u8	gid[16];
+	__be32	flow_label;
+};
+
+/**
+ * ib_user_mad - MAD packet
+ * @hdr - MAD packet header
+ * @data - Contents of MAD
+ *
+ */
+struct ib_user_mad {
+	struct ib_user_mad_hdr hdr;
+	__u64	data[0];
+};
+
+/**
+ * ib_user_mad_reg_req - MAD registration request
+ * @id - Set by the kernel; used to identify agent in future requests.
+ * @qpn - Queue pair number; must be 0 or 1.
+ * @method_mask - The caller will receive unsolicited MADs for any method
+ * where @method_mask = 1.
+ * @mgmt_class - Indicates which management class of MADs should be received
+ * by the caller. This field is only required if the user wishes to
+ * receive unsolicited MADs, otherwise it should be 0.
+ * @mgmt_class_version - Indicates which version of MADs for the given
+ * management class to receive.
+ * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
+ * in the range from 0x30 to 0x4f. Otherwise not used.
+ * @rmpp_version: If set, indicates the RMPP version used.
+ *
+ */
+struct ib_user_mad_reg_req {
+	__u32	id;
+	__u32	method_mask[4];
+	__u8	qpn;
+	__u8	mgmt_class;
+	__u8	mgmt_class_version;
+	__u8	oui[3];
+	__u8	rmpp_version;
+};
+
+#define IB_IOCTL_MAGIC		0x1b
+
+#define IB_USER_MAD_REGISTER_AGENT	_IOWR(IB_IOCTL_MAGIC, 1, \
+					      struct ib_user_mad_reg_req)
+
+#define IB_USER_MAD_UNREGISTER_AGENT	_IOW(IB_IOCTL_MAGIC, 2, __u32)
+
+#endif /* IB_USER_MAD_H */
diff --git a/sys/contrib/rdma/ib_user_sa.h b/sys/contrib/rdma/ib_user_sa.h
new file mode 100644
index 000000000000..ddb76ed5c9c2
--- /dev/null
+++ b/sys/contrib/rdma/ib_user_sa.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef IB_USER_SA_H +#define IB_USER_SA_H + +struct ib_user_path_rec { + __u8 dgid[16]; + __u8 sgid[16]; + __be16 dlid; + __be16 slid; + __u32 raw_traffic; + __be32 flow_label; + __u32 reversible; + __u32 mtu; + __be16 pkey; + __u8 hop_limit; + __u8 traffic_class; + __u8 numb_path; + __u8 sl; + __u8 mtu_selector; + __u8 rate_selector; + __u8 rate; + __u8 packet_life_time_selector; + __u8 packet_life_time; + __u8 preference; +}; + +#endif /* IB_USER_SA_H */ diff --git a/sys/contrib/rdma/ib_user_verbs.h b/sys/contrib/rdma/ib_user_verbs.h new file mode 100644 index 000000000000..faa966c20920 --- /dev/null +++ b/sys/contrib/rdma/ib_user_verbs.h @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2006 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_verbs.h 4019 2005-11-11 00:33:09Z sean.hefty $ + * + * $FreeBSD$ + */ + +#ifndef IB_USER_VERBS_H +#define IB_USER_VERBS_H + +#include <contrib/rdma/types.h> + + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. 
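+ *
+ * A userspace consumer is expected to compare its compiled-in value
+ * with the version the kernel reports and refuse to run on a mismatch
+ * (a sketch; how kernel_abi_version is obtained is left open here):
+ *
+ *	if (kernel_abi_version != IB_USER_VERBS_ABI_VERSION)
+ *		errx(1, "uverbs ABI version mismatch");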
+ */
+#define IB_USER_VERBS_ABI_VERSION	6
+
+enum {
+	IB_USER_VERBS_CMD_GET_CONTEXT,
+	IB_USER_VERBS_CMD_QUERY_DEVICE,
+	IB_USER_VERBS_CMD_QUERY_PORT,
+	IB_USER_VERBS_CMD_ALLOC_PD,
+	IB_USER_VERBS_CMD_DEALLOC_PD,
+	IB_USER_VERBS_CMD_CREATE_AH,
+	IB_USER_VERBS_CMD_MODIFY_AH,
+	IB_USER_VERBS_CMD_QUERY_AH,
+	IB_USER_VERBS_CMD_DESTROY_AH,
+	IB_USER_VERBS_CMD_REG_MR,
+	IB_USER_VERBS_CMD_REG_SMR,
+	IB_USER_VERBS_CMD_REREG_MR,
+	IB_USER_VERBS_CMD_QUERY_MR,
+	IB_USER_VERBS_CMD_DEREG_MR,
+	IB_USER_VERBS_CMD_ALLOC_MW,
+	IB_USER_VERBS_CMD_BIND_MW,
+	IB_USER_VERBS_CMD_DEALLOC_MW,
+	IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
+	IB_USER_VERBS_CMD_CREATE_CQ,
+	IB_USER_VERBS_CMD_RESIZE_CQ,
+	IB_USER_VERBS_CMD_DESTROY_CQ,
+	IB_USER_VERBS_CMD_POLL_CQ,
+	IB_USER_VERBS_CMD_PEEK_CQ,
+	IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+	IB_USER_VERBS_CMD_CREATE_QP,
+	IB_USER_VERBS_CMD_QUERY_QP,
+	IB_USER_VERBS_CMD_MODIFY_QP,
+	IB_USER_VERBS_CMD_DESTROY_QP,
+	IB_USER_VERBS_CMD_POST_SEND,
+	IB_USER_VERBS_CMD_POST_RECV,
+	IB_USER_VERBS_CMD_ATTACH_MCAST,
+	IB_USER_VERBS_CMD_DETACH_MCAST,
+	IB_USER_VERBS_CMD_CREATE_SRQ,
+	IB_USER_VERBS_CMD_MODIFY_SRQ,
+	IB_USER_VERBS_CMD_QUERY_SRQ,
+	IB_USER_VERBS_CMD_DESTROY_SRQ,
+	IB_USER_VERBS_CMD_POST_SRQ_RECV
+};
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * Specifically:
+ * - Do not use pointer types -- pass pointers in __u64 instead.
+ * - Make sure that any structure larger than 4 bytes is padded to a
+ * multiple of 8 bytes. Otherwise the structure size will be
+ * different between 32-bit and 64-bit architectures.
+ */
+
+struct ib_uverbs_async_event_desc {
+	__u64 element;
+	__u32 event_type;	/* enum ib_event_type */
+	__u32 reserved;
+};
+
+struct ib_uverbs_comp_event_desc {
+	__u64 cq_handle;
+};
+
+/*
+ * All commands from userspace should start with a __u32 command field
+ * followed by __u16 in_words and out_words fields (which give the
+ * length of the command block and response buffer if any in 32-bit
+ * words). The kernel driver will read these fields first and read
+ * the rest of the command struct based on these values.
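+ *
+ * For example, a GET_CONTEXT request might be framed like this (a
+ * sketch; handling of the response buffer itself is elided):
+ *
+ *	struct {
+ *		struct ib_uverbs_cmd_hdr	hdr;
+ *		struct ib_uverbs_get_context	cmd;
+ *	} msg;
+ *
+ *	msg.hdr.command = IB_USER_VERBS_CMD_GET_CONTEXT;
+ *	msg.hdr.in_words = sizeof msg / 4;
+ *	msg.hdr.out_words = sizeof (struct ib_uverbs_get_context_resp) / 4;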
+ */ + +struct ib_uverbs_cmd_hdr { + __u32 command; + __u16 in_words; + __u16 out_words; +}; + +struct ib_uverbs_get_context { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_get_context_resp { + __u32 async_fd; + __u32 num_comp_vectors; +}; + +struct ib_uverbs_query_device { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_device_resp { + __u64 fw_ver; + __be64 node_guid; + __be64 sys_image_guid; + __u64 max_mr_size; + __u64 page_size_cap; + __u32 vendor_id; + __u32 vendor_part_id; + __u32 hw_ver; + __u32 max_qp; + __u32 max_qp_wr; + __u32 device_cap_flags; + __u32 max_sge; + __u32 max_sge_rd; + __u32 max_cq; + __u32 max_cqe; + __u32 max_mr; + __u32 max_pd; + __u32 max_qp_rd_atom; + __u32 max_ee_rd_atom; + __u32 max_res_rd_atom; + __u32 max_qp_init_rd_atom; + __u32 max_ee_init_rd_atom; + __u32 atomic_cap; + __u32 max_ee; + __u32 max_rdd; + __u32 max_mw; + __u32 max_raw_ipv6_qp; + __u32 max_raw_ethy_qp; + __u32 max_mcast_grp; + __u32 max_mcast_qp_attach; + __u32 max_total_mcast_qp_attach; + __u32 max_ah; + __u32 max_fmr; + __u32 max_map_per_fmr; + __u32 max_srq; + __u32 max_srq_wr; + __u32 max_srq_sge; + __u16 max_pkeys; + __u8 local_ca_ack_delay; + __u8 phys_port_cnt; + __u8 reserved[4]; +}; + +struct ib_uverbs_query_port { + __u64 response; + __u8 port_num; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_port_resp { + __u32 port_cap_flags; + __u32 max_msg_sz; + __u32 bad_pkey_cntr; + __u32 qkey_viol_cntr; + __u32 gid_tbl_len; + __u16 pkey_tbl_len; + __u16 lid; + __u16 sm_lid; + __u8 state; + __u8 max_mtu; + __u8 active_mtu; + __u8 lmc; + __u8 max_vl_num; + __u8 sm_sl; + __u8 subnet_timeout; + __u8 init_type_reply; + __u8 active_width; + __u8 active_speed; + __u8 phys_state; + __u8 reserved[3]; +}; + +struct ib_uverbs_alloc_pd { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_alloc_pd_resp { + __u32 pd_handle; +}; + +struct ib_uverbs_dealloc_pd { + __u32 pd_handle; +}; + +struct ib_uverbs_reg_mr { + __u64 response; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_reg_mr_resp { + __u32 mr_handle; + __u32 lkey; + __u32 rkey; +}; + +struct ib_uverbs_dereg_mr { + __u32 mr_handle; +}; + +struct ib_uverbs_create_comp_channel { + __u64 response; +}; + +struct ib_uverbs_create_comp_channel_resp { + __u32 fd; +}; + +struct ib_uverbs_create_cq { + __u64 response; + __u64 user_handle; + __u32 cqe; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_cq_resp { + __u32 cq_handle; + __u32 cqe; +}; + +struct ib_uverbs_resize_cq { + __u64 response; + __u32 cq_handle; + __u32 cqe; + __u64 driver_data[0]; +}; + +struct ib_uverbs_resize_cq_resp { + __u32 cqe; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_poll_cq { + __u64 response; + __u32 cq_handle; + __u32 ne; +}; + +struct ib_uverbs_wc { + __u64 wr_id; + __u32 status; + __u32 opcode; + __u32 vendor_err; + __u32 byte_len; + __u32 imm_data; + __u32 qp_num; + __u32 src_qp; + __u32 wc_flags; + __u16 pkey_index; + __u16 slid; + __u8 sl; + __u8 dlid_path_bits; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_poll_cq_resp { + __u32 count; + __u32 reserved; + struct ib_uverbs_wc wc[0]; +}; + +struct ib_uverbs_req_notify_cq { + __u32 cq_handle; + __u32 solicited_only; +}; + +struct ib_uverbs_destroy_cq { + __u64 response; + __u32 cq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_cq_resp { + 
__u32 comp_events_reported; + __u32 async_events_reported; +}; + +struct ib_uverbs_global_route { + __u8 dgid[16]; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 reserved; +}; + +struct ib_uverbs_ah_attr { + struct ib_uverbs_global_route grh; + __u16 dlid; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_qp_attr { + __u32 qp_attr_mask; + __u32 qp_state; + __u32 cur_qp_state; + __u32 path_mtu; + __u32 path_mig_state; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + + struct ib_uverbs_ah_attr ah_attr; + struct ib_uverbs_ah_attr alt_ah_attr; + + /* ib_qp_cap */ + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 en_sqd_async_notify; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[5]; +}; + +struct ib_uverbs_create_qp { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_qp_resp { + __u32 qp_handle; + __u32 qpn; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 reserved; +}; + +/* + * This struct needs to remain a multiple of 8 bytes to keep the + * alignment of the modify QP parameters. 
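+ *
+ * A build-time check along these lines (illustrative; CTASSERT is the
+ * FreeBSD compile-time assertion macro) would catch an accidental
+ * size regression:
+ *
+ *	CTASSERT(sizeof(struct ib_uverbs_qp_dest) % 8 == 0);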
+ */ +struct ib_uverbs_qp_dest { + __u8 dgid[16]; + __u32 flow_label; + __u16 dlid; + __u16 reserved; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; +}; + +struct ib_uverbs_query_qp { + __u64 response; + __u32 qp_handle; + __u32 attr_mask; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_qp_resp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 sq_sig_all; + __u8 reserved[5]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp_resp { +}; + +struct ib_uverbs_destroy_qp { + __u64 response; + __u32 qp_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_qp_resp { + __u32 events_reported; +}; + +/* + * The ib_uverbs_sge structure isn't used anywhere, since we assume + * the ib_sge structure is packed the same way on 32-bit and 64-bit + * architectures in both kernel and user space. It's just here to + * document the ABI. 
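+ *
+ * A consumer wanting to verify that assumption at build time could,
+ * for instance, check the sizes against each other:
+ *
+ *	CTASSERT(sizeof(struct ib_uverbs_sge) == sizeof(struct ib_sge));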
+ */ +struct ib_uverbs_sge { + __u64 addr; + __u32 length; + __u32 lkey; +}; + +struct ib_uverbs_send_wr { + __u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + __u32 imm_data; + union { + struct { + __u64 remote_addr; + __u32 rkey; + __u32 reserved; + } rdma; + struct { + __u64 remote_addr; + __u64 compare_add; + __u64 swap; + __u32 rkey; + __u32 reserved; + } atomic; + struct { + __u32 ah; + __u32 remote_qpn; + __u32 remote_qkey; + __u32 reserved; + } ud; + } wr; +}; + +struct ib_uverbs_post_send { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_send_wr send_wr[0]; +}; + +struct ib_uverbs_post_send_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_recv_wr { + __u64 wr_id; + __u32 num_sge; + __u32 reserved; +}; + +struct ib_uverbs_post_recv { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv_wr[0]; +}; + +struct ib_uverbs_post_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_post_srq_recv { + __u64 response; + __u32 srq_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv[0]; +}; + +struct ib_uverbs_post_srq_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_create_ah { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 reserved; + struct ib_uverbs_ah_attr attr; +}; + +struct ib_uverbs_create_ah_resp { + __u32 ah_handle; +}; + +struct ib_uverbs_destroy_ah { + __u32 ah_handle; +}; + +struct ib_uverbs_attach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_detach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq_resp { + __u32 srq_handle; + __u32 max_wr; + __u32 max_sge; + __u32 reserved; +}; + +struct ib_uverbs_modify_srq { + __u32 srq_handle; + __u32 attr_mask; + __u32 max_wr; + __u32 srq_limit; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_srq { + __u64 response; + __u32 srq_handle; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_srq_resp { + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 reserved; +}; + +struct ib_uverbs_destroy_srq { + __u64 response; + __u32 srq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_srq_resp { + __u32 events_reported; +}; + +#endif /* IB_USER_VERBS_H */ diff --git a/sys/contrib/rdma/ib_verbs.h b/sys/contrib/rdma/ib_verbs.h new file mode 100644 index 000000000000..5d98ef7e0e41 --- /dev/null +++ b/sys/contrib/rdma/ib_verbs.h @@ -0,0 +1,1854 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $ + * + * $FreeBSD$ + */ + + +#if !defined(IB_VERBS_H) +#define IB_VERBS_H + +#include <contrib/rdma/types.h> +#include <sys/lock.h> +#include <sys/mutex.h> + +struct rdma_scatterlist { + void *page; + unsigned int length; + unsigned int offset; +}; +struct vm_object; + +union ib_gid { + u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +enum rdma_node_type { + /* IB values map to NodeInfo:NodeType. */ + RDMA_NODE_IB_CA = 1, + RDMA_NODE_IB_SWITCH, + RDMA_NODE_IB_ROUTER, + RDMA_NODE_RNIC +}; + +enum rdma_transport_type { + RDMA_TRANSPORT_IB, + RDMA_TRANSPORT_IWARP +}; + +enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type); + +enum ib_device_cap_flags { + IB_DEVICE_RESIZE_MAX_WR = 1, + IB_DEVICE_BAD_PKEY_CNTR = (1<<1), + IB_DEVICE_BAD_QKEY_CNTR = (1<<2), + IB_DEVICE_RAW_MULTI = (1<<3), + IB_DEVICE_AUTO_PATH_MIG = (1<<4), + IB_DEVICE_CHANGE_PHY_PORT = (1<<5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), + IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), + IB_DEVICE_SHUTDOWN_PORT = (1<<8), + IB_DEVICE_INIT_TYPE = (1<<9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), + IB_DEVICE_SYS_IMAGE_GUID = (1<<11), + IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), + IB_DEVICE_SRQ_RESIZE = (1<<13), + IB_DEVICE_N_NOTIFY_CQ = (1<<14), + IB_DEVICE_ZERO_STAG = (1<<15), + IB_DEVICE_SEND_W_INV = (1<<16), + IB_DEVICE_MEM_WINDOW = (1<<17) +}; + +enum ib_atomic_cap { + IB_ATOMIC_NONE, + IB_ATOMIC_HCA, + IB_ATOMIC_GLOB +}; + +struct ib_device_attr { + u64 fw_ver; + __be64 sys_image_guid; + u64 max_mr_size; + u64 page_size_cap; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + int max_qp; + int max_qp_wr; + int device_cap_flags; + int max_sge; + int max_sge_rd; + int max_cq; + int max_cqe; + int max_mr; + int max_pd; + int max_qp_rd_atom; + int max_ee_rd_atom; + int max_res_rd_atom; + int max_qp_init_rd_atom; + int max_ee_init_rd_atom; + enum ib_atomic_cap atomic_cap; + int max_ee; + int max_rdd; + int max_mw; + int max_raw_ipv6_qp; + int max_raw_ethy_qp; + int max_mcast_grp; + int max_mcast_qp_attach; + int max_total_mcast_qp_attach; + int max_ah; + int max_fmr; + int max_map_per_fmr; + int max_srq; + int max_srq_wr; + int max_srq_sge; + u16 max_pkeys; + u8 local_ca_ack_delay; +}; + +enum ib_mtu { + 
IB_MTU_256 = 1, + IB_MTU_512 = 2, + IB_MTU_1024 = 3, + IB_MTU_2048 = 4, + IB_MTU_4096 = 5 +}; + +static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) +{ + switch (mtu) { + case IB_MTU_256: return 256; + case IB_MTU_512: return 512; + case IB_MTU_1024: return 1024; + case IB_MTU_2048: return 2048; + case IB_MTU_4096: return 4096; + default: return -1; + } +} + +enum ib_port_state { + IB_PORT_NOP = 0, + IB_PORT_DOWN = 1, + IB_PORT_INIT = 2, + IB_PORT_ARMED = 3, + IB_PORT_ACTIVE = 4, + IB_PORT_ACTIVE_DEFER = 5 +}; + +enum ib_port_cap_flags { + IB_PORT_SM = 1 << 1, + IB_PORT_NOTICE_SUP = 1 << 2, + IB_PORT_TRAP_SUP = 1 << 3, + IB_PORT_OPT_IPD_SUP = 1 << 4, + IB_PORT_AUTO_MIGR_SUP = 1 << 5, + IB_PORT_SL_MAP_SUP = 1 << 6, + IB_PORT_MKEY_NVRAM = 1 << 7, + IB_PORT_PKEY_NVRAM = 1 << 8, + IB_PORT_LED_INFO_SUP = 1 << 9, + IB_PORT_SM_DISABLED = 1 << 10, + IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_CM_SUP = 1 << 16, + IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, + IB_PORT_REINIT_SUP = 1 << 18, + IB_PORT_DEVICE_MGMT_SUP = 1 << 19, + IB_PORT_VENDOR_CLASS_SUP = 1 << 20, + IB_PORT_DR_NOTICE_SUP = 1 << 21, + IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_PORT_BOOT_MGMT_SUP = 1 << 23, + IB_PORT_LINK_LATENCY_SUP = 1 << 24, + IB_PORT_CLIENT_REG_SUP = 1 << 25 +}; + +enum ib_port_width { + IB_WIDTH_1X = 1, + IB_WIDTH_4X = 2, + IB_WIDTH_8X = 4, + IB_WIDTH_12X = 8 +}; + +static inline int ib_width_enum_to_int(enum ib_port_width width) +{ + switch (width) { + case IB_WIDTH_1X: return 1; + case IB_WIDTH_4X: return 4; + case IB_WIDTH_8X: return 8; + case IB_WIDTH_12X: return 12; + default: return -1; + } +} + +struct ib_port_attr { + enum ib_port_state state; + enum ib_mtu max_mtu; + enum ib_mtu active_mtu; + int gid_tbl_len; + u32 port_cap_flags; + u32 max_msg_sz; + u32 bad_pkey_cntr; + u32 qkey_viol_cntr; + u16 pkey_tbl_len; + u16 lid; + u16 sm_lid; + u8 lmc; + u8 max_vl_num; + u8 sm_sl; + u8 subnet_timeout; + u8 init_type_reply; + u8 active_width; + u8 active_speed; + u8 phys_state; +}; + +enum ib_device_modify_flags { + IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0, + IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 +}; + +struct ib_device_modify { + u64 sys_image_guid; + char node_desc[64]; +}; + +enum ib_port_modify_flags { + IB_PORT_SHUTDOWN = 1, + IB_PORT_INIT_TYPE = (1<<2), + IB_PORT_RESET_QKEY_CNTR = (1<<3) +}; + +struct ib_port_modify { + u32 set_port_cap_mask; + u32 clr_port_cap_mask; + u8 init_type; +}; + +enum ib_event_type { + IB_EVENT_CQ_ERR, + IB_EVENT_QP_FATAL, + IB_EVENT_QP_REQ_ERR, + IB_EVENT_QP_ACCESS_ERR, + IB_EVENT_COMM_EST, + IB_EVENT_SQ_DRAINED, + IB_EVENT_PATH_MIG, + IB_EVENT_PATH_MIG_ERR, + IB_EVENT_DEVICE_FATAL, + IB_EVENT_PORT_ACTIVE, + IB_EVENT_PORT_ERR, + IB_EVENT_LID_CHANGE, + IB_EVENT_PKEY_CHANGE, + IB_EVENT_SM_CHANGE, + IB_EVENT_SRQ_ERR, + IB_EVENT_SRQ_LIMIT_REACHED, + IB_EVENT_QP_LAST_WQE_REACHED, + IB_EVENT_CLIENT_REREGISTER +}; + +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +struct ib_event { + struct ib_device *device; + union { + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_srq *srq; + u8 port_num; + } element; + enum ib_event_type event; +}; + +struct ib_event_handler { + struct ib_device *device; + void (*handler)(struct ib_event_handler *, struct ib_event *); + TAILQ_ENTRY(ib_event_handler) list; +}; + +#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ + do { \ + (_ptr)->device = _device; \ + (_ptr)->handler = _handler; \ + } while (0) + +struct ib_global_route { + union 
ib_gid dgid; + u32 flow_label; + u8 sgid_index; + u8 hop_limit; + u8 traffic_class; +}; + +struct ib_grh { + __be32 version_tclass_flow; + __be16 paylen; + u8 next_hdr; + u8 hop_limit; + union ib_gid sgid; + union ib_gid dgid; +}; + +enum { + IB_MULTICAST_QPN = 0xffffff +}; + +#define IB_LID_PERMISSIVE __constant_htons(0xFFFF) + +enum ib_ah_flags { + IB_AH_GRH = 1 +}; + +enum ib_rate { + IB_RATE_PORT_CURRENT = 0, + IB_RATE_2_5_GBPS = 2, + IB_RATE_5_GBPS = 5, + IB_RATE_10_GBPS = 3, + IB_RATE_20_GBPS = 6, + IB_RATE_30_GBPS = 4, + IB_RATE_40_GBPS = 7, + IB_RATE_60_GBPS = 8, + IB_RATE_80_GBPS = 9, + IB_RATE_120_GBPS = 10 +}; + +/** + * ib_rate_to_mult - Convert the IB rate enum to a multiple of the + * base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be + * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. + * @rate: rate to convert. + */ +int ib_rate_to_mult(enum ib_rate rate); + +/** + * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate + * enum. + * @mult: multiple to convert. + */ +enum ib_rate mult_to_ib_rate(int mult); + +struct ib_ah_attr { + struct ib_global_route grh; + u16 dlid; + u8 sl; + u8 src_path_bits; + u8 static_rate; + u8 ah_flags; + u8 port_num; +}; + +enum ib_wc_status { + IB_WC_SUCCESS, + IB_WC_LOC_LEN_ERR, + IB_WC_LOC_QP_OP_ERR, + IB_WC_LOC_EEC_OP_ERR, + IB_WC_LOC_PROT_ERR, + IB_WC_WR_FLUSH_ERR, + IB_WC_MW_BIND_ERR, + IB_WC_BAD_RESP_ERR, + IB_WC_LOC_ACCESS_ERR, + IB_WC_REM_INV_REQ_ERR, + IB_WC_REM_ACCESS_ERR, + IB_WC_REM_OP_ERR, + IB_WC_RETRY_EXC_ERR, + IB_WC_RNR_RETRY_EXC_ERR, + IB_WC_LOC_RDD_VIOL_ERR, + IB_WC_REM_INV_RD_REQ_ERR, + IB_WC_REM_ABORT_ERR, + IB_WC_INV_EECN_ERR, + IB_WC_INV_EEC_STATE_ERR, + IB_WC_FATAL_ERR, + IB_WC_RESP_TIMEOUT_ERR, + IB_WC_GENERAL_ERR +}; + +enum ib_wc_opcode { + IB_WC_SEND, + IB_WC_RDMA_WRITE, + IB_WC_RDMA_READ, + IB_WC_COMP_SWAP, + IB_WC_FETCH_ADD, + IB_WC_BIND_MW, +/* + * Set value of IB_WC_RECV so consumers can test if a completion is a + * receive by testing (opcode & IB_WC_RECV). + */ + IB_WC_RECV = 1 << 7, + IB_WC_RECV_RDMA_WITH_IMM +}; + +enum ib_wc_flags { + IB_WC_GRH = 1, + IB_WC_WITH_IMM = (1<<1) +}; + +struct ib_wc { + u64 wr_id; + enum ib_wc_status status; + enum ib_wc_opcode opcode; + u32 vendor_err; + u32 byte_len; + struct ib_qp *qp; + __be32 imm_data; + u32 src_qp; + int wc_flags; + u16 pkey_index; + u16 slid; + u8 sl; + u8 dlid_path_bits; + u8 port_num; /* valid only for DR SMPs on switches */ +}; + +enum ib_cq_notify_flags { + IB_CQ_SOLICITED = 1 << 0, + IB_CQ_NEXT_COMP = 1 << 1, + IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP, + IB_CQ_REPORT_MISSED_EVENTS = 1 << 2, +}; + +enum ib_srq_attr_mask { + IB_SRQ_MAX_WR = 1 << 0, + IB_SRQ_LIMIT = 1 << 1, +}; + +struct ib_srq_attr { + u32 max_wr; + u32 max_sge; + u32 srq_limit; +}; + +struct ib_srq_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + struct ib_srq_attr attr; +}; + +struct ib_qp_cap { + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +enum ib_sig_type { + IB_SIGNAL_ALL_WR, + IB_SIGNAL_REQ_WR +}; + +enum ib_qp_type { + /* + * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries + * here (and in that order) since the MAD layer uses them as + * indices into a 2-entry table. 
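+ * (Editor's sketch of that assumption, not code from this header: such
+ * a layer may keep "struct ib_mad_agent *agent[2]" per port and index
+ * it directly as agent[IB_QPT_SMI] and agent[IB_QPT_GSI].)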
+ */ + IB_QPT_SMI, + IB_QPT_GSI, + + IB_QPT_RC, + IB_QPT_UC, + IB_QPT_UD, + IB_QPT_RAW_IPV6, + IB_QPT_RAW_ETY +}; + +struct ib_qp_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + enum ib_qp_type qp_type; + u8 port_num; /* special QP types only */ +}; + +enum ib_rnr_timeout { + IB_RNR_TIMER_655_36 = 0, + IB_RNR_TIMER_000_01 = 1, + IB_RNR_TIMER_000_02 = 2, + IB_RNR_TIMER_000_03 = 3, + IB_RNR_TIMER_000_04 = 4, + IB_RNR_TIMER_000_06 = 5, + IB_RNR_TIMER_000_08 = 6, + IB_RNR_TIMER_000_12 = 7, + IB_RNR_TIMER_000_16 = 8, + IB_RNR_TIMER_000_24 = 9, + IB_RNR_TIMER_000_32 = 10, + IB_RNR_TIMER_000_48 = 11, + IB_RNR_TIMER_000_64 = 12, + IB_RNR_TIMER_000_96 = 13, + IB_RNR_TIMER_001_28 = 14, + IB_RNR_TIMER_001_92 = 15, + IB_RNR_TIMER_002_56 = 16, + IB_RNR_TIMER_003_84 = 17, + IB_RNR_TIMER_005_12 = 18, + IB_RNR_TIMER_007_68 = 19, + IB_RNR_TIMER_010_24 = 20, + IB_RNR_TIMER_015_36 = 21, + IB_RNR_TIMER_020_48 = 22, + IB_RNR_TIMER_030_72 = 23, + IB_RNR_TIMER_040_96 = 24, + IB_RNR_TIMER_061_44 = 25, + IB_RNR_TIMER_081_92 = 26, + IB_RNR_TIMER_122_88 = 27, + IB_RNR_TIMER_163_84 = 28, + IB_RNR_TIMER_245_76 = 29, + IB_RNR_TIMER_327_68 = 30, + IB_RNR_TIMER_491_52 = 31 +}; + +enum ib_qp_attr_mask { + IB_QP_STATE = 1, + IB_QP_CUR_STATE = (1<<1), + IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), + IB_QP_ACCESS_FLAGS = (1<<3), + IB_QP_PKEY_INDEX = (1<<4), + IB_QP_PORT = (1<<5), + IB_QP_QKEY = (1<<6), + IB_QP_AV = (1<<7), + IB_QP_PATH_MTU = (1<<8), + IB_QP_TIMEOUT = (1<<9), + IB_QP_RETRY_CNT = (1<<10), + IB_QP_RNR_RETRY = (1<<11), + IB_QP_RQ_PSN = (1<<12), + IB_QP_MAX_QP_RD_ATOMIC = (1<<13), + IB_QP_ALT_PATH = (1<<14), + IB_QP_MIN_RNR_TIMER = (1<<15), + IB_QP_SQ_PSN = (1<<16), + IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), + IB_QP_PATH_MIG_STATE = (1<<18), + IB_QP_CAP = (1<<19), + IB_QP_DEST_QPN = (1<<20) +}; + +enum ib_qp_state { + IB_QPS_RESET, + IB_QPS_INIT, + IB_QPS_RTR, + IB_QPS_RTS, + IB_QPS_SQD, + IB_QPS_SQE, + IB_QPS_ERR +}; + +enum ib_mig_state { + IB_MIG_MIGRATED, + IB_MIG_REARM, + IB_MIG_ARMED +}; + +struct ib_qp_attr { + enum ib_qp_state qp_state; + enum ib_qp_state cur_qp_state; + enum ib_mtu path_mtu; + enum ib_mig_state path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + int qp_access_flags; + struct ib_qp_cap cap; + struct ib_ah_attr ah_attr; + struct ib_ah_attr alt_ah_attr; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; +}; + +enum ib_wr_opcode { + IB_WR_RDMA_WRITE, + IB_WR_RDMA_WRITE_WITH_IMM, + IB_WR_SEND, + IB_WR_SEND_WITH_IMM, + IB_WR_RDMA_READ, + IB_WR_ATOMIC_CMP_AND_SWP, + IB_WR_ATOMIC_FETCH_AND_ADD +}; + +enum ib_send_flags { + IB_SEND_FENCE = 1, + IB_SEND_SIGNALED = (1<<1), + IB_SEND_SOLICITED = (1<<2), + IB_SEND_INLINE = (1<<3) +}; + +struct ib_sge { + u64 addr; + u32 length; + u32 lkey; +}; + +struct ib_send_wr { + struct ib_send_wr *next; + u64 wr_id; + struct ib_sge *sg_list; + int num_sge; + enum ib_wr_opcode opcode; + int send_flags; + __be32 imm_data; + union { + struct { + u64 remote_addr; + u32 rkey; + } rdma; + struct { + u64 remote_addr; + u64 compare_add; + u64 swap; + u32 rkey; + } atomic; + struct { + struct ib_ah *ah; + u32 remote_qpn; + u32 remote_qkey; + u16 pkey_index; /* valid for GSI only */ + u8 port_num; /* 
valid for DR SMPs on switch only */ + } ud; + } wr; +}; + +struct ib_recv_wr { + struct ib_recv_wr *next; + u64 wr_id; + struct ib_sge *sg_list; + int num_sge; +}; + +enum ib_access_flags { + IB_ACCESS_LOCAL_WRITE = 1, + IB_ACCESS_REMOTE_WRITE = (1<<1), + IB_ACCESS_REMOTE_READ = (1<<2), + IB_ACCESS_REMOTE_ATOMIC = (1<<3), + IB_ACCESS_MW_BIND = (1<<4) +}; + +struct ib_phys_buf { + u64 addr; + u64 size; +}; + +struct ib_mr_attr { + struct ib_pd *pd; + u64 device_virt_addr; + u64 size; + int mr_access_flags; + u32 lkey; + u32 rkey; +}; + +enum ib_mr_rereg_flags { + IB_MR_REREG_TRANS = 1, + IB_MR_REREG_PD = (1<<1), + IB_MR_REREG_ACCESS = (1<<2) +}; + +struct ib_mw_bind { + struct ib_mr *mr; + u64 wr_id; + u64 addr; + u32 length; + int send_flags; + int mw_access_flags; +}; + +struct ib_fmr_attr { + int max_pages; + int max_maps; + u8 page_shift; +}; + +/* + * XXX can this really be on 7 different lists at once? + * + */ +struct ib_ucontext { + struct ib_device *device; + TAILQ_ENTRY(ib_ucontext) pd_list; + TAILQ_ENTRY(ib_ucontext) mr_list; + TAILQ_ENTRY(ib_ucontext) mw_list; + TAILQ_ENTRY(ib_ucontext) cq_list; + TAILQ_ENTRY(ib_ucontext) qp_list; + TAILQ_ENTRY(ib_ucontext) srq_list; + TAILQ_ENTRY(ib_ucontext) ah_list; + int closing; +}; + +struct ib_uobject { + u64 user_handle; /* handle given to us by userspace */ + struct ib_ucontext *context; /* associated user context */ + void *object; /* containing object */ + TAILQ_ENTRY(ib_uobject) entry; /* link to context's list */ + u32 id; /* index into kernel idr */ + volatile uint32_t ref; + struct mtx lock; /* protects .live */ + int live; +}; + +struct ib_udata { + void *inbuf; + void *outbuf; + size_t inlen; + size_t outlen; +}; + +#define IB_UMEM_MAX_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ + ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ + (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) + +struct ib_pd { + struct ib_device *device; + struct ib_uobject *uobject; + volatile int usecnt; /* count all resources */ +}; + +struct ib_ah { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; +}; + +typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); + +struct ib_cq { + struct ib_device *device; + struct ib_uobject *uobject; + ib_comp_handler comp_handler; + void (*event_handler)(struct ib_event *, void *); + void * cq_context; + int cqe; + volatile int usecnt; /* count number of work queues */ +}; + +struct ib_srq { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + volatile int usecnt; +}; + +struct ib_qp { + struct ib_device *device; + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_uobject *uobject; + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + u32 qp_num; + enum ib_qp_type qp_type; +}; + +struct ib_mr { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + u32 lkey; + u32 rkey; + volatile int usecnt; /* count number of MWs */ +}; + +struct ib_mw { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + u32 rkey; +}; + + +struct ib_fmr { + struct ib_device *device; + struct ib_pd *pd; + TAILQ_ENTRY(ib_fmr) entry; + u32 lkey; + u32 rkey; +}; + +TAILQ_HEAD(ib_fmr_list_head, ib_fmr); + +struct ib_mad; +struct ib_grh; + +enum ib_process_mad_flags { + IB_MAD_IGNORE_MKEY = 1, + IB_MAD_IGNORE_BKEY = 2, + IB_MAD_IGNORE_ALL = 
IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY +}; + +enum ib_mad_result { + IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ + IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ + IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ + IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ +}; + +#define IB_DEVICE_NAME_MAX 64 + +struct ib_cache { + struct mtx lock; + struct ib_event_handler event_handler; + struct ib_pkey_cache **pkey_cache; + struct ib_gid_cache **gid_cache; + u8 *lmc_cache; +}; + +struct ib_dma_mapping_ops { + int (*mapping_error)(struct ib_device *dev, + u64 dma_addr); + u64 (*map_single)(struct ib_device *dev, + void *ptr, size_t size, + enum dma_data_direction direction); + void (*unmap_single)(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction); + u64 (*map_page)(struct ib_device *dev, + void *page, unsigned long offset, + size_t size, + enum dma_data_direction direction); + void (*unmap_page)(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction); + int (*map_sg)(struct ib_device *dev, + struct rdma_scatterlist *sg, int nents, + enum dma_data_direction direction); + void (*unmap_sg)(struct ib_device *dev, + struct rdma_scatterlist *sg, int nents, + enum dma_data_direction direction); + u64 (*dma_address)(struct ib_device *dev, + struct rdma_scatterlist *sg); + unsigned int (*dma_len)(struct ib_device *dev, + struct rdma_scatterlist *sg); + void (*sync_single_for_cpu)(struct ib_device *dev, + u64 dma_handle, + size_t size, + enum dma_data_direction dir); + void (*sync_single_for_device)(struct ib_device *dev, + u64 dma_handle, + size_t size, + enum dma_data_direction dir); + void *(*alloc_coherent)(struct ib_device *dev, + size_t size, + u64 *dma_handle, + int flag); + void (*free_coherent)(struct ib_device *dev, + size_t size, void *cpu_addr, + u64 dma_handle); +}; + +struct iw_cm_verbs; + +struct ib_device { + struct device *dma_device; + + char name[IB_DEVICE_NAME_MAX]; + + TAILQ_HEAD(, ib_event_handler) event_handler_list; + struct mtx event_handler_lock; + + TAILQ_ENTRY(ib_device) core_list; + TAILQ_HEAD(, ib_client_data) client_data_list; + struct mtx client_data_lock; + + struct ib_cache cache; + int *pkey_tbl_len; + int *gid_tbl_len; + + u32 flags; + + int num_comp_vectors; + + struct iw_cm_verbs *iwcm; + + int (*query_device)(struct ib_device *device, + struct ib_device_attr *device_attr); + int (*query_port)(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr); + int (*query_gid)(struct ib_device *device, + u8 port_num, int index, + union ib_gid *gid); + int (*query_pkey)(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + int (*modify_device)(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + int (*modify_port)(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, + struct ib_udata *udata); + int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*mmap)(struct ib_ucontext *context, + struct vm_object *vma); + struct ib_pd * (*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*dealloc_pd)(struct ib_pd *pd); + struct ib_ah * (*create_ah)(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); + int (*modify_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*query_ah)(struct ib_ah *ah, + 
struct ib_ah_attr *ah_attr); + int (*destroy_ah)(struct ib_ah *ah); + struct ib_srq * (*create_srq)(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + int (*modify_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); + int (*query_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + int (*destroy_srq)(struct ib_srq *srq); + int (*post_srq_recv)(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + struct ib_qp * (*create_qp)(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + int (*modify_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_udata *udata); + int (*query_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + int (*destroy_qp)(struct ib_qp *qp); + int (*post_send)(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + int (*post_recv)(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, + int comp_vector, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*destroy_cq)(struct ib_cq *cq); + int (*resize_cq)(struct ib_cq *cq, int cqe, + struct ib_udata *udata); + int (*poll_cq)(struct ib_cq *cq, int num_entries, + struct ib_wc *wc); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, + enum ib_cq_notify_flags flags); + int (*req_ncomp_notif)(struct ib_cq *cq, + int wc_cnt); + struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, + int mr_access_flags); + struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, + u64 start, u64 length, + u64 virt_addr, + int mr_access_flags, + struct ib_udata *udata); + int (*query_mr)(struct ib_mr *mr, + struct ib_mr_attr *mr_attr); + int (*dereg_mr)(struct ib_mr *mr); + int (*rereg_phys_mr)(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + int (*bind_mw)(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + int (*dealloc_mw)(struct ib_mw *mw); + struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + int (*map_phys_fmr)(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova); + int (*unmap_fmr)(struct ib_fmr_list_head *fmr_list); + int (*dealloc_fmr)(struct ib_fmr *fmr); + int (*attach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*detach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*process_mad)(struct ib_device *device, + int process_mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + + struct ib_dma_mapping_ops *dma_ops; + + struct module *owner; +#ifdef notyet + struct class_device class_dev; + struct kobject ports_parent; + struct list_head port_list; +#endif + enum { + IB_DEV_UNINITIALIZED, + IB_DEV_REGISTERED, + IB_DEV_UNREGISTERED + } reg_state; + + u64 uverbs_cmd_mask; + int uverbs_abi_ver; + + char node_desc[64]; + __be64 node_guid; + u8 node_type; + u8 phys_port_cnt; +}; + +struct ib_client { + char *name; + void 
(*add) (struct ib_device *); + void (*remove)(struct ib_device *); + TAILQ_ENTRY(ib_client) list; +}; + +struct ib_device *ib_alloc_device(size_t size); +void ib_dealloc_device(struct ib_device *device); + +int ib_register_device (struct ib_device *device); +void ib_unregister_device(struct ib_device *device); + +int ib_register_client (struct ib_client *client); +void ib_unregister_client(struct ib_client *client); + +void *ib_get_client_data(struct ib_device *device, struct ib_client *client); +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data); + +static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) +{ + return copyin(udata->inbuf, dest, len); +} + +static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) +{ + return copyout(src, udata->outbuf, len); +} + +/** + * ib_modify_qp_is_ok - Check that the supplied attribute mask + * contains all required attributes and no attributes not allowed for + * the given QP state transition. + * @cur_state: Current QP state + * @next_state: Next QP state + * @type: QP type + * @mask: Mask of supplied QP attributes + * + * This function is a helper function that a low-level driver's + * modify_qp method can use to validate the consumer's input. It + * checks that cur_state and next_state are valid QP states, that a + * transition from cur_state to next_state is allowed by the IB spec, + * and that the attribute mask supplied is allowed for the transition. + */ +int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask); + +int ib_register_event_handler (struct ib_event_handler *event_handler); +int ib_unregister_event_handler(struct ib_event_handler *event_handler); +void ib_dispatch_event(struct ib_event *event); + +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr); + +int ib_query_port(struct ib_device *device, + u8 port_num, struct ib_port_attr *port_attr); + +int ib_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid); + +int ib_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + +int ib_find_gid(struct ib_device *device, union ib_gid *gid, + u8 *port_num, u16 *index); + +int ib_find_pkey(struct ib_device *device, + u8 port_num, u16 pkey, u16 *index); + +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + */ +struct ib_pd *ib_alloc_pd(struct ib_device *device); + +/** + * ib_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + */ +int ib_dealloc_pd(struct ib_pd *pd); + +/** + * ib_create_ah - Creates an address handle for the given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. 
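+ *
+ * A minimal sketch (editor's illustration; "remote_lid" and the port
+ * number are placeholders, not values defined by this header), where pd
+ * was obtained from ib_alloc_pd():
+ *
+ *	struct ib_ah_attr attr = {
+ *		.dlid     = remote_lid,
+ *		.sl       = 0,
+ *		.port_num = 1,
+ *	};
+ *	struct ib_ah *ah = ib_create_ah(pd, &attr);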
+ */
+struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_init_ah_from_wc - Initializes address handle attributes from a
+ *   work completion.
+ * @device: Device on which the received message arrived.
+ * @port_num: Port on which the received message arrived.
+ * @wc: Work completion associated with the received message.
+ * @grh: References the received global route header.  This parameter is
+ *   ignored unless the work completion indicates that the GRH is valid.
+ * @ah_attr: Returned attributes that can be used when creating an address
+ *   handle for replying to the message.
+ */
+int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
+		       struct ib_grh *grh, struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_create_ah_from_wc - Creates an address handle associated with the
+ *   sender of the specified work completion.
+ * @pd: The protection domain associated with the address handle.
+ * @wc: Work completion information associated with a received message.
+ * @grh: References the received global route header.  This parameter is
+ *   ignored unless the work completion indicates that the GRH is valid.
+ * @port_num: The outbound port number to associate with the address.
+ *
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
+				   struct ib_grh *grh, u8 port_num);
+
+/**
+ * ib_modify_ah - Modifies the address vector associated with an address
+ *   handle.
+ * @ah: The address handle to modify.
+ * @ah_attr: The new address vector attributes to associate with the
+ *   address handle.
+ */
+int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_query_ah - Queries the address vector associated with an address
+ *   handle.
+ * @ah: The address handle to query.
+ * @ah_attr: The address vector attributes associated with the address
+ *   handle.
+ */
+int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+
+/**
+ * ib_destroy_ah - Destroys an address handle.
+ * @ah: The address handle to destroy.
+ */
+int ib_destroy_ah(struct ib_ah *ah);
+
+/**
+ * ib_create_srq - Creates an SRQ associated with the specified protection
+ *   domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the
+ *   SRQ.  If SRQ creation succeeds, then the attributes are updated to
+ *   the actual capabilities of the created SRQ.
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read to determine the
+ * requested size of the SRQ, and set to the actual values allocated
+ * on return.  If ib_create_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ib_srq *ib_create_srq(struct ib_pd *pd,
+			     struct ib_srq_init_attr *srq_init_attr);
+
+/**
+ * ib_modify_srq - Modifies the attributes for the specified SRQ.
+ * @srq: The SRQ to modify.
+ * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
+ *   the current values of selected SRQ attributes are returned.
+ * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
+ *   are being modified.
+ *
+ * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or
+ * IB_SRQ_LIMIT to set the SRQ's limit and request notification when
+ * the number of receives queued drops below the limit.
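+ *
+ * For example (editor's sketch; the threshold is a placeholder), arming
+ * the limit so that an IB_EVENT_SRQ_LIMIT_REACHED event fires once
+ * fewer than 16 posted receives remain:
+ *
+ *	struct ib_srq_attr attr = { .srq_limit = 16 };
+ *	ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);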
+ */ +int ib_modify_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask); + +/** + * ib_query_srq - Returns the attribute list and current values for the + * specified SRQ. + * @srq: The SRQ to query. + * @srq_attr: The attributes of the specified SRQ. + */ +int ib_query_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + +/** + * ib_destroy_srq - Destroys the specified SRQ. + * @srq: The SRQ to destroy. + */ +int ib_destroy_srq(struct ib_srq *srq); + +/** + * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. + * @srq: The SRQ to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); +} + +/** + * ib_create_qp - Creates a QP associated with the specified protection + * domain. + * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + */ +struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr); + +/** + * ib_modify_qp - Modifies the attributes for the specified QP and then + * transitions the QP to the given state. + * @qp: The QP to modify. + * @qp_attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @qp_attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + */ +int ib_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask); + +/** + * ib_query_qp - Returns the attribute list and current values for the + * specified QP. + * @qp: The QP to query. + * @qp_attr: The attributes of the specified QP. + * @qp_attr_mask: A bit-mask used to select specific attributes to query. + * @qp_init_attr: Additional attributes of the selected QP. + * + * The qp_attr_mask may be used to limit the query to gathering only the + * selected attributes. + */ +int ib_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + +/** + * ib_destroy_qp - Destroys the specified QP. + * @qp: The QP to destroy. + */ +int ib_destroy_qp(struct ib_qp *qp); + +/** + * ib_post_send - Posts a list of work requests to the send queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @send_wr: A list of work requests to post on the send queue. + * @bad_send_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + return qp->device->post_send(qp, send_wr, bad_send_wr); +} + +/** + * ib_post_recv - Posts a list of work requests to the receive queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. 
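+ *
+ * A minimal sketch (editor's illustration; "sge" names a previously
+ * prepared ib_sge for a registered buffer):
+ *
+ *	struct ib_recv_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1 };
+ *	struct ib_recv_wr *bad_wr;
+ *	int rc = ib_post_recv(qp, &wr, &bad_wr);
+ *	... on failure, bad_wr points at the first work request
+ *	    that could not be posted ...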
+ */ +static inline int ib_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return qp->device->post_recv(qp, recv_wr, bad_recv_wr); +} + +/** + * ib_create_cq - Creates a CQ on the specified device. + * @device: The device on which to create the CQ. + * @comp_handler: A user-specified callback that is invoked when a + * completion event occurs on the CQ. + * @event_handler: A user-specified callback that is invoked when an + * asynchronous event not associated with a completion occurs on the CQ. + * @cq_context: Context associated with the CQ returned to the user via + * the associated completion and event handlers. + * @cqe: The minimum size of the CQ. + * @comp_vector - Completion vector used to signal completion events. + * Must be >= 0 and < context->num_comp_vectors. + * + * Users can examine the cq structure to determine the actual CQ size. + */ +struct ib_cq *ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe, int comp_vector); + +/** + * ib_resize_cq - Modifies the capacity of the CQ. + * @cq: The CQ to resize. + * @cqe: The minimum size of the CQ. + * + * Users can examine the cq structure to determine the actual CQ size. + */ +int ib_resize_cq(struct ib_cq *cq, int cqe); + +/** + * ib_destroy_cq - Destroys the specified CQ. + * @cq: The CQ to destroy. + */ +int ib_destroy_cq(struct ib_cq *cq); + +/** + * ib_poll_cq - poll a CQ for completion(s) + * @cq:the CQ being polled + * @num_entries:maximum number of completions to return + * @wc:array of at least @num_entries &struct ib_wc where completions + * will be returned + * + * Poll a CQ for (possibly multiple) completions. If the return value + * is < 0, an error occurred. If the return value is >= 0, it is the + * number of completions returned. If the return value is + * non-negative and < num_entries, then the CQ was emptied. + */ +static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, + struct ib_wc *wc) +{ + return cq->device->poll_cq(cq, num_entries, wc); +} + +/** + * ib_peek_cq - Returns the number of unreaped completions currently + * on the specified CQ. + * @cq: The CQ to peek. + * @wc_cnt: A minimum number of unreaped completions to check for. + * + * If the number of unreaped completions is greater than or equal to wc_cnt, + * this function returns wc_cnt, otherwise, it returns the actual number of + * unreaped completions. + */ +int ib_peek_cq(struct ib_cq *cq, int wc_cnt); + +/** + * ib_req_notify_cq - Request completion notification on a CQ. + * @cq: The CQ to generate an event for. + * @flags: + * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP + * to request an event on the next solicited event or next work + * completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS + * may also be |ed in to request a hint about missed events, as + * described below. + * + * Return Value: + * < 0 means an error occurred while requesting notification + * == 0 means notification was requested successfully, and if + * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events + * were missed and it is safe to wait for another event. In + * this case is it guaranteed that any work completions added + * to the CQ since the last CQ poll will trigger a completion + * notification event. + * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed + * in. 
It means that the consumer must poll the CQ again to + * make sure it is empty to avoid missing an event because of a + * race between requesting notification and an entry being + * added to the CQ. This return value means it is possible + * (but not guaranteed) that a work completion has been added + * to the CQ since the last poll without triggering a + * completion notification event. + */ +static inline int ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify_flags flags) +{ + return cq->device->req_notify_cq(cq, flags); +} + +/** + * ib_req_ncomp_notif - Request completion notification when there are + * at least the specified number of unreaped completions on the CQ. + * @cq: The CQ to generate an event for. + * @wc_cnt: The number of unreaped completions that should be on the + * CQ before an event is generated. + */ +static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) +{ + return cq->device->req_ncomp_notif ? + cq->device->req_ncomp_notif(cq, wc_cnt) : + ENOSYS; +} + +/** + * ib_get_dma_mr - Returns a memory region for system memory that is + * usable for DMA. + * @pd: The protection domain associated with the memory region. + * @mr_access_flags: Specifies the memory access rights. + * + * Note that the ib_dma_*() functions defined below must be used + * to create/destroy addresses used with the Lkey or Rkey returned + * by ib_get_dma_mr(). + */ +struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); +#ifdef notyet +/** + * ib_dma_mapping_error - check a DMA addr for error + * @dev: The device for which the dma_addr was created + * @dma_addr: The DMA address to check + */ +static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + if (dev->dma_ops) + return dev->dma_ops->mapping_error(dev, dma_addr); + return dma_mapping_error(dma_addr); +} + +/** + * ib_dma_map_single - Map a kernel virtual address to DMA address + * @dev: The device for which the dma_addr is to be created + * @cpu_addr: The kernel virtual address + * @size: The size of the region in bytes + * @direction: The direction of the DMA + */ +static inline u64 ib_dma_map_single(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + return dev->dma_ops->map_single(dev, cpu_addr, size, direction); + return dma_map_single(dev->dma_device, cpu_addr, size, direction); +} + +/** + * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single() + * @dev: The device for which the DMA address was created + * @addr: The DMA address + * @size: The size of the region in bytes + * @direction: The direction of the DMA + */ +static inline void ib_dma_unmap_single(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + dev->dma_ops->unmap_single(dev, addr, size, direction); + else + dma_unmap_single(dev->dma_device, addr, size, direction); +} + +/** + * ib_dma_map_page - Map a physical page to DMA address + * @dev: The device for which the dma_addr is to be created + * @page: The page to be mapped + * @offset: The offset within the page + * @size: The size of the region in bytes + * @direction: The direction of the DMA + */ +static inline u64 ib_dma_map_page(struct ib_device *dev, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + return dev->dma_ops->map_page(dev, page, offset, size, direction); + return dma_map_page(dev->dma_device, page, offset, size, direction); +} + +/** + * 
ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page() + * @dev: The device for which the DMA address was created + * @addr: The DMA address + * @size: The size of the region in bytes + * @direction: The direction of the DMA + */ +static inline void ib_dma_unmap_page(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + dev->dma_ops->unmap_page(dev, addr, size, direction); + else + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +/** + * ib_dma_map_sg - Map a scatter/gather list to DMA addresses + * @dev: The device for which the DMA addresses are to be created + * @sg: The array of scatter/gather entries + * @nents: The number of scatter/gather entries + * @direction: The direction of the DMA + */ +static inline int ib_dma_map_sg(struct ib_device *dev, + struct rdma_scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + return dev->dma_ops->map_sg(dev, sg, nents, direction); + return dma_map_sg(dev->dma_device, sg, nents, direction); +} + +/** + * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses + * @dev: The device for which the DMA addresses were created + * @sg: The array of scatter/gather entries + * @nents: The number of scatter/gather entries + * @direction: The direction of the DMA + */ +static inline void ib_dma_unmap_sg(struct ib_device *dev, + struct rdma_scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + if (dev->dma_ops) + dev->dma_ops->unmap_sg(dev, sg, nents, direction); + else + dma_unmap_sg(dev->dma_device, sg, nents, direction); +} + +/** + * ib_sg_dma_address - Return the DMA address from a scatter/gather entry + * @dev: The device for which the DMA addresses were created + * @sg: The scatter/gather entry + */ +static inline u64 ib_sg_dma_address(struct ib_device *dev, + struct rdma_scatterlist *sg) +{ + if (dev->dma_ops) + return dev->dma_ops->dma_address(dev, sg); + return sg_dma_address(sg); +} + +/** + * ib_sg_dma_len - Return the DMA length from a scatter/gather entry + * @dev: The device for which the DMA addresses were created + * @sg: The scatter/gather entry + */ +static inline unsigned int ib_sg_dma_len(struct ib_device *dev, + struct rdma_scatterlist *sg) +{ + if (dev->dma_ops) + return dev->dma_ops->dma_len(dev, sg); + return sg_dma_len(sg); +} + +/** + * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU + * @dev: The device for which the DMA address was created + * @addr: The DMA address + * @size: The size of the region in bytes + * @dir: The direction of the DMA + */ +static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, + u64 addr, + size_t size, + enum dma_data_direction dir) +{ + if (dev->dma_ops) + dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir); + else + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +/** + * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device + * @dev: The device for which the DMA address was created + * @addr: The DMA address + * @size: The size of the region in bytes + * @dir: The direction of the DMA + */ +static inline void ib_dma_sync_single_for_device(struct ib_device *dev, + u64 addr, + size_t size, + enum dma_data_direction dir) +{ + if (dev->dma_ops) + dev->dma_ops->sync_single_for_device(dev, addr, size, dir); + else + dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +/** + * ib_dma_alloc_coherent - Allocate memory and map it for DMA + * @dev: The device for which the DMA 
address is requested + * @size: The size of the region to allocate in bytes + * @dma_handle: A pointer for returning the DMA address of the region + * @flag: memory allocator flags + */ +static inline void *ib_dma_alloc_coherent(struct ib_device *dev, + size_t size, + u64 *dma_handle, + gfp_t flag) +{ + if (dev->dma_ops) + return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag); + else { + dma_addr_t handle; + void *ret; + + ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag); + *dma_handle = handle; + return ret; + } +} + +/** + * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() + * @dev: The device for which the DMA addresses were allocated + * @size: The size of the region + * @cpu_addr: the address returned by ib_dma_alloc_coherent() + * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() + */ +static inline void ib_dma_free_coherent(struct ib_device *dev, + size_t size, void *cpu_addr, + u64 dma_handle) +{ + if (dev->dma_ops) + dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); + else + dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); +} +#endif +/** + * ib_reg_phys_mr - Prepares a virtually addressed memory region for use + * by an HCA. + * @pd: The protection domain associated assigned to the registered region. + * @phys_buf_array: Specifies a list of physical buffers to use in the + * memory region. + * @num_phys_buf: Specifies the size of the phys_buf_array. + * @mr_access_flags: Specifies the memory access rights. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + +/** + * ib_rereg_phys_mr - Modifies the attributes of an existing memory region. + * Conceptually, this call performs the functions deregister memory region + * followed by register physical memory region. Where possible, + * resources are reused instead of deallocated and reallocated. + * @mr: The memory region to modify. + * @mr_rereg_mask: A bit-mask used to indicate which of the following + * properties of the memory region are being modified. + * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies + * the new protection domain to associated with the memory region, + * otherwise, this parameter is ignored. + * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies a list of physical buffers to use in the new + * translation, otherwise, this parameter is ignored. + * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies the size of the phys_buf_array, otherwise, this + * parameter is ignored. + * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this + * field specifies the new memory access rights, otherwise, this + * parameter is ignored. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +int ib_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + +/** + * ib_query_mr - Retrieves information about a specific memory region. + * @mr: The memory region to retrieve information about. + * @mr_attr: The attributes of the specified memory region. + */ +int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +/** + * ib_dereg_mr - Deregisters a memory region and removes it from the + * HCA translation table. 
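+ * (Editor's note on the usual ordering assumption: the region should no
+ * longer be referenced by outstanding work requests or bound memory
+ * windows when it is deregistered.)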
+ * @mr: The memory region to deregister. + */ +int ib_dereg_mr(struct ib_mr *mr); + +/** + * ib_alloc_mw - Allocates a memory window. + * @pd: The protection domain associated with the memory window. + */ +struct ib_mw *ib_alloc_mw(struct ib_pd *pd); + +/** + * ib_bind_mw - Posts a work request to the send queue of the specified + * QP, which binds the memory window to the given address range and + * remote access attributes. + * @qp: QP to post the bind work request on. + * @mw: The memory window to bind. + * @mw_bind: Specifies information about the memory window, including + * its address range, remote access rights, and associated memory region. + */ +static inline int ib_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* XXX reference counting in corresponding MR? */ + return mw->device->bind_mw ? + mw->device->bind_mw(qp, mw, mw_bind) : + ENOSYS; +} + +/** + * ib_dealloc_mw - Deallocates a memory window. + * @mw: The memory window to deallocate. + */ +int ib_dealloc_mw(struct ib_mw *mw); + +/** + * ib_alloc_fmr - Allocates a unmapped fast memory region. + * @pd: The protection domain associated with the unmapped region. + * @mr_access_flags: Specifies the memory access rights. + * @fmr_attr: Attributes of the unmapped region. + * + * A fast memory region must be mapped before it can be used as part of + * a work request. + */ +struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +/** + * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. + * @fmr: The fast memory region to associate with the pages. + * @page_list: An array of physical pages to map to the fast memory region. + * @list_len: The number of pages in page_list. + * @iova: The I/O virtual address to use with the mapped region. + */ +static inline int ib_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova) +{ + return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); +} + +/** + * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. + * @fmr_list: A linked list of fast memory regions to unmap. + */ +int ib_unmap_fmr(struct ib_fmr_list_head *fmr_list); + +/** + * ib_dealloc_fmr - Deallocates a fast memory region. + * @fmr: The fast memory region to deallocate. + */ +int ib_dealloc_fmr(struct ib_fmr *fmr); + +/** + * ib_attach_mcast - Attaches the specified QP to a multicast group. + * @qp: QP to attach to the multicast group. The QP must be type + * IB_QPT_UD. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + * + * In order to send and receive multicast packets, subnet + * administration must have created the multicast group and configured + * the fabric appropriately. The port associated with the specified + * QP must also be a member of the multicast group. + */ +int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +/** + * ib_detach_mcast - Detaches the specified QP from a multicast group. + * @qp: QP to detach from the multicast group. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + */ +int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +#endif /* IB_VERBS_H */ diff --git a/sys/contrib/rdma/iw_cm.h b/sys/contrib/rdma/iw_cm.h new file mode 100644 index 000000000000..e594d669375a --- /dev/null +++ b/sys/contrib/rdma/iw_cm.h @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. 
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $FreeBSD$ + */ +#ifndef IW_CM_H +#define IW_CM_H + +#include <contrib/rdma/ib_cm.h> + +#include <sys/socket.h> +#include <sys/socketvar.h> + +struct iw_cm_id; + +enum iw_cm_event_type { + IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */ + IW_CM_EVENT_CONNECT_REPLY, /* reply from active connect request */ + IW_CM_EVENT_ESTABLISHED, /* passive side accept successful */ + IW_CM_EVENT_DISCONNECT, /* orderly shutdown */ + IW_CM_EVENT_CLOSE /* close complete */ +}; + +enum iw_cm_event_status { + IW_CM_EVENT_STATUS_OK = 0, /* request successful */ + IW_CM_EVENT_STATUS_ACCEPTED = 0, /* connect request accepted */ + IW_CM_EVENT_STATUS_REJECTED, /* connect request rejected */ + IW_CM_EVENT_STATUS_TIMEOUT, /* the operation timed out */ + IW_CM_EVENT_STATUS_RESET, /* reset from remote peer */ + IW_CM_EVENT_STATUS_EINVAL, /* asynchronous failure for bad parm */ +}; + +struct iw_cm_event { + enum iw_cm_event_type event; + enum iw_cm_event_status status; + struct sockaddr_in local_addr; + struct sockaddr_in remote_addr; + void *private_data; + u8 private_data_len; + void* provider_data; + struct socket *so; +}; + +/** + * iw_cm_handler - Function to be called by the IW CM when delivering events + * to the client. + * + * @cm_id: The IW CM identifier associated with the event. + * @event: Pointer to the event structure. + */ +typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id, + struct iw_cm_event *event); + +/** + * iw_event_handler - Function called by the provider when delivering provider + * events to the IW CM. Returns either 0 indicating the event was processed + * or -errno if the event could not be processed. + * + * @cm_id: The IW CM identifier associated with the event. + * @event: Pointer to the event structure. 
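+ *
+ * Both handler types share this signature.  A client's iw_cm_handler
+ * (editor's sketch; "my_conn_param" is a placeholder) typically just
+ * dispatches on the event type:
+ *
+ *	static int my_cm_handler(struct iw_cm_id *id, struct iw_cm_event *ev)
+ *	{
+ *		switch (ev->event) {
+ *		case IW_CM_EVENT_CONNECT_REQUEST:
+ *			return iw_cm_accept(id, &my_conn_param);
+ *		default:
+ *			return 0;
+ *		}
+ *	}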
+ */
+typedef int (*iw_event_handler)(struct iw_cm_id *cm_id,
+				struct iw_cm_event *event);
+
+struct iw_cm_id {
+	iw_cm_handler		cm_handler;	/* client callback function */
+	void			*context;	/* client cb context */
+	struct ib_device	*device;
+	struct sockaddr_in	local_addr;
+	struct sockaddr_in	remote_addr;
+	void			*provider_data;	/* provider private data */
+	iw_event_handler	event_handler;	/* cb for provider
+						   events */
+	/* Used by provider to add and remove refs on IW cm_id */
+	void (*add_ref)(struct iw_cm_id *);
+	void (*rem_ref)(struct iw_cm_id *);
+	struct socket		*so;
+};
+
+struct iw_cm_conn_param {
+	const void *private_data;
+	u16 private_data_len;
+	u32 ord;
+	u32 ird;
+	u32 qpn;
+};
+
+struct iw_cm_verbs {
+	void		(*add_ref)(struct ib_qp *qp);
+
+	void		(*rem_ref)(struct ib_qp *qp);
+
+	struct ib_qp *	(*get_qp)(struct ib_device *device,
+				  int qpn);
+
+	int		(*connect)(struct iw_cm_id *cm_id,
+				   struct iw_cm_conn_param *conn_param);
+
+	int		(*accept)(struct iw_cm_id *cm_id,
+				  struct iw_cm_conn_param *conn_param);
+
+	int		(*reject)(struct iw_cm_id *cm_id,
+				  const void *pdata, u8 pdata_len);
+
+	int		(*create_listen)(struct iw_cm_id *cm_id,
+					 int backlog);
+
+	int		(*destroy_listen)(struct iw_cm_id *cm_id);
+};
+
+/**
+ * iw_create_cm_id - Create an IW CM identifier.
+ *
+ * @device: The IB device on which to create the IW CM identifier.
+ * @so: The socket to be used for establishing the rdma connection.
+ * @cm_handler: User callback invoked to report events associated with the
+ *   returned IW CM identifier.
+ * @context: User specified context associated with the id.
+ */
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 struct socket *so,
+				 iw_cm_handler cm_handler, void *context);
+
+/**
+ * iw_destroy_cm_id - Destroy an IW CM identifier.
+ *
+ * @cm_id: The previously created IW CM identifier to destroy.
+ *
+ * The client can assume that no events will be delivered for the CM ID after
+ * this function returns.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id);
+
+/**
+ * iw_cm_unbind_qp - Unbind the specified IW CM identifier and QP.
+ *
+ * @cm_id: The IW CM identifier to unbind from the QP.
+ * @qp: The QP
+ *
+ * This is called by the provider when destroying the QP to ensure
+ * that any references held by the IWCM are released. It may also
+ * be called by the IWCM when destroying a CM_ID so that any
+ * references held by the provider are released.
+ */
+void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
+
+/**
+ * iw_cm_get_qp - Return the ib_qp associated with a QPN.
+ *
+ * @device: The IB device
+ * @qpn: The queue pair number
+ */
+struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
+
+/**
+ * iw_cm_listen - Listen for incoming connection requests on the
+ * specified IW CM id.
+ *
+ * @cm_id: The IW CM identifier.
+ * @backlog: The maximum number of outstanding un-accepted inbound listen
+ *   requests to queue.
+ *
+ * The source address and port number are specified in the IW CM identifier
+ * structure.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog);
+
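+/*
+ * Editor's sketch of passive-side setup using the calls above ("ibdev",
+ * "so", "my_cm_handler", "ctx" and "listen_addr" are placeholders):
+ *
+ *	struct iw_cm_id *id = iw_create_cm_id(ibdev, so, my_cm_handler, ctx);
+ *	id->local_addr = listen_addr;	... hypothetical sockaddr_in ...
+ *	if (iw_cm_listen(id, 8))
+ *		iw_destroy_cm_id(id);
+ */
+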
+ +/** + * iw_cm_reject - Reject an incoming connection request. + * + * @cm_id: Connection identifier associated with the request. + * @private_data: Pointer to data to deliver to the remote peer as part of the + * reject message. + * @private_data_len: The number of bytes in the private_data parameter. + * + * The client can assume that no events will be delivered to the specified IW + * CM identifier following the return of this function. The private_data + * buffer is available for reuse when this function returns. + */ +int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, + u8 private_data_len); + +/** + * iw_cm_connect - Called to request a connection to a remote peer. + * + * @cm_id: The IW CM identifier for the connection. + * @iw_param: Pointer to a structure containing connection establishment + * parameters. + * + * Events may be delivered to the specified IW CM identifier prior to the + * return of this function. If this function returns a non-zero value, the + * client can assume that no events will be delivered to the specified IW CM + * identifier. + */ +int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param); + +/** + * iw_cm_disconnect - Close the specified connection. + * + * @cm_id: The IW CM identifier to close. + * @abrupt: If 0, the connection will be closed gracefully, otherwise, the + * connection will be reset. + * + * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is + * delivered. + */ +int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt); + +/** + * iw_cm_init_qp_attr - Called to initialize the attributes of the QP + * associated with an IW CM identifier. + * + * @cm_id: The IW CM identifier associated with the QP + * @qp_attr: Pointer to the QP attributes structure. + * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are + * valid. + */ +int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, + int *qp_attr_mask); + +#endif /* IW_CM_H */ diff --git a/sys/contrib/rdma/krping/getopt.c b/sys/contrib/rdma/krping/getopt.c new file mode 100644 index 000000000000..701910ea8b33 --- /dev/null +++ b/sys/contrib/rdma/krping/getopt.c @@ -0,0 +1,77 @@ +/* + * lifted from fs/ncpfs/getopt.c + * + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/ctype.h> +#include <sys/param.h> +#include <sys/libkern.h> +#include "getopt.h" + +/** + * krping_getopt - option parser + * @caller: name of the caller, for error messages + * @options: the options string + * @opts: an array of &struct krping_option entries controlling parser + * operations + * @optopt: output; will contain the current option + * @optarg: output; will contain the value (if one exists) + * @value: output; will be overwritten with the integer value of the current + * argument for OPT_INT options. + * + * Helper to parse options in the format used by mount ("a=b,c=d,e,f"). + * Returns opts->val if a matching entry in the 'opts' array is found, + * 0 when no more tokens are found, or a negative errno value if an error is + * encountered.
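+ *
+ * A short usage sketch (illustrative; krping.c below drives the parser the
+ * same way with a larger table):
+ *
+ *	static const struct krping_option opts[] = {
+ *		{"addr",  OPT_STRING,  'a'},
+ *		{"port",  OPT_INT,     'p'},
+ *		{"debug", OPT_NOPARAM, 'd'},
+ *		{NULL, 0, 0}
+ *	};
+ *	char *optarg;
+ *	unsigned long optint;
+ *	int op;
+ *
+ *	while ((op = krping_getopt("demo", &cmd, opts, NULL, &optarg,
+ *	    &optint)) > 0) {
+ *		switch (op) {
+ *		case 'a': ... break;	string value in optarg
+ *		case 'p': ... break;	integer value in optint
+ *		case 'd': ... break;	flag, no argument
+ *		}
+ *	}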
+ */ +int krping_getopt(const char *caller, char **options, + const struct krping_option *opts, char **optopt, + char **optarg, unsigned long *value) +{ + char *token; + char *val; + + do { + if ((token = strsep(options, ",")) == NULL) + return 0; + } while (*token == '\0'); + if (optopt) + *optopt = token; + + if ((val = strchr (token, '=')) != NULL) { + *val++ = 0; + } + *optarg = val; + for (; opts->name; opts++) { + if (!strcmp(opts->name, token)) { + if (!val) { + if (opts->has_arg & OPT_NOPARAM) { + return opts->val; + } + printf("%s: the %s option requires " + "an argument\n", caller, token); + return -EINVAL; + } + if (opts->has_arg & OPT_INT) { + char* v; + + *value = strtoul(val, &v, 0); + if (!*v) { + return opts->val; + } + printf("%s: invalid numeric value " + "in %s=%s\n", caller, token, val); + return -EDOM; + } + if (opts->has_arg & OPT_STRING) { + return opts->val; + } + printf("%s: unexpected argument %s to the " + "%s option\n", caller, val, token); + return -EINVAL; + } + } + printf("%s: Unrecognized option %s\n", caller, token); + return -EOPNOTSUPP; +} diff --git a/sys/contrib/rdma/krping/getopt.h b/sys/contrib/rdma/krping/getopt.h new file mode 100644 index 000000000000..610ec7625424 --- /dev/null +++ b/sys/contrib/rdma/krping/getopt.h @@ -0,0 +1,21 @@ +/* + * lifted from fs/ncpfs/getopt.c + * + * $FreeBSD$ + */ +#ifndef _KRPING_GETOPT_H +#define _KRPING_GETOPT_H + +#define OPT_NOPARAM 1 +#define OPT_INT 2 +#define OPT_STRING 4 +struct krping_option { + const char *name; + unsigned int has_arg; + int val; +}; + +extern int krping_getopt(const char *caller, char **options, const struct krping_option *opts, + char **optopt, char **optarg, unsigned long *value); + +#endif /* _KRPING_GETOPT_H */ diff --git a/sys/contrib/rdma/krping/krping.c b/sys/contrib/rdma/krping/krping.c new file mode 100644 index 000000000000..202daf85eda6 --- /dev/null +++ b/sys/contrib/rdma/krping/krping.c @@ -0,0 +1,1865 @@ +/* + * Copyright (c) 2005 Ammasso, Inc. All rights reserved. + * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/ctype.h> + +#include <sys/param.h> +#include <sys/condvar.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/module.h> +#include <sys/endian.h> +#include <sys/limits.h> +#include <sys/proc.h> +#include <sys/signalvar.h> + +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/syslog.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <contrib/rdma/rdma_cm.h> + +#include "getopt.h" +#include "krping.h" + +#define PFX "krping: " + +static int debug = 0; +#define DEBUG_LOG if (debug) printf + +static const struct krping_option krping_opts[] = { + {"count", OPT_INT, 'C'}, + {"size", OPT_INT, 'S'}, + {"addr", OPT_STRING, 'a'}, + {"port", OPT_INT, 'p'}, + {"verbose", OPT_NOPARAM, 'v'}, + {"validate", OPT_NOPARAM, 'V'}, + {"server", OPT_NOPARAM, 's'}, + {"client", OPT_NOPARAM, 'c'}, + {"dmamr", OPT_NOPARAM, 'D'}, + {"debug", OPT_NOPARAM, 'd'}, + {"wlat", OPT_NOPARAM, 'l'}, + {"rlat", OPT_NOPARAM, 'L'}, + {"bw", OPT_NOPARAM, 'B'}, + {"tx-depth", OPT_INT, 't'}, + {"poll", OPT_NOPARAM, 'P'}, + {NULL, 0, 0} +}; + +struct mtx krping_mutex; + +/* + * List of running krping threads. + */ +struct krping_cb_list krping_cbs; + +/* + * krping "ping/pong" loop: + * client sends source rkey/addr/len + * server receives source rkey/add/len + * server rdma reads "ping" data from source + * server sends "go ahead" on rdma read completion + * client sends sink rkey/addr/len + * server receives sink rkey/addr/len + * server rdma writes "pong" data to sink + * server sends "go ahead" on rdma write completion + * <repeat loop> + */ + +/* + * Default max buffer size for IO... + */ +#define RPING_BUFSIZE 128*1024 +#define RPING_SQ_DEPTH 32 + + +/* lifted from netinet/libalias/alias_proxy.c */ +static int inet_aton(const char *cp, struct in_addr *addr); +static int +inet_aton(cp, addr) + const char *cp; + struct in_addr *addr; +{ + u_long parts[4]; + in_addr_t val; + const char *c; + char *endptr; + int gotend, n; + + c = (const char *)cp; + n = 0; + /* + * Run through the string, grabbing numbers until + * the end of the string, or some error + */ + gotend = 0; + while (!gotend) { + unsigned long l; + + l = strtoul(c, &endptr, 0); + + if (l == ULONG_MAX || (l == 0 && endptr == c)) + return (0); + + val = (in_addr_t)l; + /* + * If the whole string is invalid, endptr will equal + * c.. this way we can make sure someone hasn't + * gone '.12' or something which would get past + * the next check. + */ + if (endptr == c) + return (0); + parts[n] = val; + c = endptr; + + /* Check the next character past the previous number's end */ + switch (*c) { + case '.' : + /* Make sure we only do 3 dots .. */ + if (n == 3) /* Whoops. Quit. */ + return (0); + n++; + c++; + break; + + case '\0': + gotend = 1; + break; + + default: + if (isspace((unsigned char)*c)) { + gotend = 1; + break; + } else + return (0); /* Invalid character, so fail */ + } + + } + + /* + * Concoct the address according to + * the number of parts specified. + */ + + switch (n) { + case 0: /* a -- 32 bits */ + /* + * Nothing is necessary here. Overflow checking was + * already done in strtoul(). 
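+ * For reference, the four accepted forms follow classic inet_aton(3)
+ * semantics: "0x01020304" parses as 1.2.3.4 (one 32-bit value),
+ * "1.2" as 1.0.0.2 (8.24 bits), "1.2.3" as 1.2.0.3 (8.8.16 bits),
+ * and "1.2.3.4" as 1.2.3.4 (8.8.8.8 bits).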
+ */ + break; + case 1: /* a.b -- 8.24 bits */ + if (val > 0xffffff || parts[0] > 0xff) + return (0); + val |= parts[0] << 24; + break; + + case 2: /* a.b.c -- 8.8.16 bits */ + if (val > 0xffff || parts[0] > 0xff || parts[1] > 0xff) + return (0); + val |= (parts[0] << 24) | (parts[1] << 16); + break; + + case 3: /* a.b.c.d -- 8.8.8.8 bits */ + if (val > 0xff || parts[0] > 0xff || parts[1] > 0xff || + parts[2] > 0xff) + return (0); + val |= (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8); + break; + } + + if (addr != NULL) + addr->s_addr = htonl(val); + return (1); +} + + +static void krping_wait(struct krping_cb *cb, int state) +{ + int rc; + mtx_lock(&cb->lock); + while (cb->state < state) { + rc = msleep(cb, &cb->lock, 0, "krping", 0); + if (rc && rc != ERESTART) { + cb->state = ERROR; + break; + } + } + mtx_unlock(&cb->lock); +} + +static int krping_cma_event_handler(struct rdma_cm_id *cma_id, + struct rdma_cm_event *event) +{ + int ret; + struct krping_cb *cb = cma_id->context; + + DEBUG_LOG(PFX "cma_event type %d cma_id %p (%s)\n", event->event, cma_id, + (cma_id == cb->cm_id) ? "parent" : "child"); + + mtx_lock(&cb->lock); + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + cb->state = ADDR_RESOLVED; + ret = rdma_resolve_route(cma_id, 2000); + if (ret) { + log(LOG_ERR, "rdma_resolve_route error %d\n", + ret); + wakeup(cb); + } + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + cb->state = ROUTE_RESOLVED; + wakeup(cb); + break; + + case RDMA_CM_EVENT_CONNECT_REQUEST: + cb->state = CONNECT_REQUEST; + cb->child_cm_id = cma_id; + DEBUG_LOG(PFX "child cma %p\n", cb->child_cm_id); + wakeup(cb); + break; + + case RDMA_CM_EVENT_ESTABLISHED: + DEBUG_LOG(PFX "ESTABLISHED\n"); + if (!cb->server) { + cb->state = CONNECTED; + wakeup(cb); + } + break; + + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + log(LOG_ERR, "cma event %d, error %d\n", event->event, + event->status); + cb->state = ERROR; + wakeup(cb); + break; + + case RDMA_CM_EVENT_DISCONNECTED: + DEBUG_LOG(PFX "DISCONNECT EVENT...\n"); + cb->state = ERROR; + wakeup(cb); + break; + + case RDMA_CM_EVENT_DEVICE_REMOVAL: + DEBUG_LOG(PFX "cma detected device removal!!!!\n"); + break; + + default: + log(LOG_ERR, "oof bad type!\n"); + wakeup(cb); + break; + } + mtx_unlock(&cb->lock); + return 0; +} + +static int server_recv(struct krping_cb *cb, struct ib_wc *wc) +{ + if (wc->byte_len != sizeof(cb->recv_buf)) { + log(LOG_ERR, "Received bogus data, size %d\n", + wc->byte_len); + return -1; + } + + cb->remote_rkey = ntohl(cb->recv_buf.rkey); + cb->remote_addr = ntohll(cb->recv_buf.buf); + cb->remote_len = ntohl(cb->recv_buf.size); + DEBUG_LOG(PFX "Received rkey %x addr %llx len %d from peer\n", + cb->remote_rkey, (unsigned long long)cb->remote_addr, + cb->remote_len); + + if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE) + cb->state = RDMA_READ_ADV; + else + cb->state = RDMA_WRITE_ADV; + + return 0; +} + +static int client_recv(struct krping_cb *cb, struct ib_wc *wc) +{ + if (wc->byte_len != sizeof(cb->recv_buf)) { + log(LOG_ERR, "Received bogus data, size %d\n", + wc->byte_len); + return -1; + } + + if (cb->state == RDMA_READ_ADV) + cb->state = RDMA_WRITE_ADV; + else + cb->state = RDMA_WRITE_COMPLETE; + + return 0; +} + +static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) +{ + struct krping_cb *cb = ctx; + struct ib_wc wc; + struct ib_recv_wr *bad_wr; + int ret; + + 
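+	/*
+	 * The 16 bytes decoded by server_recv()/client_recv() above are the
+	 * krping_rdma_info advertisement (defined in krping.h); it crosses
+	 * the wire in network byte order. A sketch of the encode/decode
+	 * pairing (krping_format_send() below is the real encode side):
+	 *
+	 *	info->buf  = htonll(addr);	// 64-bit TO
+	 *	info->rkey = htonl(mr->rkey);	// 32-bit STAG/rkey
+	 *	info->size = htonl(len);	// 32-bit length
+	 *
+	 *	cb->remote_addr = ntohll(cb->recv_buf.buf);
+	 *	cb->remote_rkey = ntohl(cb->recv_buf.rkey);
+	 *	cb->remote_len  = ntohl(cb->recv_buf.size);
+	 */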
mtx_lock(&cb->lock); + KASSERT(cb->cq == cq, ("bad condition")); + if (cb->state == ERROR) { + log(LOG_ERR, "cq completion in ERROR state\n"); + mtx_unlock(&cb->lock); + return; + } + if (!cb->wlat && !cb->rlat && !cb->bw) + ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); + while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { + if (wc.status) { + if (wc.status != IB_WC_WR_FLUSH_ERR) + log(LOG_ERR, "cq completion failed status %d\n", + wc.status); + goto error; + } + + switch (wc.opcode) { + case IB_WC_SEND: + DEBUG_LOG(PFX "send completion\n"); + cb->stats.send_bytes += cb->send_sgl.length; + cb->stats.send_msgs++; + break; + + case IB_WC_RDMA_WRITE: + DEBUG_LOG(PFX "rdma write completion\n"); + cb->stats.write_bytes += cb->rdma_sq_wr.sg_list->length; + cb->stats.write_msgs++; + cb->state = RDMA_WRITE_COMPLETE; + wakeup(cb); + break; + + case IB_WC_RDMA_READ: + DEBUG_LOG(PFX "rdma read completion\n"); + cb->stats.read_bytes += cb->rdma_sq_wr.sg_list->length; + cb->stats.read_msgs++; + cb->state = RDMA_READ_COMPLETE; + wakeup(cb); + break; + + case IB_WC_RECV: + DEBUG_LOG(PFX "recv completion\n"); + cb->stats.recv_bytes += sizeof(cb->recv_buf); + cb->stats.recv_msgs++; + if (cb->wlat || cb->rlat || cb->bw) + ret = server_recv(cb, &wc); + else + ret = cb->server ? server_recv(cb, &wc) : + client_recv(cb, &wc); + if (ret) { + log(LOG_ERR, "recv wc error: %d\n", ret); + goto error; + } + + ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post recv error: %d\n", + ret); + goto error; + } + wakeup(cb); + break; + + default: + log(LOG_ERR, "unknown!!!!! completion\n"); + goto error; + } + } + if (ret) { + log(LOG_ERR, "poll error %d\n", ret); + goto error; + } + mtx_unlock(&cb->lock); + return; +error: + cb->state = ERROR; + wakeup(cb); + mtx_unlock(&cb->lock); +} + +static int krping_accept(struct krping_cb *cb) +{ + struct rdma_conn_param conn_param; + int ret; + + DEBUG_LOG(PFX "accepting client connection request\n"); + + memset(&conn_param, 0, sizeof conn_param); + conn_param.responder_resources = 1; + conn_param.initiator_depth = 1; + + ret = rdma_accept(cb->child_cm_id, &conn_param); + if (ret) { + log(LOG_ERR, "rdma_accept error: %d\n", ret); + return ret; + } + + if (!cb->wlat && !cb->rlat && !cb->bw) { + krping_wait(cb, CONNECTED); + if (cb->state == ERROR) { + log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state); + return -1; + } + } + return 0; +} + +static void krping_setup_wr(struct krping_cb *cb) +{ + /* XXX X86 only here... not mapping for dma! 
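+	 * vtophys() just translates a kernel virtual address to a physical
+	 * one; that is only safe here because rdma_buf/start_buf come from
+	 * contigmalloc() and the two 16-byte info structs are assumed not to
+	 * span a page boundary. A portable version would go through a real
+	 * DMA mapping layer instead (note the #if 0 dma_unmap_single()
+	 * calls in krping_free_buffers() below).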
*/ + cb->recv_sgl.addr = vtophys(&cb->recv_buf); + cb->recv_sgl.length = sizeof cb->recv_buf; + if (cb->use_dmamr) + cb->recv_sgl.lkey = cb->dma_mr->lkey; + else + cb->recv_sgl.lkey = cb->recv_mr->lkey; + cb->rq_wr.sg_list = &cb->recv_sgl; + cb->rq_wr.num_sge = 1; + + cb->send_sgl.addr = vtophys(&cb->send_buf); + cb->send_sgl.length = sizeof cb->send_buf; + if (cb->use_dmamr) + cb->send_sgl.lkey = cb->dma_mr->lkey; + else + cb->send_sgl.lkey = cb->send_mr->lkey; + + cb->sq_wr.opcode = IB_WR_SEND; + cb->sq_wr.send_flags = IB_SEND_SIGNALED; + cb->sq_wr.sg_list = &cb->send_sgl; + cb->sq_wr.num_sge = 1; + + cb->rdma_addr = vtophys(cb->rdma_buf); + cb->rdma_sgl.addr = cb->rdma_addr; + if (cb->use_dmamr) + cb->rdma_sgl.lkey = cb->dma_mr->lkey; + else + cb->rdma_sgl.lkey = cb->rdma_mr->lkey; + cb->rdma_sq_wr.send_flags = IB_SEND_SIGNALED; + cb->rdma_sq_wr.sg_list = &cb->rdma_sgl; + cb->rdma_sq_wr.num_sge = 1; + + if (!cb->server || cb->wlat || cb->rlat || cb->bw) { + cb->start_addr = vtophys(cb->start_buf); + } +} + +static int krping_setup_buffers(struct krping_cb *cb) +{ + int ret; + struct ib_phys_buf buf; + u64 iovbase; + + DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb); + + if (cb->use_dmamr) { + cb->dma_mr = ib_get_dma_mr(cb->pd, IB_ACCESS_LOCAL_WRITE| + IB_ACCESS_REMOTE_READ| + IB_ACCESS_REMOTE_WRITE); + if (IS_ERR(cb->dma_mr)) { + log(LOG_ERR, "reg_dmamr failed\n"); + return PTR_ERR(cb->dma_mr); + } + } else { + + buf.addr = vtophys(&cb->recv_buf); + buf.size = sizeof cb->recv_buf; + iovbase = vtophys(&cb->recv_buf); + cb->recv_mr = ib_reg_phys_mr(cb->pd, &buf, 1, + IB_ACCESS_LOCAL_WRITE, + &iovbase); + + if (IS_ERR(cb->recv_mr)) { + log(LOG_ERR, "recv_buf reg_mr failed\n"); + return PTR_ERR(cb->recv_mr); + } + + buf.addr = vtophys(&cb->send_buf); + buf.size = sizeof cb->send_buf; + iovbase = vtophys(&cb->send_buf); + cb->send_mr = ib_reg_phys_mr(cb->pd, &buf, 1, + 0, &iovbase); + + if (IS_ERR(cb->send_mr)) { + log(LOG_ERR, "send_buf reg_mr failed\n"); + ib_dereg_mr(cb->recv_mr); + return PTR_ERR(cb->send_mr); + } + } + + cb->rdma_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, 0, -1UL, + PAGE_SIZE, 0); + + if (!cb->rdma_buf) { + log(LOG_ERR, "rdma_buf malloc failed\n"); + ret = ENOMEM; + goto err1; + } + if (!cb->use_dmamr) { + + buf.addr = vtophys(cb->rdma_buf); + buf.size = cb->size; + iovbase = vtophys(cb->rdma_buf); + cb->rdma_mr = ib_reg_phys_mr(cb->pd, &buf, 1, + IB_ACCESS_REMOTE_READ| + IB_ACCESS_REMOTE_WRITE, + &iovbase); + + if (IS_ERR(cb->rdma_mr)) { + log(LOG_ERR, "rdma_buf reg_mr failed\n"); + ret = PTR_ERR(cb->rdma_mr); + goto err2; + } + } + + if (!cb->server || cb->wlat || cb->rlat || cb->bw) { + cb->start_buf = contigmalloc(cb->size, M_DEVBUF, M_WAITOK, + 0, -1UL, PAGE_SIZE, 0); + if (!cb->start_buf) { + log(LOG_ERR, "start_buf malloc failed\n"); + ret = ENOMEM; + goto err2; + } + if (!cb->use_dmamr) { + unsigned flags = IB_ACCESS_REMOTE_READ; + + if (cb->wlat || cb->rlat || cb->bw) + flags |= IB_ACCESS_REMOTE_WRITE; + buf.addr = vtophys(cb->start_buf); + buf.size = cb->size; + iovbase = vtophys(cb->start_buf); + cb->start_mr = ib_reg_phys_mr(cb->pd, &buf, 1, + flags, + &iovbase); + + if (IS_ERR(cb->start_mr)) { + log(LOG_ERR, "start_buf reg_mr failed\n"); + ret = PTR_ERR(cb->start_mr); + goto err3; + } + } + } + + krping_setup_wr(cb); + DEBUG_LOG(PFX "allocated & registered buffers...\n"); + return 0; +err3: + contigfree(cb->start_buf, cb->size, M_DEVBUF); + + if (!cb->use_dmamr) + ib_dereg_mr(cb->rdma_mr); +err2: + contigfree(cb->rdma_buf, cb->size, 
M_DEVBUF); +err1: + if (cb->use_dmamr) + ib_dereg_mr(cb->dma_mr); + else { + ib_dereg_mr(cb->recv_mr); + ib_dereg_mr(cb->send_mr); + } + return ret; +} + +static void krping_free_buffers(struct krping_cb *cb) +{ + DEBUG_LOG(PFX "krping_free_buffers called on cb %p\n", cb); + +#if 0 + dma_unmap_single(cb->pd->device->dma_device, + pci_unmap_addr(cb, recv_mapping), + sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); + dma_unmap_single(cb->pd->device->dma_device, + pci_unmap_addr(cb, send_mapping), + sizeof(cb->send_buf), DMA_BIDIRECTIONAL); + dma_unmap_single(cb->pd->device->dma_device, + pci_unmap_addr(cb, rdma_mapping), + cb->size, DMA_BIDIRECTIONAL); +#endif + contigfree(cb->rdma_buf, cb->size, M_DEVBUF); + if (!cb->server || cb->wlat || cb->rlat || cb->bw) { +#if 0 + dma_unmap_single(cb->pd->device->dma_device, + pci_unmap_addr(cb, start_mapping), + cb->size, DMA_BIDIRECTIONAL); +#endif + contigfree(cb->start_buf, cb->size, M_DEVBUF); + } + if (cb->use_dmamr) + ib_dereg_mr(cb->dma_mr); + else { + ib_dereg_mr(cb->send_mr); + ib_dereg_mr(cb->recv_mr); + ib_dereg_mr(cb->rdma_mr); + if (!cb->server) + ib_dereg_mr(cb->start_mr); + } +} + +static int krping_create_qp(struct krping_cb *cb) +{ + struct ib_qp_init_attr init_attr; + int ret; + + memset(&init_attr, 0, sizeof(init_attr)); + init_attr.cap.max_send_wr = cb->txdepth; + init_attr.cap.max_recv_wr = 2; + init_attr.cap.max_recv_sge = 1; + init_attr.cap.max_send_sge = 1; + init_attr.qp_type = IB_QPT_RC; + init_attr.send_cq = cb->cq; + init_attr.recv_cq = cb->cq; + + if (cb->server) { + ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); + if (!ret) + cb->qp = cb->child_cm_id->qp; + } else { + ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr); + if (!ret) + cb->qp = cb->cm_id->qp; + } + + return ret; +} + +static void krping_free_qp(struct krping_cb *cb) +{ + ib_destroy_qp(cb->qp); + ib_destroy_cq(cb->cq); + ib_dealloc_pd(cb->pd); +} + +static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) +{ + int ret; + cb->pd = ib_alloc_pd(cm_id->device); + if (IS_ERR(cb->pd)) { + log(LOG_ERR, "ib_alloc_pd failed\n"); + return PTR_ERR(cb->pd); + } + DEBUG_LOG(PFX "created pd %p\n", cb->pd); + + cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, + cb, cb->txdepth * 2, 0); + if (IS_ERR(cb->cq)) { + log(LOG_ERR, "ib_create_cq failed\n"); + ret = PTR_ERR(cb->cq); + goto err1; + } + DEBUG_LOG(PFX "created cq %p\n", cb->cq); + + if (!cb->wlat && !cb->rlat && !cb->bw) { + ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); + if (ret) { + log(LOG_ERR, "ib_create_cq failed\n"); + goto err2; + } + } + + ret = krping_create_qp(cb); + if (ret) { + log(LOG_ERR, "krping_create_qp failed: %d\n", ret); + goto err2; + } + DEBUG_LOG(PFX "created qp %p\n", cb->qp); + return 0; +err2: + ib_destroy_cq(cb->cq); +err1: + ib_dealloc_pd(cb->pd); + return ret; +} + +static void krping_format_send(struct krping_cb *cb, u64 buf, + struct ib_mr *mr) +{ + struct krping_rdma_info *info = &cb->send_buf; + + info->buf = htonll(buf); + info->rkey = htonl(mr->rkey); + info->size = htonl(cb->size); + + DEBUG_LOG(PFX "RDMA addr %llx rkey %x len %d\n", + (unsigned long long)buf, mr->rkey, cb->size); +} + +static void krping_test_server(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + int ret; + + while (1) { + /* Wait for client's Start STAG/TO/Len */ + krping_wait(cb, RDMA_READ_ADV); + if (cb->state != RDMA_READ_ADV) { + DEBUG_LOG(PFX "wait for RDMA_READ_ADV state %d\n", + cb->state); + break; + } + + DEBUG_LOG(PFX "server received sink adv\n"); + + 
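+		/*
+		 * The krping_wait() calls in this loop are satisfied by
+		 * krping_cq_event_handler() above, which uses the usual
+		 * re-arm-then-drain CQ discipline. Distilled (sketch;
+		 * handle_wc() stands in for the switch on wc.opcode):
+		 *
+		 *	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+		 *	while (ib_poll_cq(cq, 1, &wc) == 1)
+		 *		handle_wc(&wc);
+		 *
+		 * Re-arming before draining ensures that a completion landing
+		 * between the final poll and the next interrupt is not lost.
+		 */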
/* Issue RDMA Read. */ + cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = cb->remote_len; + + ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + break; + } + DEBUG_LOG(PFX "server posted rdma read req \n"); + + /* Wait for read completion */ + krping_wait(cb, RDMA_READ_COMPLETE); + if (cb->state != RDMA_READ_COMPLETE) { + log(LOG_ERR, + "wait for RDMA_READ_COMPLETE state %d\n", + cb->state); + break; + } + DEBUG_LOG(PFX "server received read complete\n"); + + /* Display data in recv buf */ + if (cb->verbose) + DEBUG_LOG("server ping data: %s\n", cb->rdma_buf); + + /* Tell client to continue */ + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + break; + } + DEBUG_LOG(PFX "server posted go ahead\n"); + + /* Wait for client's RDMA STAG/TO/Len */ + krping_wait(cb, RDMA_WRITE_ADV); + if (cb->state != RDMA_WRITE_ADV) { + log(LOG_ERR, + "wait for RDMA_WRITE_ADV state %d\n", + cb->state); + break; + } + DEBUG_LOG(PFX "server received sink adv\n"); + + /* RDMA Write echo data */ + cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = strlen(cb->rdma_buf) + 1; + DEBUG_LOG(PFX "rdma write from lkey %x laddr %llx len %d\n", + cb->rdma_sq_wr.sg_list->lkey, + (unsigned long long)cb->rdma_sq_wr.sg_list->addr, + cb->rdma_sq_wr.sg_list->length); + + ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + break; + } + + /* Wait for completion */ + krping_wait(cb, RDMA_WRITE_COMPLETE); + if (cb->state != RDMA_WRITE_COMPLETE) { + log(LOG_ERR, + "wait for RDMA_WRITE_COMPLETE state %d\n", + cb->state); + break; + } + DEBUG_LOG(PFX "server rdma write complete \n"); + + cb->state = CONNECTED; + + /* Tell client to begin again */ + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + break; + } + DEBUG_LOG(PFX "server posted go ahead\n"); + } +} + +static void rlat_test(struct krping_cb *cb) +{ + int scnt; + int iters = cb->count; + struct timeval start_tv, stop_tv; + int ret; + struct ib_wc wc; + struct ib_send_wr *bad_wr; + int ne; + + scnt = 0; + cb->rdma_sq_wr.opcode = IB_WR_RDMA_READ; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = cb->size; + + microtime(&start_tv); + if (!cb->poll) { + cb->state = RDMA_READ_ADV; + ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); + } + while (scnt < iters) { + + cb->state = RDMA_READ_ADV; + ret = ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, + "Couldn't post send: ret=%d scnt %d\n", + ret, scnt); + return; + } + + do { + if (!cb->poll) { + krping_wait(cb, RDMA_READ_COMPLETE); + if (cb->state == RDMA_READ_COMPLETE) { + ne = 1; + ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); + } else { + ne = -1; + } + } else + ne = ib_poll_cq(cb->cq, 1, &wc); + if (cb->state == ERROR) { + log(LOG_ERR, + "state == ERROR...bailing scnt %d\n", scnt); + return; + } + } while (ne == 0); + + if (ne < 0) { + log(LOG_ERR, "poll CQ failed %d\n", ne); + return; + } + if (cb->poll && wc.status != IB_WC_SUCCESS) { + log(LOG_ERR, "Completion wth error at %s:\n", + cb->server ? 
"server" : "client"); + log(LOG_ERR, "Failed status %d: wr_id %d\n", + wc.status, (int) wc.wr_id); + return; + } + ++scnt; + } + microtime(&stop_tv); + + if (stop_tv.tv_usec < start_tv.tv_usec) { + stop_tv.tv_usec += 1000000; + stop_tv.tv_sec -= 1; + } + + log(LOG_ERR, "delta sec %lu delta usec %lu iter %d size %d\n", + stop_tv.tv_sec - start_tv.tv_sec, + stop_tv.tv_usec - start_tv.tv_usec, + scnt, cb->size); +} + +static int alloc_cycle_mem(int cycle_iters, + cycles_t **post_cycles_start, + cycles_t **post_cycles_stop, + cycles_t **poll_cycles_start, + cycles_t **poll_cycles_stop, + cycles_t **last_poll_cycles_start) +{ + *post_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*post_cycles_start) { + goto fail1; + } + *post_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*post_cycles_stop) { + goto fail2; + } + *poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*poll_cycles_start) { + goto fail3; + } + *poll_cycles_stop = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*poll_cycles_stop) { + goto fail4; + } + *last_poll_cycles_start = malloc(cycle_iters * sizeof(cycles_t), M_DEVBUF, M_WAITOK); + if (!*last_poll_cycles_start) { + goto fail5; + } + return 0; +fail5: + free(*poll_cycles_stop, M_DEVBUF); +fail4: + free(*poll_cycles_start, M_DEVBUF); +fail3: + free(*post_cycles_stop, M_DEVBUF); +fail2: + free(*post_cycles_start, M_DEVBUF); +fail1: + log(LOG_ERR, "%s malloc failed\n", __FUNCTION__); + return ENOMEM; +} + +static void free_cycle_mem(cycles_t *post_cycles_start, + cycles_t *post_cycles_stop, + cycles_t *poll_cycles_start, + cycles_t *poll_cycles_stop, + cycles_t *last_poll_cycles_start) +{ + free(last_poll_cycles_start, M_DEVBUF); + free(poll_cycles_stop, M_DEVBUF); + free(poll_cycles_start, M_DEVBUF); + free(post_cycles_stop, M_DEVBUF); + free(post_cycles_start, M_DEVBUF); +} + +static void wlat_test(struct krping_cb *cb) +{ + int ccnt, scnt, rcnt; + int iters=cb->count; + volatile char *poll_buf = (char *) cb->start_buf; + char *buf = (char *)cb->rdma_buf; + ccnt = 0; + scnt = 0; + rcnt = 0; + struct timeval start_tv, stop_tv; + cycles_t *post_cycles_start, *post_cycles_stop; + cycles_t *poll_cycles_start, *poll_cycles_stop; + cycles_t *last_poll_cycles_start; + cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; + int i; + int cycle_iters = 1000; + int err; + + err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, + &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); + + if (err) { + log(LOG_ERR, "%s malloc failed\n", __FUNCTION__); + return; + } + + cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = cb->size; + + if (cycle_iters > iters) + cycle_iters = iters; + microtime(&start_tv); + while (scnt < iters || ccnt < iters || rcnt < iters) { + + /* Wait till buffer changes. 
*/ + if (rcnt < iters && !(scnt < 1 && !cb->server)) { + ++rcnt; + while (*poll_buf != (char)rcnt) { + if (cb->state == ERROR) { + log(LOG_ERR, "state = ERROR, bailing\n"); + return; + } + } + } + + if (scnt < iters) { + struct ib_send_wr *bad_wr; + + *buf = (char)scnt+1; + if (scnt < cycle_iters) + post_cycles_start[scnt] = get_cycles(); + if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { + log(LOG_ERR, "Couldn't post send: scnt=%d\n", + scnt); + return; + } + if (scnt < cycle_iters) + post_cycles_stop[scnt] = get_cycles(); + scnt++; + } + + if (ccnt < iters) { + struct ib_wc wc; + int ne; + + if (ccnt < cycle_iters) + poll_cycles_start[ccnt] = get_cycles(); + do { + if (ccnt < cycle_iters) + last_poll_cycles_start[ccnt] = get_cycles(); + ne = ib_poll_cq(cb->cq, 1, &wc); + } while (ne == 0); + if (ccnt < cycle_iters) + poll_cycles_stop[ccnt] = get_cycles(); + ++ccnt; + + if (ne < 0) { + log(LOG_ERR, "poll CQ failed %d\n", ne); + return; + } + if (wc.status != IB_WC_SUCCESS) { + log(LOG_ERR, "Completion wth error at %s:\n", + cb->server ? "server" : "client"); + log(LOG_ERR, "Failed status %d: wr_id %d\n", + wc.status, (int) wc.wr_id); + log(LOG_ERR, "scnt=%d, rcnt=%d, ccnt=%d\n", + scnt, rcnt, ccnt); + return; + } + } + } + microtime(&stop_tv); + + if (stop_tv.tv_usec < start_tv.tv_usec) { + stop_tv.tv_usec += 1000000; + stop_tv.tv_sec -= 1; + } + + for (i=0; i < cycle_iters; i++) { + sum_post += post_cycles_stop[i] - post_cycles_start[i]; + sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; + sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; + } + + log(LOG_ERR, "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", + stop_tv.tv_sec - start_tv.tv_sec, + stop_tv.tv_usec - start_tv.tv_usec, + scnt, cb->size, cycle_iters, + (unsigned long long)sum_post, (unsigned long long)sum_poll, + (unsigned long long)sum_last_poll); + + free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, + poll_cycles_stop, last_poll_cycles_start); +} + +static void bw_test(struct krping_cb *cb) +{ + int ccnt, scnt, rcnt; + int iters=cb->count; + ccnt = 0; + scnt = 0; + rcnt = 0; + struct timeval start_tv, stop_tv; + cycles_t *post_cycles_start, *post_cycles_stop; + cycles_t *poll_cycles_start, *poll_cycles_stop; + cycles_t *last_poll_cycles_start; + cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0; + int i; + int cycle_iters = 1000; + int err; + + err = alloc_cycle_mem(cycle_iters, &post_cycles_start, &post_cycles_stop, + &poll_cycles_start, &poll_cycles_stop, &last_poll_cycles_start); + + if (err) { + log(LOG_ERR, "%s kmalloc failed\n", __FUNCTION__); + return; + } + + cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = cb->size; + + if (cycle_iters > iters) + cycle_iters = iters; + microtime(&start_tv); + while (scnt < iters || ccnt < iters) { + + while (scnt < iters && scnt - ccnt < cb->txdepth) { + struct ib_send_wr *bad_wr; + + if (scnt < cycle_iters) + post_cycles_start[scnt] = get_cycles(); + if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { + log(LOG_ERR, "Couldn't post send: scnt=%d\n", + scnt); + return; + } + if (scnt < cycle_iters) + post_cycles_stop[scnt] = get_cycles(); + ++scnt; + } + + if (ccnt < iters) { + int ne; + struct ib_wc wc; + + if (ccnt < cycle_iters) + poll_cycles_start[ccnt] = get_cycles(); + do { + if (ccnt < cycle_iters) + last_poll_cycles_start[ccnt] = 
get_cycles(); + ne = ib_poll_cq(cb->cq, 1, &wc); + } while (ne == 0); + if (ccnt < cycle_iters) + poll_cycles_stop[ccnt] = get_cycles(); + ccnt += 1; + + if (ne < 0) { + log(LOG_ERR, "poll CQ failed %d\n", ne); + return; + } + if (wc.status != IB_WC_SUCCESS) { + log(LOG_ERR, "Completion wth error at %s:\n", + cb->server ? "server" : "client"); + log(LOG_ERR, "Failed status %d: wr_id %d\n", + wc.status, (int) wc.wr_id); + return; + } + } + } + microtime(&stop_tv); + + if (stop_tv.tv_usec < start_tv.tv_usec) { + stop_tv.tv_usec += 1000000; + stop_tv.tv_sec -= 1; + } + + for (i=0; i < cycle_iters; i++) { + sum_post += post_cycles_stop[i] - post_cycles_start[i]; + sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; + sum_last_poll += poll_cycles_stop[i] - last_poll_cycles_start[i]; + } + + log(LOG_ERR, "delta sec %lu delta usec %lu iter %d size %d cycle_iters %d sum_post %llu sum_poll %llu sum_last_poll %llu\n", + stop_tv.tv_sec - start_tv.tv_sec, + stop_tv.tv_usec - start_tv.tv_usec, + scnt, cb->size, cycle_iters, + (unsigned long long)sum_post, (unsigned long long)sum_poll, + (unsigned long long)sum_last_poll); + + free_cycle_mem(post_cycles_start, post_cycles_stop, poll_cycles_start, + poll_cycles_stop, last_poll_cycles_start); +} + +static void krping_rlat_test_server(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + /* Spin waiting for client's Start STAG/TO/Len */ + while (cb->state < RDMA_READ_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + log(LOG_ERR, "poll error %d\n", ret); + return; + } + if (wc.status) { + log(LOG_ERR, "send completiong error %d\n", wc.status); + return; + } + + krping_wait(cb, ERROR); +} + +static void krping_wlat_test_server(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + /* Spin waiting for client's Start STAG/TO/Len */ + while (cb->state < RDMA_READ_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + log(LOG_ERR, "poll error %d\n", ret); + return; + } + if (wc.status) { + log(LOG_ERR, "send completiong error %d\n", wc.status); + return; + } + + wlat_test(cb); + +} + +static void krping_bw_test_server(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + /* Spin waiting for client's Start STAG/TO/Len */ + while (cb->state < RDMA_READ_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = 
ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + log(LOG_ERR, "poll error %d\n", ret); + return; + } + if (wc.status) { + log(LOG_ERR, "send completiong error %d\n", wc.status); + return; + } + + if (cb->duplex) + bw_test(cb); + krping_wait(cb, ERROR); +} + +static int krping_bind_server(struct krping_cb *cb) +{ + struct sockaddr_in sin; + int ret; + + memset(&sin, 0, sizeof(sin)); + sin.sin_len = sizeof sin; + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = cb->addr.s_addr; + sin.sin_port = cb->port; + + ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *) &sin); + if (ret) { + log(LOG_ERR, "rdma_bind_addr error %d\n", ret); + return ret; + } + DEBUG_LOG(PFX "rdma_bind_addr successful\n"); + + DEBUG_LOG(PFX "rdma_listen\n"); + ret = rdma_listen(cb->cm_id, 3); + if (ret) { + log(LOG_ERR, "rdma_listen failed: %d\n", ret); + return ret; + } + + krping_wait(cb, CONNECT_REQUEST); + if (cb->state != CONNECT_REQUEST) { + log(LOG_ERR, "wait for CONNECT_REQUEST state %d\n", + cb->state); + return -1; + } + + return 0; +} + +static void krping_run_server(struct krping_cb *cb) +{ + struct ib_recv_wr *bad_wr; + int ret; + + ret = krping_bind_server(cb); + if (ret) + return; + + ret = krping_setup_qp(cb, cb->child_cm_id); + if (ret) { + log(LOG_ERR, "setup_qp failed: %d\n", ret); + return; + } + + ret = krping_setup_buffers(cb); + if (ret) { + log(LOG_ERR, "krping_setup_buffers failed: %d\n", ret); + goto err1; + } + + ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "ib_post_recv failed: %d\n", ret); + goto err2; + } + + ret = krping_accept(cb); + if (ret) { + log(LOG_ERR, "connect error %d\n", ret); + goto err2; + } + + if (cb->wlat) + krping_wlat_test_server(cb); + else if (cb->rlat) + krping_rlat_test_server(cb); + else if (cb->bw) + krping_bw_test_server(cb); + else + krping_test_server(cb); + + rdma_disconnect(cb->child_cm_id); + rdma_destroy_id(cb->child_cm_id); +err2: + krping_free_buffers(cb); +err1: + krping_free_qp(cb); +} + +static void krping_test_client(struct krping_cb *cb) +{ + int ping, start, cc, i, ret; + struct ib_send_wr *bad_wr; + unsigned char c; + + start = 65; + for (ping = 0; !cb->count || ping < cb->count; ping++) { + cb->state = RDMA_READ_ADV; + + /* Put some ascii text in the buffer. */ + cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping); + for (i = cc, c = start; i < cb->size; i++) { + cb->start_buf[i] = c; + c++; + if (c > 122) + c = 65; + } + start++; + if (start > 122) + start = 65; + cb->start_buf[cb->size - 1] = 0; + + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + break; + } + + /* Wait for server to ACK */ + krping_wait(cb, RDMA_WRITE_ADV); + if (cb->state != RDMA_WRITE_ADV) { + log(LOG_ERR, + "wait for RDMA_WRITE_ADV state %d\n", + cb->state); + break; + } + + if (cb->dma_mr) + krping_format_send(cb, cb->rdma_addr, cb->dma_mr); + else + krping_format_send(cb, cb->rdma_addr, cb->rdma_mr); + + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + break; + } + + /* Wait for the server to say the RDMA Write is complete. 
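+		 * The server's 16-byte send doubles as that go-ahead;
+		 * client_recv() infers its meaning purely from the current
+		 * state (RDMA_READ_ADV advances to RDMA_WRITE_ADV, anything
+		 * else to RDMA_WRITE_COMPLETE), since the message itself
+		 * carries no opcode.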
*/ + krping_wait(cb, RDMA_WRITE_COMPLETE); + if (cb->state != RDMA_WRITE_COMPLETE) { + log(LOG_ERR, + "wait for RDMA_WRITE_COMPLETE state %d\n", + cb->state); + break; + } + + if (cb->validate) + if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { + log(LOG_ERR, "data mismatch!\n"); + break; + } + + if (cb->verbose) + DEBUG_LOG("ping data: %s\n", cb->rdma_buf); + } +} + +static void krping_rlat_test_client(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + cb->state = RDMA_READ_ADV; + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->rdma_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + log(LOG_ERR, "poll error %d\n", ret); + return; + } + if (wc.status) { + log(LOG_ERR, "send completion error %d\n", wc.status); + return; + } + + /* Spin waiting for server's Start STAG/TO/Len */ + while (cb->state < RDMA_WRITE_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + +#if 0 +{ + int i; + struct timeval start, stop; + time_t sec; + suseconds_t usec; + unsigned long long elapsed; + struct ib_wc wc; + struct ib_send_wr *bad_wr; + int ne; + + cb->rdma_sq_wr.opcode = IB_WR_RDMA_WRITE; + cb->rdma_sq_wr.wr.rdma.rkey = cb->remote_rkey; + cb->rdma_sq_wr.wr.rdma.remote_addr = cb->remote_addr; + cb->rdma_sq_wr.sg_list->length = 0; + cb->rdma_sq_wr.num_sge = 0; + + microtime(&start); + for (i=0; i < 100000; i++) { + if (ib_post_send(cb->qp, &cb->rdma_sq_wr, &bad_wr)) { + log(LOG_ERR, "Couldn't post send\n"); + return; + } + do { + ne = ib_poll_cq(cb->cq, 1, &wc); + } while (ne == 0); + if (ne < 0) { + log(LOG_ERR, "poll CQ failed %d\n", ne); + return; + } + if (wc.status != IB_WC_SUCCESS) { + log(LOG_ERR, "Completion wth error at %s:\n", + cb->server ? 
"server" : "client"); + log(LOG_ERR, "Failed status %d: wr_id %d\n", + wc.status, (int) wc.wr_id); + return; + } + } + microtime(&stop); + + if (stop.tv_usec < start.tv_usec) { + stop.tv_usec += 1000000; + stop.tv_sec -= 1; + } + sec = stop.tv_sec - start.tv_sec; + usec = stop.tv_usec - start.tv_usec; + elapsed = sec * 1000000 + usec; + log(LOG_ERR, "0B-write-lat iters 100000 usec %llu\n", elapsed); +} +#endif + + rlat_test(cb); +} + +static void krping_wlat_test_client(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + cb->state = RDMA_READ_ADV; + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + log(LOG_ERR, "poll error %d\n", ret); + return; + } + if (wc.status) { + log(LOG_ERR, "send completion error %d\n", wc.status); + return; + } + + /* Spin waiting for server's Start STAG/TO/Len */ + while (cb->state < RDMA_WRITE_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + wlat_test(cb); +} + +static void krping_bw_test_client(struct krping_cb *cb) +{ + struct ib_send_wr *bad_wr; + struct ib_wc wc; + int ret; + + cb->state = RDMA_READ_ADV; + + /* Send STAG/TO/Len to client */ + if (cb->dma_mr) + krping_format_send(cb, cb->start_addr, cb->dma_mr); + else + krping_format_send(cb, cb->start_addr, cb->start_mr); + ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "post send error %d\n", ret); + return; + } + + /* Spin waiting for send completion */ + while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); + if (ret < 0) { + log(LOG_ERR, "poll error %d\n", ret); + return; + } + if (wc.status) { + log(LOG_ERR, "send completion error %d\n", wc.status); + return; + } + + /* Spin waiting for server's Start STAG/TO/Len */ + while (cb->state < RDMA_WRITE_ADV) { + krping_cq_event_handler(cb->cq, cb); + } + + bw_test(cb); +} + +static int krping_connect_client(struct krping_cb *cb) +{ + struct rdma_conn_param conn_param; + int ret; + + memset(&conn_param, 0, sizeof conn_param); + conn_param.responder_resources = 1; + conn_param.initiator_depth = 1; + conn_param.retry_count = 10; + + ret = rdma_connect(cb->cm_id, &conn_param); + if (ret) { + log(LOG_ERR, "rdma_connect error %d\n", ret); + return ret; + } + + krping_wait(cb, CONNECTED); + if (cb->state == ERROR) { + log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state); + return -1; + } + + DEBUG_LOG(PFX "rdma_connect successful\n"); + return 0; +} + +static int krping_bind_client(struct krping_cb *cb) +{ + struct sockaddr_in sin; + int ret; + + memset(&sin, 0, sizeof(sin)); + sin.sin_len = sizeof sin; + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = cb->addr.s_addr; + sin.sin_port = cb->port; + + ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *) &sin, + 2000); + if (ret) { + log(LOG_ERR, "rdma_resolve_addr error %d\n", ret); + return ret; + } + + krping_wait(cb, ROUTE_RESOLVED); + if (cb->state != ROUTE_RESOLVED) { + log(LOG_ERR, + "addr/route resolution did not resolve: state %d\n", + cb->state); + return EINTR; + } + + DEBUG_LOG(PFX "rdma_resolve_addr - rdma_resolve_route successful\n"); + return 0; +} + +static void krping_run_client(struct krping_cb *cb) +{ + struct ib_recv_wr *bad_wr; + int ret; + + ret = 
krping_bind_client(cb); + if (ret) + return; + + ret = krping_setup_qp(cb, cb->cm_id); + if (ret) { + log(LOG_ERR, "setup_qp failed: %d\n", ret); + return; + } + + ret = krping_setup_buffers(cb); + if (ret) { + log(LOG_ERR, "krping_setup_buffers failed: %d\n", ret); + goto err1; + } + + ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); + if (ret) { + log(LOG_ERR, "ib_post_recv failed: %d\n", ret); + goto err2; + } + + ret = krping_connect_client(cb); + if (ret) { + log(LOG_ERR, "connect error %d\n", ret); + goto err2; + } + + if (cb->wlat) + krping_wlat_test_client(cb); + else if (cb->rlat) + krping_rlat_test_client(cb); + else if (cb->bw) + krping_bw_test_client(cb); + else + krping_test_client(cb); + rdma_disconnect(cb->cm_id); +err2: + krping_free_buffers(cb); +err1: + krping_free_qp(cb); +} + +int krping_doit(char *cmd) +{ + struct krping_cb *cb; + int op; + int ret = 0; + char *optarg; + unsigned long optint; + debug = 0; + + cb = malloc(sizeof(*cb), M_DEVBUF, M_WAITOK); + if (!cb) + return ENOMEM; + bzero(cb, sizeof *cb); + + mtx_lock(&krping_mutex); + TAILQ_INSERT_TAIL(&krping_cbs, cb, list); + mtx_unlock(&krping_mutex); + + cb->server = -1; + cb->state = IDLE; + cb->size = 64; + cb->txdepth = RPING_SQ_DEPTH; + mtx_init(&cb->lock, "krping mtx", NULL, MTX_DUPOK|MTX_DEF); + + while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, + &optint)) != 0) { + switch (op) { + case 'a': + cb->addr_str = optarg; + DEBUG_LOG(PFX "ipaddr (%s)\n", optarg); + if (!inet_aton(optarg, &cb->addr)) { + log(LOG_ERR, "bad addr string %s\n", optarg); + ret = EINVAL; + } + break; + case 'D': + cb->use_dmamr = 1; + DEBUG_LOG(PFX "using dma mr\n"); + break; + case 'p': + cb->port = htons(optint); + DEBUG_LOG(PFX "port %d\n", (int)optint); + break; + case 'P': + cb->poll = 1; + DEBUG_LOG("server\n"); + break; + case 's': + cb->server = 1; + DEBUG_LOG(PFX "server\n"); + break; + case 'c': + cb->server = 0; + DEBUG_LOG(PFX "client\n"); + break; + case 'S': + cb->size = optint; + if ((cb->size < 1) || + (cb->size > RPING_BUFSIZE)) { + log(LOG_ERR, "Invalid size %d " + "(valid range is 1 to %d)\n", + cb->size, RPING_BUFSIZE); + ret = EINVAL; + } else + DEBUG_LOG(PFX "size %d\n", (int)optint); + break; + case 'C': + cb->count = optint; + if (cb->count < 0) { + log(LOG_ERR, "Invalid count %d\n", + cb->count); + ret = EINVAL; + } else + DEBUG_LOG(PFX "count %d\n", (int) cb->count); + break; + case 'v': + cb->verbose++; + DEBUG_LOG(PFX "verbose\n"); + break; + case 'V': + cb->validate++; + DEBUG_LOG(PFX "validate data\n"); + break; + case 'L': + cb->rlat++; + break; + case 'l': + cb->wlat++; + break; + case 'B': + cb->bw++; + break; + case 't': + cb->txdepth = optint; + DEBUG_LOG(PFX "txdepth %d\n", cb->txdepth); + break; + case 'd': + debug++; + break; + default: + log(LOG_ERR, "unknown opt %s\n", optarg); + ret = EINVAL; + break; + } + } + if (ret) + goto out; + + if (cb->server == -1) { + log(LOG_ERR, "must be either client or server\n"); + ret = EINVAL; + goto out; + } + if ((cb->bw + cb->rlat + cb->wlat) > 1) { + log(LOG_ERR, "Pick only one test: bw, rlat, wlat\n"); + ret = EINVAL; + goto out; + } + + + cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); + if (IS_ERR(cb->cm_id)) { + ret = PTR_ERR(cb->cm_id); + log(LOG_ERR, "rdma_create_id error %d\n", ret); + goto out; + } + DEBUG_LOG(PFX "created cm_id %p\n", cb->cm_id); + if (cb->server) + krping_run_server(cb); + else + krping_run_client(cb); + DEBUG_LOG(PFX "destroy cm_id %p\n", cb->cm_id); + rdma_destroy_id(cb->cm_id); +out: 
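+	/*
+	 * Sketch of driving this entry point directly (values illustrative;
+	 * krping_dev.c below feeds it strings written to /dev/krping). The
+	 * buffer must be writable because krping_getopt() tokenizes it in
+	 * place with strsep():
+	 *
+	 *	char cmd[] = "client,addr=192.168.69.142,port=9999,"
+	 *	    "size=128,count=100,validate";
+	 *	error = krping_doit(cmd);
+	 */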
+ mtx_lock(&krping_mutex); + TAILQ_REMOVE(&krping_cbs, cb, list); + mtx_unlock(&krping_mutex); + free(cb, M_DEVBUF); + return ret; +} + +void krping_init(void) +{ + mtx_init(&krping_mutex, "krping lock", NULL, MTX_DEF); + TAILQ_INIT(&krping_cbs); +} diff --git a/sys/contrib/rdma/krping/krping.h b/sys/contrib/rdma/krping/krping.h new file mode 100644 index 000000000000..8578e7e7979b --- /dev/null +++ b/sys/contrib/rdma/krping/krping.h @@ -0,0 +1,128 @@ +/* + * $FreeBSD$ + */ +#include <contrib/rdma/ib_verbs.h> +#include <netinet/in.h> + +/* + * Krping header stuffs... + */ + +struct krping_stats { + unsigned send_bytes; + unsigned send_msgs; + unsigned recv_bytes; + unsigned recv_msgs; + unsigned write_bytes; + unsigned write_msgs; + unsigned read_bytes; + unsigned read_msgs; +}; + + +/* + * These states are used to signal events between the completion handler + * and the main client or server thread. + * + * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, + * and RDMA_WRITE_COMPLETE for each ping. + */ +enum test_state { + IDLE = 1, + CONNECT_REQUEST, + ADDR_RESOLVED, + ROUTE_RESOLVED, + CONNECTED, + RDMA_READ_ADV, + RDMA_READ_COMPLETE, + RDMA_WRITE_ADV, + RDMA_WRITE_COMPLETE, + ERROR +}; + +struct krping_rdma_info { + uint64_t buf; + uint32_t rkey; + uint32_t size; +}; + +/* + * Control block struct. + */ +struct krping_cb { + int server; /* 0 iff client */ + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_qp *qp; + struct ib_mr *dma_mr; + int use_dmamr; + + struct ib_recv_wr rq_wr; /* recv work request record */ + struct ib_sge recv_sgl; /* recv single SGE */ + struct krping_rdma_info recv_buf;/* malloc'd buffer */ + struct ib_mr *recv_mr; + + struct ib_send_wr sq_wr; /* send work requrest record */ + struct ib_sge send_sgl; + struct krping_rdma_info send_buf;/* single send buf */ + struct ib_mr *send_mr; + + struct ib_send_wr rdma_sq_wr; /* rdma work request record */ + struct ib_sge rdma_sgl; /* rdma single SGE */ + char *rdma_buf; /* used as rdma sink */ + u64 rdma_addr; + struct ib_mr *rdma_mr; + + uint32_t remote_rkey; /* remote guys RKEY */ + uint64_t remote_addr; /* remote guys TO */ + uint32_t remote_len; /* remote guys LEN */ + + char *start_buf; /* rdma read src */ + u64 start_addr; + struct ib_mr *start_mr; + + enum test_state state; /* used for cond/signalling */ + struct mtx lock; + struct krping_stats stats; + + uint16_t port; /* dst port in NBO */ + struct in_addr addr; /* dst addr in NBO */ + char *addr_str; /* dst addr string */ + int verbose; /* verbose logging */ + int count; /* ping count */ + int size; /* ping data size */ + int validate; /* validate ping data */ + + /* CM stuff */ + struct rdma_cm_id *cm_id; /* connection on client side,*/ + /* listener on service side. 
*/ + struct rdma_cm_id *child_cm_id; /* connection on server side */ + TAILQ_ENTRY(krping_cb) list; + + int rlat; /* run read latency test */ + int wlat; /* run write latency test */ + int bw; /* run write bw test */ + int duplex; /* run write bw full duplex test */ + int poll; /* poll vs block in rlat */ + int txdepth; +}; + +static __inline uint64_t +get_cycles(void) +{ + u_int32_t low, high; + __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); + return (low | ((u_int64_t)high << 32)); +} + +#define htonll(x) htobe64((x)) +#define ntohll(x) be64toh((x)) + +typedef uint64_t cycles_t; + +extern struct mtx krping_mutex; +TAILQ_HEAD(krping_cb_list, krping_cb); +extern struct krping_cb_list krping_cbs; + +int krping_doit(char *cmd); +void krping_init(void); diff --git a/sys/contrib/rdma/krping/krping_dev.c b/sys/contrib/rdma/krping/krping_dev.c new file mode 100644 index 000000000000..448f19717578 --- /dev/null +++ b/sys/contrib/rdma/krping/krping_dev.c @@ -0,0 +1,180 @@ +/* + * This code lifted from: + * Simple `echo' pseudo-device KLD + * Murray Stokely + * Converted to 5.X by Søren (Xride) Straarup + */ + +/* + * /bin/echo "server,port=9999,addr=192.168.69.142,validate" > /dev/krping + * /bin/echo "client,port=9999,addr=192.168.69.142,validate" > /dev/krping + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <sys/systm.h> /* uprintf */ +#include <sys/errno.h> +#include <sys/param.h> /* defines used in kernel.h */ +#include <sys/kernel.h> /* types used in module initialization */ +#include <sys/conf.h> /* cdevsw struct */ +#include <sys/uio.h> /* uio struct */ +#include <sys/malloc.h> + +#include "krping.h" + +#define BUFFERSIZE 512 + +/* Function prototypes */ +static d_open_t krping_open; +static d_close_t krping_close; +static d_read_t krping_read; +static d_write_t krping_write; + +/* Character device entry points */ +static struct cdevsw krping_cdevsw = { + .d_version = D_VERSION, + .d_open = krping_open, + .d_close = krping_close, + .d_read = krping_read, + .d_write = krping_write, + .d_name = "krping", +}; + +typedef struct s_krping { + char msg[BUFFERSIZE]; + int len; +} krping_t; + +/* vars */ +static struct cdev *krping_dev; + +static int +krping_loader(struct module *m, int what, void *arg) +{ + int err = 0; + + switch (what) { + case MOD_LOAD: /* kldload */ + krping_init(); + krping_dev = make_dev(&krping_cdevsw, 0, UID_ROOT, GID_WHEEL, + 0600, "krping"); + printf("Krping device loaded.\n"); + break; + case MOD_UNLOAD: + destroy_dev(krping_dev); + printf("Krping device unloaded.\n"); + break; + default: + err = EOPNOTSUPP; + break; + } + return err; +} + +static int +krping_open(struct cdev *dev, int oflags, int devtype, struct thread *p) +{ + int err = 0; + return err; +} + +static int +krping_close(struct cdev *dev, int fflag, int devtype, struct thread *p) +{ + return 0; +} + +static int +krping_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + struct krping_cb *cb, *cb2; + int num=1; + struct krping_cb_list copy_cbs; + + uprintf("krping: %4s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n", + "num", "device", "snd bytes", "snd msgs", "rcv bytes", + "rcv msgs", "wr bytes", "wr msgs", "rd bytes", "rd msgs"); + TAILQ_INIT(©_cbs); + + mtx_lock(&krping_mutex); + TAILQ_FOREACH(cb, &krping_cbs, list) { + cb2 = malloc(sizeof(*cb), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!cb2) + break; + bcopy(cb, cb2, sizeof(*cb)); + TAILQ_INSERT_TAIL(©_cbs, cb2, list); + } + mtx_unlock(&krping_mutex); + + while (!TAILQ_EMPTY(©_cbs)) 
{ + + cb = TAILQ_FIRST(©_cbs); + TAILQ_REMOVE(©_cbs, cb, list); + if (cb->pd) { + uprintf("krping: %4d %10s %10u %10u %10u %10u %10u %10u %10u %10u\n", + num++, cb->pd->device->name, cb->stats.send_bytes, + cb->stats.send_msgs, cb->stats.recv_bytes, + cb->stats.recv_msgs, cb->stats.write_bytes, + cb->stats.write_msgs, + cb->stats.read_bytes, + cb->stats.read_msgs); + } else { + uprintf("krping: %d listen\n", num++); + } + free(cb, M_DEVBUF); + } + return 0; +} + +static int +krping_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int err = 0; + int amt; + int remain = BUFFERSIZE; + char *cp; + krping_t *krpingmsg; + + krpingmsg = malloc(sizeof *krpingmsg, M_DEVBUF, M_WAITOK|M_ZERO); + if (!krpingmsg) { + uprintf("Could not malloc mem!\n"); + return ENOMEM; + } + + cp = krpingmsg->msg; + while (uio->uio_resid) { + amt = MIN(uio->uio_resid, remain); + if (amt == 0) + break; + + /* Copy the string in from user memory to kernel memory */ + err = uiomove(cp, amt, uio); + if (err) { + uprintf("Write failed: bad address!\n"); + return err; + } + cp += amt; + remain -= amt; + } + + if (uio->uio_resid != 0) { + uprintf("Message too big. max size is %d!\n", BUFFERSIZE); + return EMSGSIZE; + } + + /* null terminate and remove the \n */ + cp--; + *cp = 0; + krpingmsg->len = (unsigned long)(cp - krpingmsg->msg); + uprintf("krping: write string = |%s|\n", krpingmsg->msg); + err = krping_doit(krpingmsg->msg); + free(krpingmsg, M_DEVBUF); + return(err); +} + +MODULE_DEPEND(krping, rdma_core, 1, 1, 1); +MODULE_DEPEND(krping, rdma_cma, 1, 1, 1); +DEV_MODULE(krping,krping_loader,NULL); diff --git a/sys/contrib/rdma/rdma_addr.c b/sys/contrib/rdma/rdma_addr.c new file mode 100644 index 000000000000..e052b807a7d9 --- /dev/null +++ b/sys/contrib/rdma/rdma_addr.c @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. + * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ *
+ */

+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/condvar.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/module.h>
+
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_var.h>
+#include <net/if_arp.h>
+#include <net/route.h>
+
+#include <net80211/ieee80211_freebsd.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <contrib/rdma/ib_addr.h>
+
+struct addr_req {
+	TAILQ_ENTRY(addr_req) entry;
+	struct sockaddr src_addr;
+	struct sockaddr dst_addr;
+	struct rdma_dev_addr *addr;
+	struct rdma_addr_client *client;
+	void *context;
+	void (*callback)(int status, struct sockaddr *src_addr,
+			 struct rdma_dev_addr *addr, void *context);
+	unsigned long timeout;
+	int status;
+};
+
+static void process_req(void *ctx, int pending);
+
+static struct mtx lock;
+
+static TAILQ_HEAD(addr_req_list, addr_req) req_list;
+static struct task addr_task;
+static struct taskqueue *addr_taskq;
+static struct callout addr_ch;
+static eventhandler_tag route_event_tag;
+
+static void addr_timeout(void *arg)
+{
+	taskqueue_enqueue(addr_taskq, &addr_task);
+}
+
+void rdma_addr_register_client(struct rdma_addr_client *client)
+{
+	mtx_init(&client->lock, "rdma_addr client lock", NULL, MTX_DUPOK|MTX_DEF);
+	cv_init(&client->comp, "rdma_addr cv");
+	client->refcount = 1;
+}
+
+static inline void put_client(struct rdma_addr_client *client)
+{
+	mtx_lock(&client->lock);
+	if (--client->refcount == 0) {
+		cv_broadcast(&client->comp);
+	}
+	mtx_unlock(&client->lock);
+}
+
+void rdma_addr_unregister_client(struct rdma_addr_client *client)
+{
+	put_client(client);
+	mtx_lock(&client->lock);
+	if (client->refcount) {
+		cv_wait(&client->comp, &client->lock);
+	}
+	mtx_unlock(&client->lock);
+}
+
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
+		     const unsigned char *dst_dev_addr)
+{
+	dev_addr->dev_type = RDMA_NODE_RNIC;
+	memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), MAX_ADDR_LEN);
+	memcpy(dev_addr->broadcast, dev->if_broadcastaddr, MAX_ADDR_LEN);
+	if (dst_dev_addr)
+		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
+	return 0;
+}
+
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+	struct ifaddr *ifa;
+	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+	uint16_t port = sin->sin_port;
+
+	sin->sin_port = 0;
+	ifa = ifa_ifwithaddr(addr);
+	sin->sin_port = port;
+	if (!ifa)
+		return (EADDRNOTAVAIL);
+	return rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
+}
+
+static void queue_req(struct addr_req *req)
+{
+	struct addr_req *tmp_req = NULL;
+
+	mtx_lock(&lock);
+	TAILQ_FOREACH_REVERSE(tmp_req, &req_list, addr_req_list, entry)
+		if (time_after_eq(req->timeout, tmp_req->timeout))
+			break;
+
+	if (tmp_req)
+		TAILQ_INSERT_AFTER(&req_list, tmp_req, req, entry);
+	else
+		TAILQ_INSERT_TAIL(&req_list, req, entry);
+
+	if (TAILQ_FIRST(&req_list) == req)
+		callout_reset(&addr_ch, req->timeout - ticks, addr_timeout, NULL);
+	mtx_unlock(&lock);
+}
+
+#ifdef needed
+static void addr_send_arp(struct sockaddr_in *dst_in)
+{
+	struct route iproute;
+	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;
+	char dmac[ETHER_ADDR_LEN];
+
+	bzero(&iproute, sizeof iproute);
+	*dst = *dst_in;
+
+	rtalloc(&iproute);
+	if (iproute.ro_rt == NULL)
+		return;
+
+	arpresolve(iproute.ro_rt->rt_ifp,
iproute.ro_rt, NULL, + rt_key(iproute.ro_rt), dmac); + + RTFREE(iproute.ro_rt); +} +#endif + +static int addr_resolve_remote(struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + struct rdma_dev_addr *addr) +{ + int ret = 0; + struct route iproute; + struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; + char dmac[ETHER_ADDR_LEN]; + + bzero(&iproute, sizeof iproute); + *dst = *dst_in; + + rtalloc(&iproute); + if (iproute.ro_rt == NULL) { + ret = EHOSTUNREACH; + goto out; + } + + /* If the device does ARP internally, return 'done' */ + if (iproute.ro_rt->rt_ifp->if_flags & IFF_NOARP) { + rdma_copy_addr(addr, iproute.ro_rt->rt_ifp, NULL); + goto put; + } + ret = arpresolve(iproute.ro_rt->rt_ifp, iproute.ro_rt, NULL, + rt_key(iproute.ro_rt), dmac); + if (ret) { + goto put; + } + + if (!src_in->sin_addr.s_addr) { + src_in->sin_len = sizeof *src_in; + src_in->sin_family = dst_in->sin_family; + src_in->sin_addr.s_addr = ((struct sockaddr_in *)iproute.ro_rt->rt_ifa->ifa_addr)->sin_addr.s_addr; + } + + ret = rdma_copy_addr(addr, iproute.ro_rt->rt_ifp, dmac); +put: + RTFREE(iproute.ro_rt); +out: + return ret; +} + +static void process_req(void *ctx, int pending) +{ + struct addr_req *req, *tmp_req; + struct sockaddr_in *src_in, *dst_in; + TAILQ_HEAD(, addr_req) done_list; + + TAILQ_INIT(&done_list); + + mtx_lock(&lock); + TAILQ_FOREACH_SAFE(req, &req_list, entry, tmp_req) { + if (req->status == EWOULDBLOCK) { + src_in = (struct sockaddr_in *) &req->src_addr; + dst_in = (struct sockaddr_in *) &req->dst_addr; + req->status = addr_resolve_remote(src_in, dst_in, + req->addr); + if (req->status && time_after_eq(ticks, req->timeout)) + req->status = ETIMEDOUT; + else if (req->status == EWOULDBLOCK) + continue; + } + TAILQ_REMOVE(&req_list, req, entry); + TAILQ_INSERT_TAIL(&done_list, req, entry); + } + + if (!TAILQ_EMPTY(&req_list)) { + req = TAILQ_FIRST(&req_list); + callout_reset(&addr_ch, req->timeout - ticks, addr_timeout, + NULL); + } + mtx_unlock(&lock); + + TAILQ_FOREACH_SAFE(req, &done_list, entry, tmp_req) { + TAILQ_REMOVE(&done_list, req, entry); + req->callback(req->status, &req->src_addr, req->addr, + req->context); + put_client(req->client); + free(req, M_DEVBUF); + } +} + +int rdma_resolve_ip(struct rdma_addr_client *client, + struct sockaddr *src_addr, struct sockaddr *dst_addr, + struct rdma_dev_addr *addr, int timeout_ms, + void (*callback)(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context), + void *context) +{ + struct sockaddr_in *src_in, *dst_in; + struct addr_req *req; + int ret = 0; + + req = malloc(sizeof *req, M_DEVBUF, M_NOWAIT); + if (!req) + return (ENOMEM); + memset(req, 0, sizeof *req); + + if (src_addr) + memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); + memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); + req->addr = addr; + req->callback = callback; + req->context = context; + req->client = client; + mtx_lock(&client->lock); + client->refcount++; + mtx_unlock(&client->lock); + + src_in = (struct sockaddr_in *) &req->src_addr; + dst_in = (struct sockaddr_in *) &req->dst_addr; + + req->status = addr_resolve_remote(src_in, dst_in, addr); + + switch (req->status) { + case 0: + req->timeout = ticks; + queue_req(req); + break; + case EWOULDBLOCK: + req->timeout = msecs_to_ticks(timeout_ms) + ticks; + queue_req(req); +#ifdef needed + addr_send_arp(dst_in); +#endif + break; + default: + ret = req->status; + mtx_lock(&client->lock); + client->refcount--; + mtx_unlock(&client->lock); + free(req, M_DEVBUF); + break; 
+ } + return ret; +} + +void rdma_addr_cancel(struct rdma_dev_addr *addr) +{ + struct addr_req *req, *tmp_req; + + mtx_lock(&lock); + TAILQ_FOREACH_SAFE(req, &req_list, entry, tmp_req) { + if (req->addr == addr) { + req->status = ECANCELED; + req->timeout = ticks; + TAILQ_REMOVE(&req_list, req, entry); + TAILQ_INSERT_HEAD(&req_list, req, entry); + callout_reset(&addr_ch, req->timeout - ticks, addr_timeout, NULL); + break; + } + } + mtx_unlock(&lock); +} + +static void +route_event_arp_update(void *unused, struct rtentry *rt0, uint8_t *enaddr, + struct sockaddr *sa) +{ + callout_stop(&addr_ch); + taskqueue_enqueue(addr_taskq, &addr_task); +} + +static int addr_init(void) +{ + TAILQ_INIT(&req_list); + mtx_init(&lock, "rdma_addr req_list lock", NULL, MTX_DEF); + + addr_taskq = taskqueue_create("rdma_addr_taskq", M_NOWAIT, + taskqueue_thread_enqueue, &addr_taskq); + if (addr_taskq == NULL) { + printf("failed to allocate rdma_addr taskqueue\n"); + return (ENOMEM); + } + taskqueue_start_threads(&addr_taskq, 1, PI_NET, "rdma_addr taskq"); + TASK_INIT(&addr_task, 0, process_req, NULL); + + callout_init(&addr_ch, TRUE); + + route_event_tag = EVENTHANDLER_REGISTER(route_arp_update_event, + route_event_arp_update, NULL, EVENTHANDLER_PRI_ANY); + + return 0; +} + +static void addr_cleanup(void) +{ + EVENTHANDLER_DEREGISTER(route_event_arp_update, route_event_tag); + callout_stop(&addr_ch); + taskqueue_drain(addr_taskq, &addr_task); + taskqueue_free(addr_taskq); +} + +static int +addr_load(module_t mod, int cmd, void *arg) +{ + int err = 0; + + switch (cmd) { + case MOD_LOAD: + printf("Loading rdma_addr.\n"); + + addr_init(); + break; + case MOD_QUIESCE: + break; + case MOD_UNLOAD: + printf("Unloading rdma_addr.\n"); + addr_cleanup(); + break; + case MOD_SHUTDOWN: + break; + default: + err = EOPNOTSUPP; + break; + } + + return (err); +} + +static moduledata_t mod_data = { + "rdma_addr", + addr_load, + 0 +}; + +MODULE_VERSION(rdma_addr, 1); +DECLARE_MODULE(rdma_addr, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); diff --git a/sys/contrib/rdma/rdma_cache.c b/sys/contrib/rdma/rdma_cache.c new file mode 100644 index 000000000000..dced8ebd0110 --- /dev/null +++ b/sys/contrib/rdma/rdma_cache.c @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: cache.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/taskqueue.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/mutex.h> +#include <sys/module.h> +#include <sys/syslog.h> + + +#ifdef needed +#include <sys/condvar.h> +#include <sys/socket.h> +#include <sys/condvar.h> +#endif + +#include <contrib/rdma/ib_cache.h> + +#include "core_priv.h" + +struct ib_pkey_cache { + int table_len; + u16 table[0]; +}; + +struct ib_gid_cache { + int table_len; + union ib_gid table[0]; +}; + +struct ib_update_work { + struct task task; + struct ib_device *device; + u8 port_num; +}; + +static inline int start_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; +} + +static inline int end_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? + 0 : device->phys_port_cnt; +} + +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid) +{ + struct ib_gid_cache *cache; + int ret = 0; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + mtx_lock(&device->cache.lock); + + cache = device->cache.gid_cache[port_num - start_port(device)]; + + if (index < 0 || index >= cache->table_len) + ret = -EINVAL; + else + *gid = cache->table[index]; + + mtx_unlock(&device->cache.lock); + + return ret; +} + +int ib_find_cached_gid(struct ib_device *device, + union ib_gid *gid, + u8 *port_num, + u16 *index) +{ + struct ib_gid_cache *cache; + int p, i; + int ret = -ENOENT; + + *port_num = -1; + if (index) + *index = -1; + + mtx_lock(&device->cache.lock); + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + cache = device->cache.gid_cache[p]; + for (i = 0; i < cache->table_len; ++i) { + if (!memcmp(gid, &cache->table[i], 6)) { /* XXX */ + *port_num = p + start_port(device); + if (index) + *index = i; + ret = 0; + goto found; + } + } + } +found: + mtx_unlock(&device->cache.lock); + + return ret; +} + +int ib_get_cached_pkey(struct ib_device *device, + u8 port_num, + int index, + u16 *pkey) +{ + struct ib_pkey_cache *cache; + int ret = 0; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + mtx_lock(&device->cache.lock); + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + + if (index < 0 || index >= cache->table_len) + ret = -EINVAL; + else + *pkey = cache->table[index]; + + mtx_unlock(&device->cache.lock); + + return ret; +} + +int ib_find_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index) +{ + struct ib_pkey_cache *cache; + int i; + int ret = -ENOENT; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + mtx_lock(&device->cache.lock); + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + + *index = -1; + + for (i = 0; i < cache->table_len; ++i) + if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { + *index = i; + ret = 0; + break; + } + + mtx_unlock(&device->cache.lock); + + return ret; +} + +int ib_get_cached_lmc(struct ib_device *device, + u8 
port_num, + u8 *lmc) +{ + int ret = 0; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + mtx_lock(&device->cache.lock); + *lmc = device->cache.lmc_cache[port_num - start_port(device)]; + mtx_unlock(&device->cache.lock); + + return ret; +} + +static void ib_cache_update(struct ib_device *device, + u8 port) +{ + struct ib_port_attr *tprops = NULL; + struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; + struct ib_gid_cache *gid_cache = NULL, *old_gid_cache; + int i; + int ret; + + tprops = malloc(sizeof *tprops, M_DEVBUF, M_NOWAIT); + if (!tprops) + return; + + ret = ib_query_port(device, port, tprops); + if (ret) { + log(LOG_WARNING, "ib_query_port failed (%d) for %s\n", + ret, device->name); + goto err; + } + + pkey_cache = malloc(sizeof *pkey_cache + tprops->pkey_tbl_len * + sizeof *pkey_cache->table, M_DEVBUF, M_NOWAIT); + if (!pkey_cache) + goto err; + + pkey_cache->table_len = tprops->pkey_tbl_len; + + gid_cache = malloc(sizeof *gid_cache + tprops->gid_tbl_len * + sizeof *gid_cache->table, M_DEVBUF, M_NOWAIT); + if (!gid_cache) + goto err; + + gid_cache->table_len = tprops->gid_tbl_len; + + for (i = 0; i < pkey_cache->table_len; ++i) { + ret = ib_query_pkey(device, port, i, pkey_cache->table + i); + if (ret) { + log(LOG_WARNING, "ib_query_pkey failed (%d) for %s (index %d)\n", + ret, device->name, i); + goto err; + } + } + + for (i = 0; i < gid_cache->table_len; ++i) { + ret = ib_query_gid(device, port, i, gid_cache->table + i); + if (ret) { + log(LOG_WARNING, "ib_query_gid failed (%d) for %s (index %d)\n", + ret, device->name, i); + goto err; + } + } + + mtx_lock(&device->cache.lock); + + old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; + old_gid_cache = device->cache.gid_cache [port - start_port(device)]; + + device->cache.pkey_cache[port - start_port(device)] = pkey_cache; + device->cache.gid_cache [port - start_port(device)] = gid_cache; + + device->cache.lmc_cache[port - start_port(device)] = tprops->lmc; + + mtx_unlock(&device->cache.lock); + + free(old_pkey_cache, M_DEVBUF); + free(old_gid_cache, M_DEVBUF); + free(tprops, M_DEVBUF); + return; + +err: + free(pkey_cache, M_DEVBUF); + free(gid_cache, M_DEVBUF); + free(tprops, M_DEVBUF); +} + +static void ib_cache_task(void *context, int pending) +{ + struct ib_update_work *work = context; + + ib_cache_update(work->device, work->port_num); + free(work, M_DEVBUF); +} + +static void ib_cache_event(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct ib_update_work *work; + + if (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_SM_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER) { + work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT); + if (work) { + TASK_INIT(&work->task, 0, ib_cache_task, work); + work->device = event->device; + work->port_num = event->element.port_num; + taskqueue_enqueue(taskqueue_thread, &work->task); + } + } +} + +static void ib_cache_setup_one(struct ib_device *device) +{ + int p; + + mtx_init(&device->cache.lock, "ib device cache", NULL, + MTX_DUPOK|MTX_DEF); + + device->cache.pkey_cache = + malloc(sizeof *device->cache.pkey_cache * + (end_port(device) - start_port(device) + 1), M_DEVBUF, + M_NOWAIT); + device->cache.gid_cache = + malloc(sizeof *device->cache.gid_cache * + (end_port(device) - start_port(device) + 1), M_DEVBUF, + M_NOWAIT); + + device->cache.lmc_cache = malloc(sizeof 
*device->cache.lmc_cache * + (end_port(device) - + start_port(device) + 1), + M_DEVBUF, M_NOWAIT); + + if (!device->cache.pkey_cache || !device->cache.gid_cache || + !device->cache.lmc_cache) { + log(LOG_WARNING, "Couldn't allocate cache " + "for %s\n", device->name); + goto err; + } + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + device->cache.pkey_cache[p] = NULL; + device->cache.gid_cache [p] = NULL; + ib_cache_update(device, p + start_port(device)); + } + + INIT_IB_EVENT_HANDLER(&device->cache.event_handler, + device, ib_cache_event); + if (ib_register_event_handler(&device->cache.event_handler)) + goto err_cache; + + return; + +err_cache: + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + free(device->cache.pkey_cache[p], M_DEVBUF); + free(device->cache.gid_cache[p], M_DEVBUF); + } + +err: + free(device->cache.pkey_cache, M_DEVBUF); + free(device->cache.gid_cache, M_DEVBUF); + free(device->cache.lmc_cache, M_DEVBUF); +} + +static void ib_cache_cleanup_one(struct ib_device *device) +{ + int p; + + ib_unregister_event_handler(&device->cache.event_handler); +#ifdef XXX + flush_scheduled_work(); +#endif + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + free(device->cache.pkey_cache[p], M_DEVBUF); + free(device->cache.gid_cache[p], M_DEVBUF); + } + + free(device->cache.pkey_cache, M_DEVBUF); + free(device->cache.gid_cache, M_DEVBUF); + free(device->cache.lmc_cache, M_DEVBUF); +} + +static struct ib_client cache_client = { + .name = "cache", + .add = ib_cache_setup_one, + .remove = ib_cache_cleanup_one +}; + +int ib_cache_setup(void) +{ + return ib_register_client(&cache_client); +} + +void ib_cache_cleanup(void) +{ + ib_unregister_client(&cache_client); +} diff --git a/sys/contrib/rdma/rdma_cm.h b/sys/contrib/rdma/rdma_cm.h new file mode 100644 index 000000000000..1b30d04435d5 --- /dev/null +++ b/sys/contrib/rdma/rdma_cm.h @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + * + * $FreeBSD$ + */ + +#if !defined(RDMA_CM_H) +#define RDMA_CM_H + +#include <sys/socket.h> +#include <netinet/in.h> +#include <contrib/rdma/ib_addr.h> +#include <contrib/rdma/ib_sa.h> + +/* + * Upon receiving a device removal event, users must destroy the associated + * RDMA identifier and release all resources allocated with the device. 
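 *
 * As an illustrative sketch only (the case below is hypothetical and
 * my_teardown() is not part of this API), an event handler honors this with:
 *
 *	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 *		my_teardown(id->context);	(release verbs objects on id->device)
 *		return (1);			(a non-zero return destroys the id)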
+ */ +enum rdma_cm_event_type { + RDMA_CM_EVENT_ADDR_RESOLVED, + RDMA_CM_EVENT_ADDR_ERROR, + RDMA_CM_EVENT_ROUTE_RESOLVED, + RDMA_CM_EVENT_ROUTE_ERROR, + RDMA_CM_EVENT_CONNECT_REQUEST, + RDMA_CM_EVENT_CONNECT_RESPONSE, + RDMA_CM_EVENT_CONNECT_ERROR, + RDMA_CM_EVENT_UNREACHABLE, + RDMA_CM_EVENT_REJECTED, + RDMA_CM_EVENT_ESTABLISHED, + RDMA_CM_EVENT_DISCONNECTED, + RDMA_CM_EVENT_DEVICE_REMOVAL, + RDMA_CM_EVENT_MULTICAST_JOIN, + RDMA_CM_EVENT_MULTICAST_ERROR +}; + +enum rdma_port_space { + RDMA_PS_SDP = 0x0001, + RDMA_PS_IPOIB= 0x0002, + RDMA_PS_TCP = 0x0106, + RDMA_PS_UDP = 0x0111, + RDMA_PS_SCTP = 0x0183 +}; + +struct rdma_addr { + struct sockaddr src_addr; + u8 src_pad[sizeof(struct sockaddr_in6) - + sizeof(struct sockaddr)]; + struct sockaddr dst_addr; + u8 dst_pad[sizeof(struct sockaddr_in6) - + sizeof(struct sockaddr)]; + struct rdma_dev_addr dev_addr; +}; + +struct rdma_route { + struct rdma_addr addr; + struct ib_sa_path_rec *path_rec; + int num_paths; +}; + +struct rdma_conn_param { + const void *private_data; + u8 private_data_len; + u8 responder_resources; + u8 initiator_depth; + u8 flow_control; + u8 retry_count; /* ignored when accepting */ + u8 rnr_retry_count; + /* Fields below ignored if a QP is created on the rdma_cm_id. */ + u8 srq; + u32 qp_num; +}; + +struct rdma_ud_param { + const void *private_data; + u8 private_data_len; + struct ib_ah_attr ah_attr; + u32 qp_num; + u32 qkey; +}; + +struct rdma_cm_event { + enum rdma_cm_event_type event; + int status; + union { + struct rdma_conn_param conn; + struct rdma_ud_param ud; + } param; +}; + +struct rdma_cm_id; + +/** + * rdma_cm_event_handler - Callback used to report user events. + * + * Notes: Users may not call rdma_destroy_id from this callback to destroy + * the passed in id, or a corresponding listen id. Returning a + * non-zero value from the callback will destroy the passed in id. + */ +typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id, + struct rdma_cm_event *event); + +struct rdma_cm_id { + struct ib_device *device; + void *context; + struct ib_qp *qp; + rdma_cm_event_handler event_handler; + struct rdma_route route; + enum rdma_port_space ps; + u8 port_num; +}; + +/** + * rdma_create_id - Create an RDMA identifier. + * + * @event_handler: User callback invoked to report events associated with the + * returned rdma_id. + * @context: User specified context associated with the id. + * @ps: RDMA port space. + */ +struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, + void *context, enum rdma_port_space ps); + +/** + * rdma_destroy_id - Destroys an RDMA identifier. + * + * @id: RDMA identifier. + * + * Note: calling this function has the effect of canceling in-flight + * asynchronous operations associated with the id. + */ +void rdma_destroy_id(struct rdma_cm_id *id); + +/** + * rdma_bind_addr - Bind an RDMA identifier to a source address and + * associated RDMA device, if needed. + * + * @id: RDMA identifier. + * @addr: Local address information. Wildcard values are permitted. + * + * This associates a source address with the RDMA identifier before calling + * rdma_listen. If a specific local address is given, the RDMA identifier will + * be bound to a local RDMA device. + */ +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr); + +/** + * rdma_resolve_addr - Resolve destination and optional source addresses + * from IP addresses to an RDMA address. If successful, the specified + * rdma_cm_id will be bound to a local device. + * + * @id: RDMA identifier. 
+ * @src_addr: Source address information. This parameter may be NULL. + * @dst_addr: Destination address information. + * @timeout_ms: Time to wait for resolution to complete. + */ +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr, int timeout_ms); + +/** + * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier + * into route information needed to establish a connection. + * + * This is called on the client side of a connection. + * Users must have first called rdma_resolve_addr to resolve a dst_addr + * into an RDMA address before calling this routine. + */ +int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms); + +/** + * rdma_create_qp - Allocate a QP and associate it with the specified RDMA + * identifier. + * + * QPs allocated to an rdma_cm_id will automatically be transitioned by the CMA + * through their states. + */ +int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr); + +/** + * rdma_destroy_qp - Deallocate the QP associated with the specified RDMA + * identifier. + * + * Users must destroy any QP associated with an RDMA identifier before + * destroying the RDMA ID. + */ +void rdma_destroy_qp(struct rdma_cm_id *id); + +/** + * rdma_init_qp_attr - Initializes the QP attributes for use in transitioning + * to a specified QP state. + * @id: Communication identifier associated with the QP attributes to + * initialize. + * @qp_attr: On input, specifies the desired QP state. On output, the + * mandatory and desired optional attributes will be set in order to + * modify the QP to the specified state. + * @qp_attr_mask: The QP attribute mask that may be used to transition the + * QP to the specified state. + * + * Users must set the @qp_attr->qp_state to the desired QP state. This call + * will set all required attributes for the given transition, along with + * known optional attributes. Users may override the attributes returned from + * this call before calling ib_modify_qp. + * + * Users that wish to have their QP automatically transitioned through its + * states can associate a QP with the rdma_cm_id by calling rdma_create_qp(). + */ +int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, + int *qp_attr_mask); + +/** + * rdma_connect - Initiate an active connection request. + * @id: Connection identifier to connect. + * @conn_param: Connection information used for connected QPs. + * + * Users must have resolved a route for the rdma_cm_id to connect with + * by having called rdma_resolve_route before calling this routine. + * + * This call will either connect to a remote QP or obtain remote QP + * information for unconnected rdma_cm_id's. The actual operation is + * based on the rdma_cm_id's port space. + */ +int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); + +/** + * rdma_listen - This function is called by the passive side to + * listen for incoming connection requests. + * + * Users must have bound the rdma_cm_id to a local address by calling + * rdma_bind_addr before calling this routine. + */ +int rdma_listen(struct rdma_cm_id *id, int backlog); + +/** + * rdma_accept - Called to accept a connection request or response. + * @id: Connection identifier associated with the request. + * @conn_param: Information needed to establish the connection. This must be + * provided if accepting a connection request. If accepting a connection + * response, this parameter must be NULL. 
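+ *
+ * A minimal sketch (the parameter values are illustrative, not required) of
+ * accepting from within a RDMA_CM_EVENT_CONNECT_REQUEST handler:
+ *
+ *	struct rdma_conn_param param;
+ *
+ *	memset(&param, 0, sizeof param);
+ *	param.responder_resources = 1;
+ *	param.initiator_depth = 1;
+ *	err = rdma_accept(id, &param);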
+ * + * Typically, this routine is only called by the listener to accept a connection + * request. It must also be called on the active side of a connection if the + * user is performing their own QP transitions. + * + * In the case of error, a reject message is sent to the remote side and the + * state of the qp associated with the id is modified to error, such that any + * previously posted receive buffers would be flushed. + */ +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); + +/** + * rdma_notify - Notifies the RDMA CM of an asynchronous event that has + * occurred on the connection. + * @id: Connection identifier to transition to established. + * @event: Asynchronous event. + * + * This routine should be invoked by users to notify the CM of relevant + * communication events. Events that should be reported to the CM and + * when to report them are: + * + * IB_EVENT_COMM_EST - Used when a message is received on a connected + * QP before an RTU has been received. + */ +int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event); + +/** + * rdma_reject - Called to reject a connection request or response. + */ +int rdma_reject(struct rdma_cm_id *id, const void *private_data, + u8 private_data_len); + +/** + * rdma_disconnect - This function disconnects the associated QP and + * transitions it into the error state. + */ +int rdma_disconnect(struct rdma_cm_id *id); + +/** + * rdma_join_multicast - Join the multicast group specified by the given + * address. + * @id: Communication identifier associated with the request. + * @addr: Multicast address identifying the group to join. + * @context: User-defined context associated with the join request, returned + * to the user through the private_data pointer in multicast events. + */ +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, + void *context); + +/** + * rdma_leave_multicast - Leave the multicast group specified by the given + * address. + */ +void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); + +#endif /* RDMA_CM_H */ diff --git a/sys/contrib/rdma/rdma_cm_ib.h b/sys/contrib/rdma/rdma_cm_ib.h new file mode 100644 index 000000000000..b69f66613bd9 --- /dev/null +++ b/sys/contrib/rdma/rdma_cm_ib.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ *
+ * $FreeBSD$
+ */

+#if !defined(RDMA_CM_IB_H)
+#define RDMA_CM_IB_H
+
+#include <contrib/rdma/rdma_cm.h>
+
+/**
+ * rdma_set_ib_paths - Manually sets the path records used to establish a
+ *   connection.
+ * @id: Connection identifier associated with the request.
+ * @path_rec: Reference to the path records.
+ * @num_paths: Number of path records referenced by @path_rec.
+ *
+ * This call permits a user to specify routing information for rdma_cm_ids
+ * bound to InfiniBand devices. It is called on the client side of a
+ * connection and replaces the call to rdma_resolve_route.
+ */
+int rdma_set_ib_paths(struct rdma_cm_id *id,
+		      struct ib_sa_path_rec *path_rec, int num_paths);
+
+/* Global qkey for UDP QPs and multicast groups. */
+#define RDMA_UDP_QKEY 0x01234567
+
+#endif /* RDMA_CM_IB_H */
diff --git a/sys/contrib/rdma/rdma_cma.c b/sys/contrib/rdma/rdma_cma.c
new file mode 100644
index 000000000000..8dddf6ce157d
--- /dev/null
+++ b/sys/contrib/rdma/rdma_cma.c
@@ -0,0 +1,2998 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/condvar.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/libkern.h> +#include <sys/socket.h> +#include <sys/module.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/priv.h> +#include <sys/syslog.h> + +#include <netinet/in.h> +#include <netinet/in_pcb.h> + +#include <contrib/rdma/rdma_cm.h> +#include <contrib/rdma/ib_cache.h> +#include <contrib/rdma/ib_cm.h> +#include <contrib/rdma/ib_sa.h> +#include <contrib/rdma/iw_cm.h> + +#define CMA_CM_RESPONSE_TIMEOUT 20 +#define CMA_MAX_CM_RETRIES 15 + +static void cma_add_one(struct ib_device *device); +static void cma_remove_one(struct ib_device *device); + +static struct ib_client cma_client = { + .name = "cma", + .add = cma_add_one, + .remove = cma_remove_one +}; + +#ifdef IB_SUPPORTED +static struct ib_sa_client sa_client; +#endif +static struct rdma_addr_client addr_client; +static TAILQ_HEAD(, cma_device) dev_list; +static LIST_HEAD(, rdma_id_private) listen_any_list; +static struct mtx lock; +static struct taskqueue *cma_wq; +static DEFINE_KVL(sdp_ps); +static DEFINE_KVL(tcp_ps); +static DEFINE_KVL(udp_ps); +static DEFINE_KVL(ipoib_ps); +static int next_port; + +struct cma_device { + struct ib_device *device; + struct mtx lock; + struct cv comp; + int refcount; + + LIST_HEAD(, rdma_id_private) id_list; + TAILQ_ENTRY(cma_device) list; +}; + +enum cma_state { + CMA_IDLE, + CMA_ADDR_QUERY, + CMA_ADDR_RESOLVED, + CMA_ROUTE_QUERY, + CMA_ROUTE_RESOLVED, + CMA_CONNECT, + CMA_DISCONNECT, + CMA_ADDR_BOUND, + CMA_LISTEN, + CMA_DEVICE_REMOVAL, + CMA_DESTROYING +}; + +struct rdma_bind_list { + struct kvl *ps; + TAILQ_HEAD(, rdma_id_private) owners; + unsigned short port; +}; + +/* + * Device removal can occur at anytime, so we need extra handling to + * serialize notifying the user of device removal with other callbacks. + * We do this by disabling removal notification while a callback is in process, + * and reporting it after the callback completes. 
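+ *
+ * Concretely: cma_disable_remove() bumps id_priv->dev_remove before a
+ * callback runs, cma_enable_remove() drops it afterwards, and the removal
+ * path sleeps on wait_remove until the count drains to zero.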
+ */ +struct rdma_id_private { + struct rdma_cm_id id; + + struct rdma_bind_list *bind_list; + struct socket *so; + TAILQ_ENTRY(rdma_id_private) node; + LIST_ENTRY(rdma_id_private) list; /* listen_any_list or cma_dev.list */ + LIST_HEAD(, rdma_id_private) listen_list; /* per-device listens */ + LIST_ENTRY(rdma_id_private) listen_entry; + struct cma_device *cma_dev; +#ifdef IB_SUPPORTED + LIST_HEAD(, cma_multicast) mc_list; +#endif + enum cma_state state; + struct mtx lock; + struct cv comp; + int refcount; + struct cv wait_remove; + int dev_remove; + + int backlog; + int timeout_ms; + struct ib_sa_query *query; + int query_id; + union { + struct ib_cm_id *ib; + struct iw_cm_id *iw; + } cm_id; + + u32 seq_num; + u32 qkey; + u32 qp_num; + u8 srq; +}; + +#ifdef IB_SUPPORTED +struct cma_multicast { + struct rdma_id_private *id_priv; + union { + struct ib_sa_multicast *ib; + } multicast; + struct list_head list; + void *context; + struct sockaddr addr; + u8 pad[sizeof(struct sockaddr_in6) - + sizeof(struct sockaddr)]; +}; +#endif + +struct cma_work { + struct task task; + struct rdma_id_private *id; + enum cma_state old_state; + enum cma_state new_state; + struct rdma_cm_event event; +}; + +union cma_ip_addr { + struct in6_addr ip6; + struct { + __u32 pad[3]; + __u32 addr; + } ip4; +}; + +struct cma_hdr { + u8 cma_version; + u8 ip_version; /* IP version: 7:4 */ + __u16 port; + union cma_ip_addr src_addr; + union cma_ip_addr dst_addr; +}; + +struct sdp_hh { + u8 bsdh[16]; + u8 sdp_version; /* Major version: 7:4 */ + u8 ip_version; /* IP version: 7:4 */ + u8 sdp_specific1[10]; + __u16 port; + __u16 sdp_specific2; + union cma_ip_addr src_addr; + union cma_ip_addr dst_addr; +}; + +struct sdp_hah { + u8 bsdh[16]; + u8 sdp_version; +}; + +#define CMA_VERSION 0x00 +#define SDP_MAJ_VERSION 0x2 + +static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp) +{ + int ret; + + mtx_lock(&id_priv->lock); + ret = (id_priv->state == comp); + mtx_unlock(&id_priv->lock); + return ret; +} + +static int cma_comp_exch(struct rdma_id_private *id_priv, + enum cma_state comp, enum cma_state exch) +{ + int ret; + + mtx_lock(&id_priv->lock); + if ((ret = (id_priv->state == comp))) + id_priv->state = exch; + mtx_unlock(&id_priv->lock); + return ret; +} + +static enum cma_state cma_exch(struct rdma_id_private *id_priv, + enum cma_state exch) +{ + enum cma_state old; + + mtx_lock(&id_priv->lock); + old = id_priv->state; + id_priv->state = exch; + mtx_unlock(&id_priv->lock); + return old; +} + +static inline u8 cma_get_ip_ver(struct cma_hdr *hdr) +{ + return hdr->ip_version >> 4; +} + +static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) +{ + hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); +} + +static inline u8 sdp_get_majv(u8 sdp_version) +{ + return sdp_version >> 4; +} + +static inline u8 sdp_get_ip_ver(struct sdp_hh *hh) +{ + return hh->ip_version >> 4; +} + +static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) +{ + hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); +} + +static inline int cma_is_ud_ps(enum rdma_port_space ps) +{ + return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB); +} + +static void cma_attach_to_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + mtx_lock(&cma_dev->lock); + cma_dev->refcount++; + mtx_unlock(&cma_dev->lock); + id_priv->cma_dev = cma_dev; + id_priv->id.device = cma_dev->device; + LIST_INSERT_HEAD(&cma_dev->id_list, id_priv, list); +} + +static inline void cma_deref_dev(struct cma_device *cma_dev) +{ + 
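+	/*
+	 * Drop the device reference taken in cma_attach_to_dev(); the
+	 * broadcast wakes anyone waiting on cma_dev->comp for the device
+	 * to go idle.
+	 */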
mtx_lock(&cma_dev->lock); + if (--cma_dev->refcount == 0) + cv_broadcast(&cma_dev->comp); + mtx_unlock(&cma_dev->lock); +} + +static void cma_detach_from_dev(struct rdma_id_private *id_priv) +{ + LIST_REMOVE(id_priv, list); + cma_deref_dev(id_priv->cma_dev); + id_priv->cma_dev = NULL; +} + +#ifdef IB_SUPPORTED +static int cma_set_qkey(struct ib_device *device, u8 port_num, + enum rdma_port_space ps, + struct rdma_dev_addr *dev_addr, u32 *qkey) +{ + struct ib_sa_mcmember_rec rec; + int ret = 0; + + switch (ps) { + case RDMA_PS_UDP: + *qkey = RDMA_UDP_QKEY; + break; + case RDMA_PS_IPOIB: + ib_addr_get_mgid(dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec); + *qkey = be32_to_cpu(rec.qkey); + break; + default: + break; + } + return ret; +} +#endif + +static int cma_acquire_dev(struct rdma_id_private *id_priv) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct cma_device *cma_dev; + union ib_gid gid; + int ret = ENODEV; + + switch (rdma_node_get_transport(dev_addr->dev_type)) { +#ifdef IB_SUPPORTED + case RDMA_TRANSPORT_IB: + ib_addr_get_sgid(dev_addr, &gid); + break; +#endif + case RDMA_TRANSPORT_IWARP: + iw_addr_get_sgid(dev_addr, &gid); + break; + default: + return (ENODEV); + } + + TAILQ_FOREACH(cma_dev, &dev_list, list) { + ret = ib_find_cached_gid(cma_dev->device, &gid, + &id_priv->id.port_num, NULL); + if (!ret) { +#ifdef IB_SUPPORTED + ret = cma_set_qkey(cma_dev->device, + id_priv->id.port_num, + id_priv->id.ps, dev_addr, + &id_priv->qkey); + if (!ret) +#endif + cma_attach_to_dev(id_priv, cma_dev); + break; + } + } + return ret; +} + +static void cma_deref_id(struct rdma_id_private *id_priv) +{ + mtx_lock(&id_priv->lock); + if (--id_priv->refcount == 0) { + cv_broadcast(&id_priv->comp); + } + mtx_unlock(&id_priv->lock); +} + +static int cma_disable_remove(struct rdma_id_private *id_priv, + enum cma_state state) +{ + int ret; + + mtx_lock(&id_priv->lock); + if (id_priv->state == state) { + id_priv->dev_remove++; + ret = 0; + } else + ret = EINVAL; + mtx_unlock(&id_priv->lock); + return ret; +} + +static void cma_enable_remove(struct rdma_id_private *id_priv) +{ + mtx_lock(&id_priv->lock); + if (--id_priv->dev_remove == 0) + cv_broadcast(&id_priv->wait_remove); + mtx_unlock(&id_priv->lock); +} + +static int cma_has_cm_dev(struct rdma_id_private *id_priv) +{ + return (id_priv->id.device && id_priv->cm_id.ib); +} + +struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, + void *context, enum rdma_port_space ps) +{ + struct rdma_id_private *id_priv; + + id_priv = malloc(sizeof *id_priv, M_DEVBUF, M_NOWAIT); + if (!id_priv) + return ERR_PTR(-ENOMEM); + bzero(id_priv, sizeof *id_priv); + + id_priv->state = CMA_IDLE; + id_priv->id.context = context; + id_priv->id.event_handler = event_handler; + id_priv->id.ps = ps; + mtx_init(&id_priv->lock, "rdma_cm_id_priv", NULL, MTX_DUPOK|MTX_DEF); + cv_init(&id_priv->comp, "rdma_cm_id_priv"); + id_priv->refcount = 1; + cv_init(&id_priv->wait_remove, "id priv wait remove"); + LIST_INIT(&id_priv->listen_list); + arc4rand(&id_priv->seq_num, sizeof id_priv->seq_num, 0); + + return &id_priv->id; +} + +static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTR; + ret = 
ib_modify_qp(qp, &qp_attr, IB_QP_STATE); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTS; + qp_attr.sq_psn = 0; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + + return ret; +} + +static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); +} + +int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + struct rdma_id_private *id_priv; + struct ib_qp *qp; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id->device != pd->device) + return (EINVAL); + + qp = ib_create_qp(pd, qp_init_attr); + if (IS_ERR(qp)) + return PTR_ERR(qp); + if (cma_is_ud_ps(id_priv->id.ps)) + ret = cma_init_ud_qp(id_priv, qp); + else + ret = cma_init_conn_qp(id_priv, qp); + if (ret) + goto err; + + id->qp = qp; + id_priv->qp_num = qp->qp_num; + id_priv->srq = (qp->srq != NULL); + return 0; +err: + ib_destroy_qp(qp); + return ret; +} + +void rdma_destroy_qp(struct rdma_cm_id *id) +{ + ib_destroy_qp(id->qp); +} + +static int cma_modify_qp_rtr(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + if (!id->qp) + return 0; + + /* Need to update QP attributes from default values. */ + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); + if (ret) + return ret; + + qp_attr.qp_state = IB_QPS_RTR; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); +} + +#ifdef IB_SUPPORTED +static int cma_modify_qp_rts(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + if (!id->qp) + return 0; + + qp_attr.qp_state = IB_QPS_RTS; + ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); +} +#endif + +static int cma_modify_qp_err(struct rdma_cm_id *id) +{ + struct ib_qp_attr qp_attr; + + if (!id->qp) + return 0; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); +} + +#ifdef IB_SUPPORTED +static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, + struct ib_qp_attr *qp_attr, int *qp_attr_mask) +{ + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + int ret; + + ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, + ib_addr_get_pkey(dev_addr), + &qp_attr->pkey_index); + if (ret) + return ret; + + qp_attr->port_num = id_priv->id.port_num; + *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; + + if (cma_is_ud_ps(id_priv->id.ps)) { + qp_attr->qkey = id_priv->qkey; + *qp_attr_mask |= IB_QP_QKEY; + } else { + qp_attr->qp_access_flags = 0; + *qp_attr_mask |= IB_QP_ACCESS_FLAGS; + } + return 0; +} +#endif + +int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct rdma_id_private *id_priv; + int ret = 0; + + id_priv = container_of(id, struct rdma_id_private, id); +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: + if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps)) + ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); + else + ret = 
ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, + qp_attr_mask); + if (qp_attr->qp_state == IB_QPS_RTR) + qp_attr->rq_psn = id_priv->seq_num; + break; + case RDMA_TRANSPORT_IWARP: +#endif + if (!id_priv->cm_id.iw) { + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; + } else + ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, + qp_attr_mask); +#ifdef IB_SUPPORTED + break; + default: + ret = ENOSYS; + break; + } +#endif + + return ret; +} + +static inline int cma_zero_addr(struct sockaddr *addr) +{ + struct in6_addr *ip6; + + if (addr->sa_family == AF_INET) + return in_nullhost(((struct sockaddr_in *) addr)->sin_addr); + else { + ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; + return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | + ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; + } +} + +static inline int cma_loopback_addr(struct sockaddr *addr) +{ + return ((struct sockaddr_in *)addr)->sin_addr.s_addr == INADDR_LOOPBACK; +} + +static inline int cma_any_addr(struct sockaddr *addr) +{ + return cma_zero_addr(addr) || cma_loopback_addr(addr); +} + +static inline __be16 cma_port(struct sockaddr *addr) +{ + if (addr->sa_family == AF_INET) + return ((struct sockaddr_in *) addr)->sin_port; + else + return ((struct sockaddr_in6 *) addr)->sin6_port; +} + +static inline int cma_any_port(struct sockaddr *addr) +{ + return !cma_port(addr); +} + +#ifdef IB_SUPPORTED +static int cma_get_net_info(void *hdr, enum rdma_port_space ps, + u8 *ip_ver, __u16 *port, + union cma_ip_addr **src, union cma_ip_addr **dst) +{ + switch (ps) { + case RDMA_PS_SDP: + if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) != + SDP_MAJ_VERSION) + return (EINVAL); + + *ip_ver = sdp_get_ip_ver(hdr); + *port = ((struct sdp_hh *) hdr)->port; + *src = &((struct sdp_hh *) hdr)->src_addr; + *dst = &((struct sdp_hh *) hdr)->dst_addr; + break; + default: + if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) + return (EINVAL); + + *ip_ver = cma_get_ip_ver(hdr); + *port = ((struct cma_hdr *) hdr)->port; + *src = &((struct cma_hdr *) hdr)->src_addr; + *dst = &((struct cma_hdr *) hdr)->dst_addr; + break; + } + + if (*ip_ver != 4 && *ip_ver != 6) + return (EINVAL); + return 0; +} + +static void cma_save_net_info(struct rdma_addr *addr, + struct rdma_addr *listen_addr, + u8 ip_ver, __u16 port, + union cma_ip_addr *src, union cma_ip_addr *dst) +{ + struct sockaddr_in *listen4, *ip4; + struct sockaddr_in6 *listen6, *ip6; + + switch (ip_ver) { + case 4: + listen4 = (struct sockaddr_in *) &listen_addr->src_addr; + ip4 = (struct sockaddr_in *) &addr->src_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = dst->ip4.addr; + ip4->sin_port = listen4->sin_port; + + ip4 = (struct sockaddr_in *) &addr->dst_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = src->ip4.addr; + ip4->sin_port = port; + break; + case 6: + listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; + ip6 = (struct sockaddr_in6 *) &addr->src_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = dst->ip6; + ip6->sin6_port = listen6->sin6_port; + + ip6 = (struct sockaddr_in6 *) &addr->dst_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = src->ip6; + ip6->sin6_port = port; + break; + default: + break; + } +} +#endif + +static inline int cma_user_data_offset(enum rdma_port_space ps) +{ + switch (ps) { + case RDMA_PS_SDP: + return 0; + default: + return sizeof(struct cma_hdr); + } +} + +static void cma_cancel_route(struct rdma_id_private *id_priv) +{ 
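+	/*
+	 * Only IB SA path queries are asynchronous and cancellable; with
+	 * IB support compiled out there is nothing in flight to cancel.
+	 */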
+#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: + if (id_priv->query) + ib_sa_cancel_query(id_priv->query_id, id_priv->query); + break; + default: + break; + } +#endif +} + +static inline int cma_internal_listen(struct rdma_id_private *id_priv) +{ + return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev && + cma_any_addr(&id_priv->id.route.addr.src_addr); +} + +static void cma_destroy_listen(struct rdma_id_private *id_priv) +{ + cma_exch(id_priv, CMA_DESTROYING); + + if (id_priv->cma_dev) { +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { + case RDMA_TRANSPORT_IB: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + ib_destroy_cm_id(id_priv->cm_id.ib); + break; + case RDMA_TRANSPORT_IWARP: +#endif + if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + iw_destroy_cm_id(id_priv->cm_id.iw); +#ifdef IB_SUPPORTED + break; + default: + break; + } +#endif + cma_detach_from_dev(id_priv); + } + LIST_REMOVE(id_priv, listen_entry); + + cma_deref_id(id_priv); + mtx_lock(&id_priv->lock); + if (id_priv->refcount) + cv_wait(&id_priv->comp, &id_priv->lock); + mtx_unlock(&id_priv->lock); + + free(id_priv, M_DEVBUF); +} + +static void cma_cancel_listens(struct rdma_id_private *id_priv) +{ + struct rdma_id_private *dev_id_priv; + + mtx_lock(&lock); + LIST_REMOVE(id_priv, list); + + while (!LIST_EMPTY(&id_priv->listen_list)) { + dev_id_priv = LIST_FIRST(&id_priv->listen_list); + cma_destroy_listen(dev_id_priv); + } + mtx_unlock(&lock); +} + +static void cma_cancel_operation(struct rdma_id_private *id_priv, + enum cma_state state) +{ + switch (state) { + case CMA_ADDR_QUERY: + rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); + break; + case CMA_ROUTE_QUERY: + cma_cancel_route(id_priv); + break; + case CMA_LISTEN: + if (cma_any_addr(&id_priv->id.route.addr.src_addr) && + !id_priv->cma_dev) + cma_cancel_listens(id_priv); + break; + default: + break; + } +} + +static void cma_release_port(struct rdma_id_private *id_priv) +{ + struct rdma_bind_list *bind_list = id_priv->bind_list; + + if (!bind_list) + return; + + mtx_lock(&lock); + TAILQ_REMOVE(&bind_list->owners, id_priv, node); + if (TAILQ_EMPTY(&bind_list->owners)) { + kvl_delete(bind_list->ps, bind_list->port); + free(bind_list, M_DEVBUF); + } + mtx_unlock(&lock); + if (id_priv->so) + soclose(id_priv->so); +} + +#ifdef IB_SUPPORTED +static void cma_leave_mc_groups(struct rdma_id_private *id_priv) +{ + struct cma_multicast *mc; + + while (!LIST_EMPTY(&id_priv->mc_list)) { + mc = LIST_FIRST(&id_priv->mc_list); + LIST_REMOVE(mc, list); + ib_sa_free_multicast(mc->multicast.ib); + free(mc, M_DEVBUF); + } +} +#endif + +void rdma_destroy_id(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + enum cma_state state; + + id_priv = container_of(id, struct rdma_id_private, id); + state = cma_exch(id_priv, CMA_DESTROYING); + cma_cancel_operation(id_priv, state); + + mtx_lock(&lock); + if (id_priv->cma_dev) { + mtx_unlock(&lock); +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) + ib_destroy_cm_id(id_priv->cm_id.ib); + break; + case RDMA_TRANSPORT_IWARP: +#endif + if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) + iw_destroy_cm_id(id_priv->cm_id.iw); +#ifdef IB_SUPPORTED + break; + default: + break; + } + cma_leave_mc_groups(id_priv); +#endif + mtx_lock(&lock); + cma_detach_from_dev(id_priv); + } + mtx_unlock(&lock); + 
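+	/*
+	 * The CM id is torn down; return the bound port (closing the
+	 * socket, if any) and wait for the last reference before freeing.
+	 */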
cma_release_port(id_priv); + cma_deref_id(id_priv); + mtx_lock(&id_priv->lock); + PANIC_IF(id_priv->refcount < 0); + if (id_priv->refcount) + cv_wait(&id_priv->comp, &id_priv->lock); + mtx_unlock(&id_priv->lock); + free(id_priv->id.route.path_rec, M_DEVBUF); + free(id_priv, M_DEVBUF); +} + +#ifdef IB_SUPPORTED +static int cma_rep_recv(struct rdma_id_private *id_priv) +{ + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + goto reject; + + ret = cma_modify_qp_rts(&id_priv->id); + if (ret) + goto reject; + + ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(&id_priv->id); + ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, + NULL, 0, NULL, 0); + return ret; +} + +static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) +{ + if (id_priv->id.ps == RDMA_PS_SDP && + sdp_get_majv(((struct sdp_hah *) data)->sdp_version) != + SDP_MAJ_VERSION) + return (EINVAL); + + return 0; +} + +static void cma_set_rep_event_data(struct rdma_cm_event *event, + struct ib_cm_rep_event_param *rep_data, + void *private_data) +{ + event->param.conn.private_data = private_data; + event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; + event->param.conn.responder_resources = rep_data->responder_resources; + event->param.conn.initiator_depth = rep_data->initiator_depth; + event->param.conn.flow_control = rep_data->flow_control; + event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; + event->param.conn.srq = rep_data->srq; + event->param.conn.qp_num = rep_data->remote_qpn; +} + +static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv = cm_id->context; + struct rdma_cm_event event; + int ret = 0; + + if (cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; + + memset(&event, 0, sizeof event); + switch (ib_event->event) { + case IB_CM_REQ_ERROR: + case IB_CM_REP_ERROR: + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = ETIMEDOUT; + break; + case IB_CM_REP_RECEIVED: + event.status = cma_verify_rep(id_priv, ib_event->private_data); + if (event.status) + event.event = RDMA_CM_EVENT_CONNECT_ERROR; + else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { + event.status = cma_rep_recv(id_priv); + event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : + RDMA_CM_EVENT_ESTABLISHED; + } else + event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; + cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, + ib_event->private_data); + break; + case IB_CM_RTU_RECEIVED: + case IB_CM_USER_ESTABLISHED: + event.event = RDMA_CM_EVENT_ESTABLISHED; + break; + case IB_CM_DREQ_ERROR: + event.status = ETIMEDOUT; /* fall through */ + case IB_CM_DREQ_RECEIVED: + case IB_CM_DREP_RECEIVED: + if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT)) + goto out; + event.event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IB_CM_TIMEWAIT_EXIT: + case IB_CM_MRA_RECEIVED: + /* ignore event */ + goto out; + case IB_CM_REJ_RECEIVED: + cma_modify_qp_err(&id_priv->id); + event.status = ib_event->param.rej_rcvd.reason; + event.event = RDMA_CM_EVENT_REJECTED; + event.param.conn.private_data = ib_event->private_data; + event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; + break; + default: + log(LOG_ERR, "RDMA CMA: unexpected IB CM event: %d", + ib_event->event); + goto out; + } + + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. 
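+		 * Clearing cm_id.ib below keeps rdma_destroy_id() from
+		 * destroying the CM id a second time; the IB CM releases
+		 * it once this handler returns non-zero.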
*/ + id_priv->cm_id.ib = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_enable_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } +out: + cma_enable_remove(id_priv); + return ret; +} + +static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv; + struct rdma_cm_id *id; + struct rdma_route *rt; + union cma_ip_addr *src, *dst; + __u16 port; + u8 ip_ver; + + if (cma_get_net_info(ib_event->private_data, listen_id->ps, + &ip_ver, &port, &src, &dst)) + goto err; + + id = rdma_create_id(listen_id->event_handler, listen_id->context, + listen_id->ps); + if (IS_ERR(id)) + goto err; + + cma_save_net_info(&id->route.addr, &listen_id->route.addr, + ip_ver, port, src, dst); + + rt = &id->route; + rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; + rt->path_rec = malloc(sizeof *rt->path_rec * rt->num_paths, + M_DEVBUF, M_NOWAIT); + if (!rt->path_rec) + goto destroy_id; + + rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; + if (rt->num_paths == 2) + rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; + + ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); + ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); + rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA; + + id_priv = container_of(id, struct rdma_id_private, id); + id_priv->state = CMA_CONNECT; + return id_priv; + +destroy_id: + rdma_destroy_id(id); +err: + return NULL; +} + +static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv; + struct rdma_cm_id *id; + union cma_ip_addr *src, *dst; + __u16 port; + u8 ip_ver; + int ret; + + id = rdma_create_id(listen_id->event_handler, listen_id->context, + listen_id->ps); + if (IS_ERR(id)) + return NULL; + + + if (cma_get_net_info(ib_event->private_data, listen_id->ps, + &ip_ver, &port, &src, &dst)) + goto err; + + cma_save_net_info(&id->route.addr, &listen_id->route.addr, + ip_ver, port, src, dst); + + ret = rdma_translate_ip(&id->route.addr.src_addr, + &id->route.addr.dev_addr); + if (ret) + goto err; + + id_priv = container_of(id, struct rdma_id_private, id); + id_priv->state = CMA_CONNECT; + return id_priv; +err: + rdma_destroy_id(id); + return NULL; +} + +static void cma_set_req_event_data(struct rdma_cm_event *event, + struct ib_cm_req_event_param *req_data, + void *private_data, int offset) +{ + event->param.conn.private_data = private_data + offset; + event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; + event->param.conn.responder_resources = req_data->responder_resources; + event->param.conn.initiator_depth = req_data->initiator_depth; + event->param.conn.flow_control = req_data->flow_control; + event->param.conn.retry_count = req_data->retry_count; + event->param.conn.rnr_retry_count = req_data->rnr_retry_count; + event->param.conn.srq = req_data->srq; + event->param.conn.qp_num = req_data->remote_qpn; +} + +static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) +{ + struct rdma_id_private *listen_id, *conn_id; + struct rdma_cm_event event; + int offset, ret; + + listen_id = cm_id->context; + if (cma_disable_remove(listen_id, CMA_LISTEN)) + return (ECONNABORTED); + + memset(&event, 0, sizeof event); + offset = cma_user_data_offset(listen_id->id.ps); + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + if (cma_is_ud_ps(listen_id->id.ps)) { + 
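+		/*
+		 * Unreliable datagram port spaces (RDMA_PS_UDP and
+		 * RDMA_PS_IPOIB) carry no connection state: the new id is
+		 * built from the SIDR request instead of a full IB CM REQ.
+		 */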
conn_id = cma_new_udp_id(&listen_id->id, ib_event); + event.param.ud.private_data = ib_event->private_data + offset; + event.param.ud.private_data_len = + IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; + } else { + conn_id = cma_new_conn_id(&listen_id->id, ib_event); + cma_set_req_event_data(&event, &ib_event->param.req_rcvd, + ib_event->private_data, offset); + } + if (!conn_id) { + ret = ENOMEM; + goto out; + } + + mtx_lock(&conn_id->lock); + conn_id->dev_remove++; + mtx_unlock(&conn_id->lock); + mtx_lock(&lock); + ret = cma_acquire_dev(conn_id); + mtx_unlock(&lock); + if (ret) + goto release_conn_id; + + conn_id->cm_id.ib = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_ib_handler; + + ret = conn_id->id.event_handler(&conn_id->id, &event); + if (!ret) + goto out; + + /* Destroy the CM ID by returning a non-zero value. */ + conn_id->cm_id.ib = NULL; + +release_conn_id: + cma_exch(conn_id, CMA_DESTROYING); + cma_enable_remove(conn_id); + rdma_destroy_id(&conn_id->id); + +out: + cma_enable_remove(listen_id); + return ret; +} + +static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) +{ + return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr))); +} + +static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, + struct ib_cm_compare_data *compare) +{ + struct cma_hdr *cma_data, *cma_mask; + struct sdp_hh *sdp_data, *sdp_mask; + __u32 ip4_addr; + struct in6_addr ip6_addr; + + memset(compare, 0, sizeof *compare); + cma_data = (void *) compare->data; + cma_mask = (void *) compare->mask; + sdp_data = (void *) compare->data; + sdp_mask = (void *) compare->mask; + + switch (addr->sa_family) { + case AF_INET: + ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; + if (ps == RDMA_PS_SDP) { + sdp_set_ip_ver(sdp_data, 4); + sdp_set_ip_ver(sdp_mask, 0xF); + sdp_data->dst_addr.ip4.addr = ip4_addr; + sdp_mask->dst_addr.ip4.addr = ~0; + } else { + cma_set_ip_ver(cma_data, 4); + cma_set_ip_ver(cma_mask, 0xF); + cma_data->dst_addr.ip4.addr = ip4_addr; + cma_mask->dst_addr.ip4.addr = ~0; + } + break; + case AF_INET6: + ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; + if (ps == RDMA_PS_SDP) { + sdp_set_ip_ver(sdp_data, 6); + sdp_set_ip_ver(sdp_mask, 0xF); + sdp_data->dst_addr.ip6 = ip6_addr; + memset(&sdp_mask->dst_addr.ip6, 0xFF, + sizeof sdp_mask->dst_addr.ip6); + } else { + cma_set_ip_ver(cma_data, 6); + cma_set_ip_ver(cma_mask, 0xF); + cma_data->dst_addr.ip6 = ip6_addr; + memset(&cma_mask->dst_addr.ip6, 0xFF, + sizeof cma_mask->dst_addr.ip6); + } + break; + default: + break; + } +} +#endif /* IB_SUPPORTED */ + +static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) +{ + struct rdma_id_private *id_priv = iw_id->context; + struct rdma_cm_event event; + struct sockaddr_in *sin; + int ret = 0; + + if (cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; + + memset(&event, 0, sizeof event); + switch (iw_event->event) { + case IW_CM_EVENT_CLOSE: + event.event = RDMA_CM_EVENT_DISCONNECTED; + break; + case IW_CM_EVENT_CONNECT_REPLY: + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + *sin = iw_event->local_addr; + sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; + *sin = iw_event->remote_addr; + switch (iw_event->status) { + case 0: + event.event = RDMA_CM_EVENT_ESTABLISHED; + break; + case ECONNRESET: + case ECONNREFUSED: + event.event = RDMA_CM_EVENT_REJECTED; + break; + case ETIMEDOUT: + event.event = RDMA_CM_EVENT_UNREACHABLE; + break; + default: + event.event = 
RDMA_CM_EVENT_CONNECT_ERROR; + break; + } + break; + case IW_CM_EVENT_ESTABLISHED: + event.event = RDMA_CM_EVENT_ESTABLISHED; + break; + default: + panic("unknown event type %d", iw_event->event); + + } + + event.status = iw_event->status; + event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.iw = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_enable_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } + + cma_enable_remove(id_priv); + return ret; +} + +static int iw_conn_req_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct rdma_cm_id *new_cm_id; + struct rdma_id_private *listen_id, *conn_id; + struct sockaddr_in *sin; + struct ifnet *dev; + struct rdma_cm_event event; + int ret; + struct ifaddr *ifa; + uint16_t port; + + listen_id = cm_id->context; + if (cma_disable_remove(listen_id, CMA_LISTEN)) + return (ECONNABORTED); + + /* Create a new RDMA id for the new IW CM ID */ + new_cm_id = rdma_create_id(listen_id->id.event_handler, + listen_id->id.context, + RDMA_PS_TCP); + if (!new_cm_id) { + ret = ENOMEM; + goto out; + } + conn_id = container_of(new_cm_id, struct rdma_id_private, id); + mtx_lock(&conn_id->lock); + ++conn_id->dev_remove; + mtx_unlock(&conn_id->lock); + conn_id->state = CMA_CONNECT; + + port = iw_event->local_addr.sin_port; + iw_event->local_addr.sin_port = 0; + ifa = ifa_ifwithaddr((struct sockaddr *)&iw_event->local_addr); + iw_event->local_addr.sin_port = port; + if (!ifa) { + ret = EADDRNOTAVAIL; + cma_enable_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + dev = ifa->ifa_ifp; + ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); + if (ret) { + cma_enable_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + + mtx_lock(&lock); + ret = cma_acquire_dev(conn_id); + mtx_unlock(&lock); + if (ret) { + cma_enable_remove(conn_id); + rdma_destroy_id(new_cm_id); + goto out; + } + + conn_id->cm_id.iw = cm_id; + cm_id->context = conn_id; + cm_id->cm_handler = cma_iw_handler; + + sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; + *sin = iw_event->local_addr; + sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; + *sin = iw_event->remote_addr; + conn_id->so = cm_id->so; + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_CONNECT_REQUEST; + event.param.conn.private_data = iw_event->private_data; + event.param.conn.private_data_len = iw_event->private_data_len; + ret = conn_id->id.event_handler(&conn_id->id, &event); + if (ret) { + /* User wants to destroy the CM ID */ + conn_id->cm_id.iw = NULL; + cma_exch(conn_id, CMA_DESTROYING); + cma_enable_remove(conn_id); + rdma_destroy_id(&conn_id->id); + } + +out: + cma_enable_remove(listen_id); + return ret; +} + +#ifdef IB_SUPPORTED +static int cma_ib_listen(struct rdma_id_private *id_priv) +{ + struct ib_cm_compare_data compare_data; + struct sockaddr *addr; + __be64 svc_id; + int ret; + + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.ib)) + return PTR_ERR(id_priv->cm_id.ib); + + addr = &id_priv->id.route.addr.src_addr; + svc_id = cma_get_service_id(id_priv->id.ps, addr); + if (cma_any_addr(addr)) + ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); + else { + cma_set_compare_data(id_priv->id.ps, addr, &compare_data); + ret = 
ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data); + } + + if (ret) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } + + return ret; +} +#endif + +static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) +{ + int ret; + struct sockaddr_in *sin; + + id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, id_priv->so, + iw_conn_req_handler, id_priv); + if (IS_ERR(id_priv->cm_id.iw)) + return PTR_ERR(id_priv->cm_id.iw); + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + id_priv->cm_id.iw->local_addr = *sin; + + ret = iw_cm_listen(id_priv->cm_id.iw, backlog); + + if (ret) { + iw_destroy_cm_id(id_priv->cm_id.iw); + id_priv->cm_id.iw = NULL; + } + + return ret; +} + +static int cma_listen_handler(struct rdma_cm_id *id, + struct rdma_cm_event *event) +{ + struct rdma_id_private *id_priv = id->context; + + id->context = id_priv->id.context; + id->event_handler = id_priv->id.event_handler; + return id_priv->id.event_handler(id, event); +} + +static void cma_listen_on_dev(struct rdma_id_private *id_priv, + struct cma_device *cma_dev) +{ + struct rdma_id_private *dev_id_priv; + struct rdma_cm_id *id; + int ret; + + id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps); + if (IS_ERR(id)) + return; + + dev_id_priv = container_of(id, struct rdma_id_private, id); + + dev_id_priv->state = CMA_ADDR_BOUND; + memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, + ip_addr_size(&id_priv->id.route.addr.src_addr)); + dev_id_priv->so = id_priv->so; /* XXX */ + + cma_attach_to_dev(dev_id_priv, cma_dev); + LIST_INSERT_HEAD(&id_priv->listen_list, dev_id_priv, listen_entry); + + ret = rdma_listen(id, id_priv->backlog); + if (ret) + goto err; + + return; +err: + cma_destroy_listen(dev_id_priv); +} + +static void cma_listen_on_all(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + + mtx_lock(&lock); + LIST_INSERT_HEAD(&listen_any_list, id_priv, list); + TAILQ_FOREACH(cma_dev, &dev_list, list) + cma_listen_on_dev(id_priv, cma_dev); + mtx_unlock(&lock); +} + +static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af) +{ + struct sockaddr_in addr_in; + + memset(&addr_in, 0, sizeof addr_in); + addr_in.sin_family = af; + addr_in.sin_len = sizeof addr_in; + return rdma_bind_addr(id, (struct sockaddr *) &addr_in); +} + +int rdma_listen(struct rdma_cm_id *id, int backlog) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == CMA_IDLE) { + ret = cma_bind_any(id, AF_INET); + if (ret) + return ret; + } + + if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN)) + return (EINVAL); + + id_priv->backlog = backlog; + if (id->device) { +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_ib_listen(id_priv); + if (ret) + goto err; + break; + case RDMA_TRANSPORT_IWARP: +#endif + ret = cma_iw_listen(id_priv, backlog); + if (ret) + goto err; +#ifdef IB_SUPPORTED + break; + default: + ret = ENOSYS; + goto err; + } +#endif + } else + cma_listen_on_all(id_priv); + + return 0; +err: + id_priv->backlog = 0; + cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND); + return ret; +} + +#ifdef IB_SUPPORTED +static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, + void *context) +{ + struct cma_work *work = context; + struct rdma_route *route; + + route = &work->id->id.route; + + if (!status) { + route->num_paths = 1; + *route->path_rec = *path_rec; + } else { + work->old_state = 
CMA_ROUTE_QUERY; + work->new_state = CMA_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + work->event.status = status; + } + + taskqueue_enqueue(cma_wq, &work->task); +} + +static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, + struct cma_work *work) +{ + struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr; + struct ib_sa_path_rec path_rec; + + memset(&path_rec, 0, sizeof path_rec); + ib_addr_get_sgid(addr, &path_rec.sgid); + ib_addr_get_dgid(addr, &path_rec.dgid); + path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); + path_rec.numb_path = 1; + path_rec.reversible = 1; + + id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, + id_priv->id.port_num, &path_rec, + IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | + IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | + IB_SA_PATH_REC_REVERSIBLE, + timeout_ms, M_NOWAIT, + cma_query_handler, work, &id_priv->query); + + return (id_priv->query_id < 0) ? id_priv->query_id : 0; +} +#endif + +static void cma_work_handler(void *context, int pending) +{ + struct cma_work *work = context; + struct rdma_id_private *id_priv = work->id; + int destroy = 0; + + mtx_lock(&id_priv->lock); + ++id_priv->dev_remove; + mtx_unlock(&id_priv->lock); + if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) + goto out; + + if (id_priv->id.event_handler(&id_priv->id, &work->event)) { + cma_exch(id_priv, CMA_DESTROYING); + destroy = 1; + } +out: + cma_enable_remove(id_priv); + cma_deref_id(id_priv); + if (destroy) + rdma_destroy_id(&id_priv->id); + free(work, M_DEVBUF); +} + +#ifdef IB_SUPPORTED +static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct rdma_route *route = &id_priv->id.route; + struct cma_work *work; + int ret; + + work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT); + if (!work) + return (ENOMEM); + bzero(work, sizeof *work); + + work->id = id_priv; + TASK_INIT(&work->task, 0, cma_work_handler, work); + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + + route->path_rec = malloc(sizeof *route->path_rec, M_DEVBUF, M_NOWAIT); + if (!route->path_rec) { + ret = ENOMEM; + goto err1; + } + + ret = cma_query_ib_route(id_priv, timeout_ms, work); + if (ret) + goto err2; + + return 0; +err2: + free(route->path_rec, M_DEVBUF); + route->path_rec = NULL; +err1: + free(work, M_DEVBUF); + return ret; +} + +int rdma_set_ib_paths(struct rdma_cm_id *id, + struct ib_sa_path_rec *path_rec, int num_paths) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED)) + return (EINVAL); + + id->route.path_rec = malloc(sizeof *path_rec * num_paths, M_DEVBUF, M_NOWAIT); + if (!id->route.path_rec) { + ret = ENOMEM; + goto err; + } + + memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths); + return 0; +err: + cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED); + return ret; +} +#endif + +static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) +{ + struct cma_work *work; + + work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT); + if (!work) + return (ENOMEM); + bzero(work, sizeof *work); + + work->id = id_priv; + TASK_INIT(&work->task, 0, cma_work_handler, work); + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + taskqueue_enqueue(cma_wq, &work->task); + return 0; 
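+	/*
+	 * No query is needed for iWARP: the route is implied by the IP
+	 * path, so the work item above simply reports ROUTE_RESOLVED
+	 * from the taskqueue.
+	 */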
+} + +int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY)) + return (EINVAL); + + mtx_lock(&id_priv->lock); + id_priv->refcount++; + mtx_unlock(&id_priv->lock); +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_resolve_ib_route(id_priv, timeout_ms); + break; + case RDMA_TRANSPORT_IWARP: +#endif + ret = cma_resolve_iw_route(id_priv, timeout_ms); +#ifdef IB_SUPPORTED + break; + default: + ret = ENOSYS; + break; + } +#endif + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED); + cma_deref_id(id_priv); + return ret; +} + +static int cma_bind_loopback(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev; + struct ib_port_attr port_attr; + union ib_gid gid; + u16 pkey; + int ret; + u8 p; + + mtx_lock(&lock); + if (TAILQ_EMPTY(&dev_list)) { + ret = ENODEV; + goto out; + } + TAILQ_FOREACH(cma_dev, &dev_list, list) + for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) + if (!ib_query_port(cma_dev->device, p, &port_attr) && + port_attr.state == IB_PORT_ACTIVE) + goto port_found; + + p = 1; + cma_dev = TAILQ_FIRST(&dev_list); + +port_found: + ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); + if (ret) + goto out; + + ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); + if (ret) + goto out; + + ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); + ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); + id_priv->id.port_num = p; + cma_attach_to_dev(id_priv, cma_dev); +out: + mtx_unlock(&lock); + return ret; +} + +static void addr_handler(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *dev_addr, void *context) +{ + struct rdma_id_private *id_priv = context; + struct rdma_cm_event event; + + memset(&event, 0, sizeof event); + mtx_lock(&id_priv->lock); + ++id_priv->dev_remove; + mtx_unlock(&id_priv->lock); + + /* + * Grab mutex to block rdma_destroy_id() from removing the device while + * we're trying to acquire it. 
+ */ + mtx_lock(&lock); + if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) { + mtx_unlock(&lock); + goto out; + } + + if (!status && !id_priv->cma_dev) + status = cma_acquire_dev(id_priv); + mtx_unlock(&lock); + + if (status) { + if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) + goto out; + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = status; + } else { + memcpy(&id_priv->id.route.addr.src_addr, src_addr, + ip_addr_size(src_addr)); + event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + } + + if (id_priv->id.event_handler(&id_priv->id, &event)) { + cma_exch(id_priv, CMA_DESTROYING); + cma_enable_remove(id_priv); + cma_deref_id(id_priv); + rdma_destroy_id(&id_priv->id); + return; + } +out: + cma_enable_remove(id_priv); + cma_deref_id(id_priv); +} + +static int cma_resolve_loopback(struct rdma_id_private *id_priv) +{ + struct cma_work *work; + struct sockaddr_in *src_in, *dst_in; + union ib_gid gid; + int ret; + + work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT); + if (!work) + return (ENOMEM); + bzero(work, sizeof *work); + + if (!id_priv->cma_dev) { + ret = cma_bind_loopback(id_priv); + if (ret) + goto err; + } + + ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + + if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) { + src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; + dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; + src_in->sin_family = dst_in->sin_family; + src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr; + } + + work->id = id_priv; + TASK_INIT(&work->task, 0, cma_work_handler, work); + work->old_state = CMA_ADDR_QUERY; + work->new_state = CMA_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + taskqueue_enqueue(cma_wq, &work->task); + return 0; +err: + free(work, M_DEVBUF); + return ret; +} + +static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr) +{ + if (src_addr && src_addr->sa_family) + return rdma_bind_addr(id, src_addr); + else + return cma_bind_any(id, dst_addr->sa_family); +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr, int timeout_ms) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (id_priv->state == CMA_IDLE) { + ret = cma_bind_addr(id, src_addr, dst_addr); + if (ret) + return ret; + } + + if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY)) + return (EINVAL); + + mtx_lock(&id_priv->lock); + id_priv->refcount++; + mtx_unlock(&id_priv->lock); + memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); + if (cma_any_addr(dst_addr)) + ret = cma_resolve_loopback(id_priv); + else + ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr, + dst_addr, &id->route.addr.dev_addr, + timeout_ms, addr_handler, id_priv); + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND); + cma_deref_id(id_priv); + return ret; +} + +static void cma_bind_port(struct rdma_bind_list *bind_list, + struct rdma_id_private *id_priv) +{ + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + sin->sin_port = htons(bind_list->port); + id_priv->bind_list = bind_list; + TAILQ_INSERT_HEAD(&bind_list->owners, id_priv, node); +} + +static int cma_alloc_port(struct kvl *ps, struct rdma_id_private *id_priv, + unsigned short snum) +{ + struct rdma_bind_list *bind_list; + int port, ret; + + 
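+	/*
+	 * Reserve the requested port in this port space's key/value
+	 * table.  kvl_alloc_above() may transiently fail with EAGAIN,
+	 * so the allocation is retried until it returns a final status.
+	 */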
bind_list = malloc(sizeof *bind_list, M_DEVBUF, M_NOWAIT); + if (!bind_list) + return (ENOMEM); + bzero(bind_list, sizeof *bind_list); + + do { + ret = kvl_alloc_above(ps, bind_list, snum, &port); + } while (ret == EAGAIN); + + if (ret) + goto err1; + + if (port != snum) { + ret = EADDRNOTAVAIL; + goto err2; + } + + bind_list->ps = ps; + bind_list->port = (unsigned short) port; + cma_bind_port(bind_list, id_priv); + return 0; +err2: + kvl_delete(ps, port); +err1: + free(bind_list, M_DEVBUF); + return ret; +} + +static int cma_alloc_any_port(struct kvl *ps, struct rdma_id_private *id_priv) +{ + struct rdma_bind_list *bind_list; + int port, ret; + + bind_list = malloc(sizeof *bind_list, M_DEVBUF, M_NOWAIT); + if (!bind_list) + return (ENOMEM); + bzero(bind_list, sizeof *bind_list); + +retry: + do { + ret = kvl_alloc_above(ps, bind_list, next_port, &port); + } while (ret == EAGAIN); + + if (ret) + goto err1; + + if (port > ipport_lastauto) { + if (next_port != ipport_firstauto) { + kvl_delete(ps, port); + next_port = ipport_firstauto; + goto retry; + } + ret = EADDRNOTAVAIL; + goto err2; + } + + if (port == ipport_lastauto) + next_port = ipport_firstauto; + else + next_port = port + 1; + + bind_list->ps = ps; + bind_list->port = (unsigned short) port; + cma_bind_port(bind_list, id_priv); + return 0; +err2: + kvl_delete(ps, port); +err1: + free(bind_list, M_DEVBUF); + return ret; +} + +static int cma_use_port(struct kvl *ps, struct rdma_id_private *id_priv) +{ + struct rdma_id_private *cur_id; + struct sockaddr_in *sin, *cur_sin; + struct rdma_bind_list *bind_list; + unsigned short snum; + + sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; + snum = ntohs(sin->sin_port); + if (snum <= ipport_reservedhigh && snum >= ipport_reservedlow && + priv_check(curthread, PRIV_NETINET_RESERVEDPORT)) + return (EACCES); + + bind_list = kvl_lookup(ps, snum); + if (!bind_list) + return cma_alloc_port(ps, id_priv, snum); + + /* + * We don't support binding to any address if anyone is bound to + * a specific address on the same port. 
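+	 * The converse is also enforced below: a specific bind fails
+	 * with EADDRNOTAVAIL if a wildcard binding already owns the
+	 * port, and with EADDRINUSE on an exact address match.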
+ */ + if (cma_any_addr(&id_priv->id.route.addr.src_addr)) + return (EADDRNOTAVAIL); + + TAILQ_FOREACH(cur_id, &bind_list->owners, node) { + if (cma_any_addr(&cur_id->id.route.addr.src_addr)) + return (EADDRNOTAVAIL); + + cur_sin = (struct sockaddr_in *)&cur_id->id.route.addr.src_addr; + if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) + return (EADDRINUSE); + } + + cma_bind_port(bind_list, id_priv); + return 0; +} + +static int cma_get_tcp_port(struct rdma_id_private *id_priv) +{ + int ret; + struct socket *so; + + ret = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP, + curthread->td_ucred, curthread); + if (ret) { + printf("%s socreate err %d\n", __FUNCTION__, ret); + return ret; + } + + ret = sobind(so, (struct sockaddr *)&id_priv->id.route.addr.src_addr, + curthread); + if (ret) { + soclose(so); + return ret; + } + id_priv->so = so; + return 0; +} + +static int cma_get_port(struct rdma_id_private *id_priv) +{ + struct kvl *ps; + int ret; + + switch (id_priv->id.ps) { + case RDMA_PS_SDP: + ps = &sdp_ps; + break; + case RDMA_PS_TCP: + ps = &tcp_ps; + ret = cma_get_tcp_port(id_priv); /* Synch with native stack */ + if (ret) + return ret; + break; + case RDMA_PS_UDP: + ps = &udp_ps; + break; + case RDMA_PS_IPOIB: + ps = &ipoib_ps; + break; + default: + return (EPROTONOSUPPORT); + } + + mtx_lock(&lock); + if (cma_any_port(&id_priv->id.route.addr.src_addr)) + ret = cma_alloc_any_port(ps, id_priv); + else + ret = cma_use_port(ps, id_priv); + mtx_unlock(&lock); + + return ret; +} + +int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv; + int ret; + + if (addr->sa_family != AF_INET) + return (EAFNOSUPPORT); + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) + return (EINVAL); + + if (!cma_any_addr(addr)) { + ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); + if (ret) + goto err1; + + mtx_lock(&lock); + ret = cma_acquire_dev(id_priv); + mtx_unlock(&lock); + if (ret) + goto err1; + } + + memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); + ret = cma_get_port(id_priv); + if (ret) + goto err2; + + return 0; +err2: + if (!cma_any_addr(addr)) { + mtx_lock(&lock); + cma_detach_from_dev(id_priv); + mtx_unlock(&lock); + } +err1: + cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); + return ret; +} + +#ifdef IB_SUPPORTED +static int cma_format_hdr(void *hdr, enum rdma_port_space ps, + struct rdma_route *route) +{ + struct sockaddr_in *src4, *dst4; + struct cma_hdr *cma_hdr; + struct sdp_hh *sdp_hdr; + + src4 = (struct sockaddr_in *) &route->addr.src_addr; + dst4 = (struct sockaddr_in *) &route->addr.dst_addr; + + switch (ps) { + case RDMA_PS_SDP: + sdp_hdr = hdr; + if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) + return (EINVAL); + sdp_set_ip_ver(sdp_hdr, 4); + sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + sdp_hdr->port = src4->sin_port; + break; + default: + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + cma_set_ip_ver(cma_hdr, 4); + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + cma_hdr->port = src4->sin_port; + break; + } + return 0; +} + +static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, + struct ib_cm_event *ib_event) +{ + struct rdma_id_private *id_priv = cm_id->context; + struct rdma_cm_event event; + struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; + int ret = 0; + + if 
(cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; + + memset(&event, 0, sizeof event); + switch (ib_event->event) { + case IB_CM_SIDR_REQ_ERROR: + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = ETIMEDOUT; + break; + case IB_CM_SIDR_REP_RECEIVED: + event.param.ud.private_data = ib_event->private_data; + event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; + if (rep->status != IB_SIDR_SUCCESS) { + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = ib_event->param.sidr_rep_rcvd.status; + break; + } + if (id_priv->qkey != rep->qkey) { + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = EINVAL; + break; + } + ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, + id_priv->id.route.path_rec, + &event.param.ud.ah_attr); + event.param.ud.qp_num = rep->qpn; + event.param.ud.qkey = rep->qkey; + event.event = RDMA_CM_EVENT_ESTABLISHED; + event.status = 0; + break; + default: + log(LOG_ERR, "RDMA CMA: unexpected IB CM event: %d", + ib_event->event); + goto out; + } + + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + /* Destroy the CM ID by returning a non-zero value. */ + id_priv->cm_id.ib = NULL; + cma_exch(id_priv, CMA_DESTROYING); + cma_enable_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return ret; + } +out: + cma_enable_remove(id_priv); + return ret; +} + +static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_sidr_req_param req; + struct rdma_route *route; + int ret; + + req.private_data_len = sizeof(struct cma_hdr) + + conn_param->private_data_len; + req.private_data = malloc(req.private_data_len, M_DEVBUF, M_NOWAIT); + if (!req.private_data) + return (ENOMEM); + bzero((void *)req.private_data, req.private_data_len); + + if (conn_param->private_data && conn_param->private_data_len) + memcpy((caddr_t) req.private_data + sizeof(struct cma_hdr), + conn_param->private_data, conn_param->private_data_len); + + route = &id_priv->id.route; + ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); + if (ret) + goto out; + + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, + cma_sidr_rep_handler, id_priv); + if (IS_ERR(id_priv->cm_id.ib)) { + ret = PTR_ERR(id_priv->cm_id.ib); + goto out; + } + + req.path = route->path_rec; + req.service_id = cma_get_service_id(id_priv->id.ps, + &route->addr.dst_addr); + req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); + req.max_cm_retries = CMA_MAX_CM_RETRIES; + + ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); + if (ret) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } +out: + free(req.private_data, M_DEVBUF); + return ret; +} + +static int cma_connect_ib(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_req_param req; + struct rdma_route *route; + void *private_data; + int offset, ret; + + memset(&req, 0, sizeof req); + offset = cma_user_data_offset(id_priv->id.ps); + req.private_data_len = offset + conn_param->private_data_len; + private_data = malloc(req.private_data_len, M_DEVBUF, M_NOWAIT); + if (!private_data) + return (ENOMEM); + bzero(private_data, req.private_data_len); + + if (conn_param->private_data && conn_param->private_data_len) + memcpy(private_data + offset, conn_param->private_data, + conn_param->private_data_len); + + id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler, + id_priv); + if (IS_ERR(id_priv->cm_id.ib)) { + ret = PTR_ERR(id_priv->cm_id.ib); + goto out; + } + + route = 
&id_priv->id.route; + ret = cma_format_hdr(private_data, id_priv->id.ps, route); + if (ret) + goto out; + req.private_data = private_data; + + req.primary_path = &route->path_rec[0]; + if (route->num_paths == 2) + req.alternate_path = &route->path_rec[1]; + + req.service_id = cma_get_service_id(id_priv->id.ps, + &route->addr.dst_addr); + req.qp_num = id_priv->qp_num; + req.qp_type = IB_QPT_RC; + req.starting_psn = id_priv->seq_num; + req.responder_resources = conn_param->responder_resources; + req.initiator_depth = conn_param->initiator_depth; + req.flow_control = conn_param->flow_control; + req.retry_count = conn_param->retry_count; + req.rnr_retry_count = conn_param->rnr_retry_count; + req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; + req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; + req.max_cm_retries = CMA_MAX_CM_RETRIES; + req.srq = id_priv->srq ? 1 : 0; + + ret = ib_send_cm_req(id_priv->cm_id.ib, &req); +out: + if (ret && !IS_ERR(id_priv->cm_id.ib)) { + ib_destroy_cm_id(id_priv->cm_id.ib); + id_priv->cm_id.ib = NULL; + } + + free(private_data, M_DEVBUF); + return ret; +} +#endif + +static int cma_connect_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_id *cm_id; + struct sockaddr_in* sin; + int ret; + struct iw_cm_conn_param iw_param; + + cm_id = iw_create_cm_id(id_priv->id.device, id_priv->so, + cma_iw_handler, id_priv); + if (IS_ERR(cm_id)) { + ret = PTR_ERR(cm_id); + goto out; + } + + id_priv->cm_id.iw = cm_id; + + sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; + cm_id->local_addr = *sin; + + sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; + cm_id->remote_addr = *sin; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + goto out; + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) + iw_param.qpn = id_priv->qp_num; + else + iw_param.qpn = conn_param->qp_num; + ret = iw_cm_connect(cm_id, &iw_param); +out: + if (ret && !IS_ERR(cm_id)) { + iw_destroy_cm_id(cm_id); + id_priv->cm_id.iw = NULL; + } + return ret; +} + +int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT)) + return (EINVAL); + + if (!id->qp) { + id_priv->qp_num = conn_param->qp_num; + id_priv->srq = conn_param->srq; + } + +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + if (cma_is_ud_ps(id->ps)) + ret = cma_resolve_ib_udp(id_priv, conn_param); + else + ret = cma_connect_ib(id_priv, conn_param); + break; + case RDMA_TRANSPORT_IWARP: +#endif + ret = cma_connect_iw(id_priv, conn_param); +#ifdef IB_SUPPORTED + break; + default: + ret = ENOSYS; + break; + } +#endif + if (ret) + goto err; + + return 0; +err: + cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED); + return ret; +} + +#ifdef IB_SUPPORTED +static int cma_accept_ib(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct ib_cm_rep_param rep; + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + if (id_priv->id.qp) { + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + goto out; + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr, + &qp_attr_mask); + if (ret) + goto out; + + 
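+		/*
+		 * Finish the transition to RTS, bounding outstanding RDMA
+		 * reads/atomics by the initiator depth the caller accepted.
+		 */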
qp_attr.max_rd_atomic = conn_param->initiator_depth; + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); + if (ret) + goto out; + } + + memset(&rep, 0, sizeof rep); + rep.qp_num = id_priv->qp_num; + rep.starting_psn = id_priv->seq_num; + rep.private_data = conn_param->private_data; + rep.private_data_len = conn_param->private_data_len; + rep.responder_resources = conn_param->responder_resources; + rep.initiator_depth = conn_param->initiator_depth; + rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT; + rep.failover_accepted = 0; + rep.flow_control = conn_param->flow_control; + rep.rnr_retry_count = conn_param->rnr_retry_count; + rep.srq = id_priv->srq ? 1 : 0; + + ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); +out: + return ret; +} +#endif + +static int cma_accept_iw(struct rdma_id_private *id_priv, + struct rdma_conn_param *conn_param) +{ + struct iw_cm_conn_param iw_param; + int ret; + + ret = cma_modify_qp_rtr(&id_priv->id); + if (ret) + return ret; + + iw_param.ord = conn_param->initiator_depth; + iw_param.ird = conn_param->responder_resources; + iw_param.private_data = conn_param->private_data; + iw_param.private_data_len = conn_param->private_data_len; + if (id_priv->id.qp) { + iw_param.qpn = id_priv->qp_num; + } else + iw_param.qpn = conn_param->qp_num; + + return iw_cm_accept(id_priv->cm_id.iw, &iw_param); +} + +#ifdef IB_SUPPORTED +static int cma_send_sidr_rep(struct rdma_id_private *id_priv, + enum ib_cm_sidr_status status, + const void *private_data, int private_data_len) +{ + struct ib_cm_sidr_rep_param rep; + + memset(&rep, 0, sizeof rep); + rep.status = status; + if (status == IB_SIDR_SUCCESS) { + rep.qp_num = id_priv->qp_num; + rep.qkey = id_priv->qkey; + } + rep.private_data = private_data; + rep.private_data_len = private_data_len; + + return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); +} +#endif + +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_CONNECT)) + return (EINVAL); + + if (!id->qp && conn_param) { + id_priv->qp_num = conn_param->qp_num; + id_priv->srq = conn_param->srq; + } + +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + if (cma_is_ud_ps(id->ps)) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->private_data, + conn_param->private_data_len); + else if (conn_param) + ret = cma_accept_ib(id_priv, conn_param); + else + ret = cma_rep_recv(id_priv); + break; + case RDMA_TRANSPORT_IWARP: +#endif + ret = cma_accept_iw(id_priv, conn_param); +#ifdef IB_SUPPORTED + break; + default: + ret = ENOSYS; + break; + } +#endif + + if (ret) + goto reject; + + return 0; +reject: + cma_modify_qp_err(id); + rdma_reject(id, NULL, 0); + return ret; +} + +int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_has_cm_dev(id_priv)) + return (EINVAL); +#ifdef IB_SUPPORTED + switch (id->device->node_type) { + case RDMA_NODE_IB_CA: + ret = ib_cm_notify(id_priv->cm_id.ib, event); + break; + default: +#endif + ret = 0; +#ifdef IB_SUPPORTED + break; + } +#endif + return ret; +} + +int rdma_reject(struct rdma_cm_id *id, const void *private_data, + u8 private_data_len) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_has_cm_dev(id_priv)) + return 
(EINVAL); + +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + if (cma_is_ud_ps(id->ps)) + ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, + private_data, private_data_len); + else + ret = ib_send_cm_rej(id_priv->cm_id.ib, + IB_CM_REJ_CONSUMER_DEFINED, NULL, + 0, private_data, private_data_len); + break; + case RDMA_TRANSPORT_IWARP: +#endif + ret = iw_cm_reject(id_priv->cm_id.iw, + private_data, private_data_len); +#ifdef IB_SUPPORTED + break; + default: + ret = ENOSYS; + break; + } +#endif + return ret; +} + +int rdma_disconnect(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_has_cm_dev(id_priv)) + return (EINVAL); + +#ifdef IB_SUPPORTED + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_modify_qp_err(id); + if (ret) + goto out; + /* Initiate or respond to a disconnect. */ + if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) + ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + break; + case RDMA_TRANSPORT_IWARP: +#endif + ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); +#ifdef IB_SUPPORTED + break; + default: + ret = EINVAL; + break; + } +out: +#endif + return ret; +} + +#ifdef IB_SUPPORTED +static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) +{ + struct rdma_id_private *id_priv; + struct cma_multicast *mc = multicast->context; + struct rdma_cm_event event; + int ret; + + id_priv = mc->id_priv; + if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) && + cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) + return 0; + + if (!status && id_priv->id.qp) + status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, + multicast->rec.mlid); + + memset(&event, 0, sizeof event); + event.status = status; + event.param.ud.private_data = mc->context; + if (!status) { + event.event = RDMA_CM_EVENT_MULTICAST_JOIN; + ib_init_ah_from_mcmember(id_priv->id.device, + id_priv->id.port_num, &multicast->rec, + &event.param.ud.ah_attr); + event.param.ud.qp_num = 0xFFFFFF; + event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); + } else + event.event = RDMA_CM_EVENT_MULTICAST_ERROR; + + ret = id_priv->id.event_handler(&id_priv->id, &event); + if (ret) { + cma_exch(id_priv, CMA_DESTROYING); + cma_enable_remove(id_priv); + rdma_destroy_id(&id_priv->id); + return 0; + } + + cma_enable_remove(id_priv); + return 0; +} + +static void cma_set_mgid(struct rdma_id_private *id_priv, + struct sockaddr *addr, union ib_gid *mgid) +{ + unsigned char mc_map[MAX_ADDR_LEN]; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + struct sockaddr_in *sin = (struct sockaddr_in *) addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; + + if (cma_any_addr(addr)) { + memset(mgid, 0, sizeof *mgid); + } else if ((addr->sa_family == AF_INET6) && + ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) == + 0xFF10A01B)) { + /* IPv6 address is an SA assigned MGID. 
*/ + memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else { + ip_ib_mc_map(sin->sin_addr.s_addr, mc_map); + if (id_priv->id.ps == RDMA_PS_UDP) + mc_map[7] = 0x01; /* Use RDMA CM signature */ + mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8; + mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr); + *mgid = *(union ib_gid *) (mc_map + 4); + } +} + +static int cma_join_ib_multicast(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct ib_sa_mcmember_rec rec; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + ib_sa_comp_mask comp_mask; + int ret; + + ib_addr_get_mgid(dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, + &rec.mgid, &rec); + if (ret) + return ret; + + cma_set_mgid(id_priv, &mc->addr, &rec.mgid); + if (id_priv->id.ps == RDMA_PS_UDP) + rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + ib_addr_get_sgid(dev_addr, &rec.port_gid); + rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); + rec.join_state = 1; + + comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | + IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | + IB_SA_MCMEMBER_REC_FLOW_LABEL | + IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; + + mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, + id_priv->id.port_num, &rec, + comp_mask, M_NOWAIT, + cma_ib_mc_handler, mc); + if (IS_ERR(mc->multicast.ib)) + return PTR_ERR(mc->multicast.ib); + + return 0; +} + +int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, + void *context) +{ + struct rdma_id_private *id_priv; + struct cma_multicast *mc; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + if (!cma_comp(id_priv, CMA_ADDR_BOUND) && + !cma_comp(id_priv, CMA_ADDR_RESOLVED)) + return (EINVAL); + + mc = malloc(sizeof *mc, M_DEVBUF, M_NOWAIT); + if (!mc) + return (ENOMEM); + + memcpy(&mc->addr, addr, ip_addr_size(addr)); + mc->context = context; + mc->id_priv = id_priv; + + mtx_lock(&id_priv->lock); + LIST_INSERT_HEAD(&id_priv->mc_list, mc, list); + mtx_unlock(&id_priv->lock); + + switch (rdma_node_get_transport(id->device->node_type)) { + case RDMA_TRANSPORT_IB: + ret = cma_join_ib_multicast(id_priv, mc); + break; + default: + ret = ENOSYS; + break; + } + + if (ret) { + mtx_lock(&id_priv->lock); + list_del(&mc->list); + mtx_unlock(&id_priv->lock); + free(mc, M_DEVBUF); + } + return ret; +} + +void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) +{ + struct rdma_id_private *id_priv; + struct cma_multicast *mc; + + id_priv = container_of(id, struct rdma_id_private, id); + mtx_lock(&id_priv->lock); + LIST_FOREACH(mc, &id_priv->mc_list, list) { + if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { + list_del(&mc->list); + mtx_unlock(&id_priv->lock); + + if (id->qp) + ib_detach_mcast(id->qp, + &mc->multicast.ib->rec.mgid, + mc->multicast.ib->rec.mlid); + ib_sa_free_multicast(mc->multicast.ib, M_DEVBUF); + free(mc, M_DEVBUF); + return; + } + } + mtx_unlock(&id_priv->lock); +} +#endif + +static void cma_add_one(struct ib_device *device) +{ + struct cma_device *cma_dev; + struct rdma_id_private *id_priv; + + cma_dev = malloc(sizeof *cma_dev, M_DEVBUF, M_NOWAIT|M_ZERO); + if (!cma_dev) + return; + + cma_dev->device = device; + + cv_init(&cma_dev->comp, "cma_device"); + mtx_init(&cma_dev->lock, "cma_device", NULL, MTX_DUPOK|MTX_DEF); + cma_dev->refcount = 1; + LIST_INIT(&cma_dev->id_list); + ib_set_client_data(device, &cma_client, cma_dev); + + mtx_lock(&lock); + 
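+	/*
+	 * Publish the device and replay it to every id listening on the
+	 * wildcard address, so existing listeners begin accepting
+	 * connections that arrive on this device as well.
+	 */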
TAILQ_INSERT_TAIL(&dev_list, cma_dev, list); + LIST_FOREACH(id_priv, &listen_any_list, list) + cma_listen_on_dev(id_priv, cma_dev); + mtx_unlock(&lock); +} + +static int cma_remove_id_dev(struct rdma_id_private *id_priv) +{ + struct rdma_cm_event event; + enum cma_state state; + + /* Record that we want to remove the device */ + state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); + if (state == CMA_DESTROYING) + return 0; + + cma_cancel_operation(id_priv, state); + mtx_lock(&id_priv->lock); + PANIC_IF(id_priv->dev_remove < 0); + if (id_priv->dev_remove) + cv_wait(&id_priv->wait_remove, &id_priv->lock); + mtx_unlock(&id_priv->lock); + + /* Check for destruction from another callback. */ + if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) + return 0; + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; + return id_priv->id.event_handler(&id_priv->id, &event); +} + +static void cma_process_remove(struct cma_device *cma_dev) +{ + struct rdma_id_private *id_priv; + int ret; + + mtx_lock(&lock); + while (!LIST_EMPTY(&cma_dev->id_list)) { + id_priv = LIST_FIRST(&cma_dev->id_list); + + if (cma_internal_listen(id_priv)) { + cma_destroy_listen(id_priv); + continue; + } + + LIST_REMOVE(id_priv, list); + mtx_lock(&id_priv->lock); + id_priv->refcount++; + mtx_unlock(&id_priv->lock); + mtx_unlock(&lock); + + ret = cma_remove_id_dev(id_priv); + cma_deref_id(id_priv); + if (ret) + rdma_destroy_id(&id_priv->id); + + mtx_lock(&lock); + } + mtx_unlock(&lock); + + cma_deref_dev(cma_dev); + mtx_lock(&cma_dev->lock); + PANIC_IF(cma_dev->refcount < 0); + if (cma_dev->refcount) + cv_wait(&cma_dev->comp, &cma_dev->lock); + mtx_unlock(&cma_dev->lock); +} + +static void cma_remove_one(struct ib_device *device) +{ + struct cma_device *cma_dev; + + cma_dev = ib_get_client_data(device, &cma_client); + if (!cma_dev) + return; + + mtx_lock(&lock); + TAILQ_REMOVE(&dev_list, cma_dev, list); + mtx_unlock(&lock); + + cma_process_remove(cma_dev); + free(cma_dev, M_DEVBUF); +} + +static int cma_init(void) +{ + int ret; + + LIST_INIT(&listen_any_list); + TAILQ_INIT(&dev_list); + mtx_init(&lock, "cma_device list", NULL, MTX_DEF); + + arc4rand(&next_port, sizeof next_port, 0); + next_port = ((unsigned int) next_port % + (ipport_lastauto - ipport_firstauto)) + + ipport_firstauto; + cma_wq = taskqueue_create("rdma_cm", M_NOWAIT, taskqueue_thread_enqueue, + &cma_wq); + + if (!cma_wq) + return (ENOMEM); + + taskqueue_start_threads(&cma_wq, 1, PI_NET, "cma_wq thread"); +#ifdef IB_SUPPORTED + ib_sa_register_client(&sa_client); +#endif + rdma_addr_register_client(&addr_client); + + ret = ib_register_client(&cma_client); + if (ret) + goto err; + return 0; + +err: + rdma_addr_unregister_client(&addr_client); +#ifdef IB_SUPPORTED + ib_sa_unregister_client(&sa_client); +#endif + taskqueue_free(cma_wq); + return ret; +} + +static void cma_cleanup(void) +{ + ib_unregister_client(&cma_client); + rdma_addr_unregister_client(&addr_client); +#ifdef IB_SUPPORTED + ib_sa_unregister_client(&sa_client); +#endif + taskqueue_free(cma_wq); + kvl_free(&sdp_ps); + kvl_free(&tcp_ps); + kvl_free(&udp_ps); + kvl_free(&ipoib_ps); +} + +static int +cma_load(module_t mod, int cmd, void *arg) +{ + int err = 0; + + switch (cmd) { + case MOD_LOAD: + printf("Loading rdma_cma.\n"); + cma_init(); + break; + case MOD_QUIESCE: + break; + case MOD_UNLOAD: + printf("Unloading rdma_cma.\n"); + cma_cleanup(); + break; + case MOD_SHUTDOWN: + break; + default: + err = EOPNOTSUPP; + break; + } + + return (err); +} + +static moduledata_t mod_data = { + 
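+	/* name, event handler, extra data pointer */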
"rdma_cma", + cma_load, + 0 +}; + +MODULE_VERSION(rdma_cma, 1); +MODULE_DEPEND(rdma_cma, rdma_core, 1, 1, 1); +MODULE_DEPEND(rdma_cma, rdma_addr, 1, 1, 1); +MODULE_DEPEND(rdma_cma, rdma_iwcm, 1, 1, 1); +DECLARE_MODULE(rdma_cma, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); diff --git a/sys/contrib/rdma/rdma_device.c b/sys/contrib/rdma/rdma_device.c new file mode 100644 index 000000000000..53cf31fb28d9 --- /dev/null +++ b/sys/contrib/rdma/rdma_device.c @@ -0,0 +1,776 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: device.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/condvar.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/libkern.h> +#include <sys/socket.h> +#include <sys/module.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/priv.h> +#include <sys/syslog.h> + +#include <contrib/rdma/core_priv.h> + +struct ib_client_data { + TAILQ_ENTRY(ib_client_data) list; + struct ib_client *client; + void * data; +}; + +static TAILQ_HEAD(, ib_device) device_list; +static TAILQ_HEAD(client_list_s, ib_client) client_list; + +/* + * device_mutex protects access to both device_list and client_list. + * There's no real point to using multiple locks or something fancier + * like an rwsem: we always access both lists, and we're always + * modifying one list or the other list. In any case this is not a + * hot path so there's no point in trying to optimize. 
+ */ +static struct mtx device_mutex; + +static int ib_device_check_mandatory(struct ib_device *device) +{ +#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } +#define MANDATORY_TABLE_DEPTH 19 + static const struct { + size_t offset; + char *name; + } mandatory_table[] = { + IB_MANDATORY_FUNC(query_device), + IB_MANDATORY_FUNC(query_port), + IB_MANDATORY_FUNC(query_pkey), + IB_MANDATORY_FUNC(query_gid), + IB_MANDATORY_FUNC(alloc_pd), + IB_MANDATORY_FUNC(dealloc_pd), + IB_MANDATORY_FUNC(create_ah), + IB_MANDATORY_FUNC(destroy_ah), + IB_MANDATORY_FUNC(create_qp), + IB_MANDATORY_FUNC(modify_qp), + IB_MANDATORY_FUNC(destroy_qp), + IB_MANDATORY_FUNC(post_send), + IB_MANDATORY_FUNC(post_recv), + IB_MANDATORY_FUNC(create_cq), + IB_MANDATORY_FUNC(destroy_cq), + IB_MANDATORY_FUNC(poll_cq), + IB_MANDATORY_FUNC(req_notify_cq), + IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(dereg_mr) + }; + int i; + + for (i = 0; i < MANDATORY_TABLE_DEPTH; ++i) { + if (!*(void **) ((void *) ((unsigned long)device + mandatory_table[i].offset))) { + log(LOG_WARNING, "Device %s is missing mandatory function %s\n", + device->name, mandatory_table[i].name); + return (EINVAL); + } + } + + return 0; +} + +static struct ib_device *__ib_device_get_by_name(const char *name) +{ + struct ib_device *device; + + TAILQ_FOREACH(device, &device_list, core_list) + if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) + return device; + + return NULL; +} + + +static int alloc_name(char *name) +{ + long *inuse; + char buf[IB_DEVICE_NAME_MAX]; + struct ib_device *device; + int i; + + inuse = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (!inuse) + return (ENOMEM); + + TAILQ_FOREACH(device, &device_list, core_list) { + if (!sscanf(device->name, name, &i)) + continue; + if (i < 0 || i >= PAGE_SIZE * 8) + continue; + snprintf(buf, sizeof buf, name, i); + if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) + setbit(inuse, i); + } + + i = find_first_zero_bit(inuse, PAGE_SIZE * 8); + free(inuse, M_DEVBUF); + snprintf(buf, sizeof buf, name, i); + + if (__ib_device_get_by_name(buf)) + return (ENFILE); + + strlcpy(name, buf, IB_DEVICE_NAME_MAX); + return 0; +} + +static int start_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1; +} + + +static int end_port(struct ib_device *device) +{ + return (device->node_type == RDMA_NODE_IB_SWITCH) ? + 0 : device->phys_port_cnt; +} + +/** + * ib_alloc_device - allocate an IB device struct + * @size:size of structure to allocate + * + * Low-level drivers should use ib_alloc_device() to allocate &struct + * ib_device. @size is the size of the structure to be allocated, + * including any private data used by the low-level driver. + * ib_dealloc_device() must be used to free structures allocated with + * ib_alloc_device(). + */ +struct ib_device *ib_alloc_device(size_t size) +{ + void *dev; + + if (size < sizeof (struct ib_device)) + panic("size=%zd < sizeof (struct ib_device)=%zd)", + size, sizeof (struct ib_device)); + + dev = malloc(size, M_DEVBUF, M_NOWAIT); + if (dev) + bzero(dev, size); + return dev; +} + +/** + * ib_dealloc_device - free an IB device struct + * @device:structure to free + * + * Free a structure allocated with ib_alloc_device(). 
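+ *
+ * @device must either never have been registered, or have been
+ * unregistered with ib_unregister_device() first; the function
+ * panics otherwise.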
+ */ +void ib_dealloc_device(struct ib_device *device) +{ + if (device->reg_state == IB_DEV_UNINITIALIZED) { + free(device, M_DEVBUF); + return; + } + + if (device->reg_state != IB_DEV_UNREGISTERED) + panic("device->reg_state=%d != IB_DEV_UNREGISTERED)", + device->reg_state); +#ifdef notyet + ib_device_unregister_sysfs(device); +#endif +} + +static int add_client_context(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + + context = malloc(sizeof *context, M_DEVBUF, M_NOWAIT); + if (!context) { + log(LOG_WARNING, "Couldn't allocate client context for %s/%s\n", + device->name, client->name); + return (ENOMEM); + } + + context->client = client; + context->data = NULL; + + mtx_lock(&device->client_data_lock); + TAILQ_INSERT_TAIL(&device->client_data_list, context, list); + mtx_unlock(&device->client_data_lock); + + return 0; +} + +static int read_port_table_lengths(struct ib_device *device) +{ + struct ib_port_attr *tprops = NULL; + int num_ports, ret = ENOMEM; + u8 port_index; + + tprops = malloc(sizeof *tprops, M_DEVBUF, M_NOWAIT); + if (!tprops) + goto out; + + num_ports = end_port(device) - start_port(device) + 1; + + device->pkey_tbl_len = malloc(sizeof *device->pkey_tbl_len * num_ports, + M_DEVBUF, M_NOWAIT); + device->gid_tbl_len = malloc(sizeof *device->gid_tbl_len * num_ports, + M_DEVBUF, M_NOWAIT); + if (!device->pkey_tbl_len || !device->gid_tbl_len) + goto err; + + for (port_index = 0; port_index < num_ports; ++port_index) { + ret = ib_query_port(device, port_index + start_port(device), + tprops); + if (ret) + goto err; + device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len; + device->gid_tbl_len[port_index] = tprops->gid_tbl_len; + } + + ret = 0; + goto out; + +err: + free(device->gid_tbl_len, M_DEVBUF); + free(device->pkey_tbl_len, M_DEVBUF); +out: + free(tprops, M_DEVBUF); + return ret; +} + +/** + * ib_register_device - Register an IB device with IB core + * @device:Device to register + * + * Low-level drivers use ib_register_device() to register their + * devices with the IB core. All registered clients will receive a + * callback for each device that is added. @device must be allocated + * with ib_alloc_device(). 
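+ *
+ * As a rough illustration (mydrv and its softc layout are hypothetical;
+ * the embedded ib_device is the first member so the pointers alias):
+ *
+ *	sc = (struct mydrv_softc *)ib_alloc_device(sizeof *sc);
+ *	if (!sc)
+ *		return (ENOMEM);
+ *	strlcpy(sc->ibdev.name, "mydrv%d", IB_DEVICE_NAME_MAX);
+ *	... set the mandatory verbs (query_device, create_qp, ...) ...
+ *	if (ib_register_device(&sc->ibdev) != 0)
+ *		ib_dealloc_device(&sc->ibdev);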
+ */ +int ib_register_device(struct ib_device *device) +{ + int ret; + + mtx_lock(&device_mutex); + + if (strchr(device->name, '%')) { + ret = alloc_name(device->name); + if (ret) + goto out; + } + + if (ib_device_check_mandatory(device)) { + ret = EINVAL; + goto out; + } + + TAILQ_INIT(&device->event_handler_list); + TAILQ_INIT(&device->client_data_list); + mtx_init(&device->event_handler_lock, "ib event handler", NULL, + MTX_DUPOK|MTX_DEF); + mtx_init(&device->client_data_lock, "ib client data", NULL, + MTX_DUPOK|MTX_DEF); + + ret = read_port_table_lengths(device); + if (ret) { + log(LOG_WARNING, "Couldn't create table lengths cache for device %s\n", + device->name); + goto out; + } + +#ifdef notyet + ret = ib_device_register_sysfs(device); + if (ret) { + log(LOG_WARNING, "Couldn't register device %s with driver model\n", + device->name); + free(device->gid_tbl_len, M_DEVBUF); + free(device->pkey_tbl_len, M_DEVBUF); + goto out; + } +#endif + + TAILQ_INSERT_TAIL(&device_list, device, core_list); + + device->reg_state = IB_DEV_REGISTERED; + + { + struct ib_client *client; + + TAILQ_FOREACH(client, &client_list, list) + if (client->add && !add_client_context(device, client)) + client->add(device); + } + + out: + mtx_unlock(&device_mutex); + return ret; +} + +/** + * ib_unregister_device - Unregister an IB device + * @device:Device to unregister + * + * Unregister an IB device. All clients will receive a remove callback. + */ +void ib_unregister_device(struct ib_device *device) +{ + struct ib_client *client; + struct ib_client_data *context, *tmp; + + mtx_lock(&device_mutex); + + TAILQ_FOREACH_REVERSE(client, &client_list, client_list_s, list) + if (client->remove) + client->remove(device); + + TAILQ_REMOVE(&device_list, device, core_list); + + free(device->gid_tbl_len, M_DEVBUF); + free(device->pkey_tbl_len, M_DEVBUF); + + mtx_unlock(&device_mutex); + + mtx_lock(&device->client_data_lock); + TAILQ_FOREACH_SAFE(context, &device->client_data_list, list, tmp) + free(context, M_DEVBUF); + mtx_unlock(&device->client_data_lock); + + device->reg_state = IB_DEV_UNREGISTERED; +} + +/** + * ib_register_client - Register an IB client + * @client:Client to register + * + * Upper level users of the IB drivers can use ib_register_client() to + * register callbacks for IB device addition and removal. When an IB + * device is added, each registered client's add method will be called + * (in the order the clients were registered), and when a device is + * removed, each client's remove method will be called (in the reverse + * order that clients were registered). In addition, when + * ib_register_client() is called, the client will receive an add + * callback for all devices already registered. + */ +int ib_register_client(struct ib_client *client) +{ + struct ib_device *device; + + mtx_lock(&device_mutex); + + TAILQ_INSERT_TAIL(&client_list, client, list); + TAILQ_FOREACH(device, &device_list, core_list) + if (client->add && !add_client_context(device, client)) + client->add(device); + + mtx_unlock(&device_mutex); + + return 0; +} + +/** + * ib_unregister_client - Unregister an IB client + * @client:Client to unregister + * + * Upper level users use ib_unregister_client() to remove their client + * registration. When ib_unregister_client() is called, the client + * will receive a remove callback for each IB device still registered. 
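+ *
+ * A minimal client sketch (the mydrv names are hypothetical); the add
+ * callback runs for every device already registered at registration
+ * time, and the remove callback runs for every device still registered
+ * at unregistration time:
+ *
+ *	static void mydrv_add_one(struct ib_device *device);
+ *	static void mydrv_remove_one(struct ib_device *device);
+ *
+ *	static struct ib_client mydrv_client = {
+ *		.name   = "mydrv",
+ *		.add    = mydrv_add_one,
+ *		.remove = mydrv_remove_one
+ *	};
+ *
+ *	ib_register_client(&mydrv_client);
+ *	...
+ *	ib_unregister_client(&mydrv_client);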
+ */ +void ib_unregister_client(struct ib_client *client) +{ + struct ib_client_data *context, *tmp; + struct ib_device *device; + + mtx_lock(&device_mutex); + + TAILQ_FOREACH(device, &device_list, core_list) { + if (client->remove) + client->remove(device); + + mtx_lock(&device->client_data_lock); + TAILQ_FOREACH_SAFE(context, &device->client_data_list, list,tmp) + if (context->client == client) { + TAILQ_REMOVE(&device->client_data_list, context, + list); + free(context, M_DEVBUF); + } + mtx_unlock(&device->client_data_lock); + } + TAILQ_REMOVE(&client_list, client, list); + + mtx_unlock(&device_mutex); +} + +/** + * ib_get_client_data - Get IB client context + * @device:Device to get context for + * @client:Client to get context for + * + * ib_get_client_data() returns client context set with + * ib_set_client_data(). + */ +void *ib_get_client_data(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + void *ret = NULL; + + mtx_lock(&device->client_data_lock); + TAILQ_FOREACH(context, &device->client_data_list, list) + if (context->client == client) { + ret = context->data; + break; + } + mtx_unlock(&device->client_data_lock); + + return ret; +} + +/** + * ib_set_client_data - Set IB client context + * @device:Device to set context for + * @client:Client to set context for + * @data:Context to set + * + * ib_set_client_data() sets client context that can be retrieved with + * ib_get_client_data(). + */ +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data) +{ + struct ib_client_data *context; + + mtx_lock(&device->client_data_lock); + TAILQ_FOREACH(context, &device->client_data_list, list) + if (context->client == client) { + context->data = data; + goto out; + } + + log(LOG_WARNING, "No client context found for %s/%s\n", + device->name, client->name); + +out: + mtx_unlock(&device->client_data_lock); +} + +/** + * ib_register_event_handler - Register an IB event handler + * @event_handler:Handler to register + * + * ib_register_event_handler() registers an event handler that will be + * called back when asynchronous IB events occur (as defined in + * chapter 11 of the InfiniBand Architecture Specification). This + * callback may occur in interrupt context. + */ +int ib_register_event_handler (struct ib_event_handler *event_handler) +{ + mtx_lock(&event_handler->device->event_handler_lock); + TAILQ_INSERT_TAIL(&event_handler->device->event_handler_list, + event_handler, list); + mtx_unlock(&event_handler->device->event_handler_lock); + + return 0; +} + +/** + * ib_unregister_event_handler - Unregister an event handler + * @event_handler:Handler to unregister + * + * Unregister an event handler registered with + * ib_register_event_handler(). + */ +int ib_unregister_event_handler(struct ib_event_handler *event_handler) +{ + mtx_lock(&event_handler->device->event_handler_lock); + TAILQ_REMOVE(&event_handler->device->event_handler_list, event_handler, + list); + mtx_unlock(&event_handler->device->event_handler_lock); + + return 0; +} + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. 
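+ *
+ * Provider-side sketch (event type and element fields as defined in
+ * ib_verbs.h; ibdev and port are placeholders for the driver's own
+ * state):
+ *
+ *	struct ib_event event;
+ *
+ *	event.device = ibdev;
+ *	event.event = IB_EVENT_PORT_ACTIVE;
+ *	event.element.port_num = port;
+ *	ib_dispatch_event(&event);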
+ */ +void ib_dispatch_event(struct ib_event *event) +{ + struct ib_event_handler *handler; + + mtx_lock(&event->device->event_handler_lock); + + TAILQ_FOREACH(handler, &event->device->event_handler_list, list) + handler->handler(handler, event); + + mtx_unlock(&event->device->event_handler_lock); +} + +/** + * ib_query_device - Query IB device attributes + * @device:Device to query + * @device_attr:Device attributes + * + * ib_query_device() returns the attributes of a device through the + * @device_attr pointer. + */ +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr) +{ + return device->query_device(device, device_attr); +} + +/** + * ib_query_port - Query IB port attributes + * @device:Device to query + * @port_num:Port number to query + * @port_attr:Port attributes + * + * ib_query_port() returns the attributes of a port through the + * @port_attr pointer. + */ +int ib_query_port(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr) +{ + if (port_num < start_port(device) || port_num > end_port(device)) + return (EINVAL); + + return device->query_port(device, port_num, port_attr); +} + +/** + * ib_query_gid - Get GID table entry + * @device:Device to query + * @port_num:Port number to query + * @index:GID table index to query + * @gid:Returned GID + * + * ib_query_gid() fetches the specified GID table entry. + */ +int ib_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid) +{ + return device->query_gid(device, port_num, index, gid); +} + +/** + * ib_query_pkey - Get P_Key table entry + * @device:Device to query + * @port_num:Port number to query + * @index:P_Key table index to query + * @pkey:Returned P_Key + * + * ib_query_pkey() fetches the specified P_Key table entry. + */ +int ib_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey) +{ + return device->query_pkey(device, port_num, index, pkey); +} + +/** + * ib_modify_device - Change IB device attributes + * @device:Device to modify + * @device_modify_mask:Mask of attributes to change + * @device_modify:New attribute values + * + * ib_modify_device() changes a device's attributes as specified by + * the @device_modify_mask and @device_modify structure. + */ +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify) +{ + return device->modify_device(device, device_modify_mask, + device_modify); +} + +/** + * ib_modify_port - Modifies the attributes for the specified port. + * @device: The device to modify. + * @port_num: The number of the port to modify. + * @port_modify_mask: Mask used to specify which attributes of the port + * to change. + * @port_modify: New attribute values for the port. + * + * ib_modify_port() changes a port's attributes as specified by the + * @port_modify_mask and @port_modify structure. + */ +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify) +{ + if (port_num < start_port(device) || port_num > end_port(device)) + return (EINVAL); + + return device->modify_port(device, port_num, port_modify_mask, + port_modify); +} + +/** + * ib_find_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the GID table where the GID was found. This + * parameter may be NULL. 
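+ *
+ * Usage sketch, assuming a previously obtained gid value:
+ *
+ *	u8 port_num;
+ *	u16 index;
+ *	int ret;
+ *
+ *	ret = ib_find_gid(device, &gid, &port_num, &index);
+ *	if (ret == 0)
+ *		printf("GID is entry %d of port %d\n", index, port_num);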
+ */ +int ib_find_gid(struct ib_device *device, union ib_gid *gid, + u8 *port_num, u16 *index) +{ + union ib_gid tmp_gid; + int ret, port, i; + + for (port = start_port(device); port <= end_port(device); ++port) { + for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) { + ret = ib_query_gid(device, port, i, &tmp_gid); + if (ret) + return ret; + if (!memcmp(&tmp_gid, gid, sizeof *gid)) { + *port_num = port; + if (index) + *index = i; + return 0; + } + } + } + + return (ENOENT); +} + +/** + * ib_find_pkey - Returns the PKey table index where a specified + * PKey value occurs. + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the PKey table where the PKey was found. + */ +int ib_find_pkey(struct ib_device *device, + u8 port_num, u16 pkey, u16 *index) +{ + int ret, i; + u16 tmp_pkey; + + for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { + ret = ib_query_pkey(device, port_num, i, &tmp_pkey); + if (ret) + return ret; + + if (pkey == tmp_pkey) { + *index = i; + return 0; + } + } + + return (ENOENT); +} + +static int rdma_core_init(void) +{ + int ret; +#ifdef notyet + ret = ib_sysfs_setup(); + if (ret) + log(LOG_WARNING, "Couldn't create InfiniBand device class\n"); +#endif + + mtx_init(&device_mutex, "rdma_device mutex", NULL, MTX_DEF); + TAILQ_INIT(&client_list); + TAILQ_INIT(&device_list); + ret = ib_cache_setup(); + if (ret) { + log(LOG_WARNING, "Couldn't set up InfiniBand P_Key/GID cache\n"); +#ifdef notyet + ib_sysfs_cleanup(); +#endif + } + + return ret; +} + +static void rdma_core_cleanup(void) +{ + ib_cache_cleanup(); +#ifdef notyet + ib_sysfs_cleanup(); + /* Make sure that any pending umem accounting work is done. */ + flush_scheduled_work(); +#endif +} + +static int +rdma_core_load(module_t mod, int cmd, void *arg) +{ + int err = 0; + + switch (cmd) { + case MOD_LOAD: + printf("Loading rdma_core.\n"); + rdma_core_init(); + break; + case MOD_QUIESCE: + break; + case MOD_UNLOAD: + printf("Unloading rdma_core.\n"); + rdma_core_cleanup(); + break; + case MOD_SHUTDOWN: + break; + default: + err = EOPNOTSUPP; + break; + } + + return (err); +} + +static moduledata_t mod_data = { + "rdma_core", + rdma_core_load, + 0 +}; + +MODULE_VERSION(rdma_core, 1); +DECLARE_MODULE(rdma_core, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); diff --git a/sys/contrib/rdma/rdma_iwcm.c b/sys/contrib/rdma/rdma_iwcm.c new file mode 100644 index 000000000000..916abcd2dfd9 --- /dev/null +++ b/sys/contrib/rdma/rdma_iwcm.c @@ -0,0 +1,1086 @@ +/* + * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/libkern.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/module.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/priv.h> +#include <sys/syslog.h> +#include <sys/malloc.h> + +#include <netinet/in.h> +#include <netinet/in_pcb.h> + +#include <contrib/rdma/iw_cm.h> + +enum iw_cm_state { + IW_CM_STATE_IDLE, /* unbound, inactive */ + IW_CM_STATE_LISTEN, /* listen waiting for connect */ + IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */ + IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */ + IW_CM_STATE_ESTABLISHED, /* established */ + IW_CM_STATE_CLOSING, /* disconnect */ + IW_CM_STATE_DESTROYING /* object being deleted */ +}; + +struct iwcm_id_private { + struct iw_cm_id id; + enum iw_cm_state state; + unsigned long flags; + struct ib_qp *qp; + void * destroy_comp; + void * connect_wait; + TAILQ_HEAD(, iwcm_work) work_list; + struct mtx lock; + volatile int refcount; + TAILQ_HEAD(, iwcm_work) work_free_list; +}; + +#define IWCM_F_CALLBACK_DESTROY 1 +#define IWCM_F_CONNECT_WAIT 2 + +static struct taskqueue *iwcm_wq; +struct iwcm_work { + struct task task; + struct iwcm_id_private *cm_id; + TAILQ_ENTRY(iwcm_work) list; + struct iw_cm_event event; + TAILQ_ENTRY(iwcm_work) free_list; +}; + +/* + * The following services provide a mechanism for pre-allocating iwcm_work + * elements. The design pre-allocates them based on the cm_id type: + * LISTENING IDS: Get enough elements preallocated to handle the + * listen backlog. + * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE + * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE + * + * Allocating them in connect and listen avoids having to deal + * with allocation failures on the event upcall from the provider (which + * is called in the interrupt context). + * + * One exception is when creating the cm_id for incoming connection requests. + * There are two cases: + * 1) in the event upcall, cm_event_handler(), for a listening cm_id. 
If + * the backlog is exceeded, then no more connection request events will + * be processed. cm_event_handler() returns ENOMEM in this case. It's up + * to the provider to reject the connection request. + * 2) in the connection request workqueue handler, cm_conn_req_handler(). + * If work elements cannot be allocated for the new connect request cm_id, + * then IWCM will call the provider reject method. This is ok since + * cm_conn_req_handler() runs in the workqueue thread context. + */ + +static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) +{ + struct iwcm_work *work; + + if (TAILQ_EMPTY(&cm_id_priv->work_free_list)) + return NULL; + work = TAILQ_FIRST(&cm_id_priv->work_free_list); + TAILQ_REMOVE(&cm_id_priv->work_free_list, work, free_list); + return work; +} + +static void put_work(struct iwcm_work *work) +{ + TAILQ_INSERT_HEAD(&work->cm_id->work_free_list, work, free_list); +} + +static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) +{ + struct iwcm_work *e, *tmp; + + TAILQ_FOREACH_SAFE(e, &cm_id_priv->work_free_list, free_list, tmp) + free(e, M_DEVBUF); +} + +static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) +{ + struct iwcm_work *work; + + PANIC_IF(!TAILQ_EMPTY(&cm_id_priv->work_free_list)); + while (count--) { + work = malloc(sizeof(struct iwcm_work), M_DEVBUF, M_NOWAIT); + if (!work) { + dealloc_work_entries(cm_id_priv); + return (ENOMEM); + } + work->cm_id = cm_id_priv; + put_work(work); + } + return 0; +} + +/* + * Save private data from incoming connection requests to + * iw_cm_event, so the low-level driver doesn't have to. Adjust + * the event ptr to point to the local copy. + */ +static int copy_private_data(struct iw_cm_event *event) +{ + void *p; + + p = malloc(event->private_data_len, M_DEVBUF, M_NOWAIT); + if (!p) + return (ENOMEM); + bcopy(event->private_data, p, event->private_data_len); + event->private_data = p; + return 0; +} + +static void free_cm_id(struct iwcm_id_private *cm_id_priv) +{ + dealloc_work_entries(cm_id_priv); + free(cm_id_priv, M_DEVBUF); +} + +/* + * Release a reference on cm_id. If the last reference is being + * released, enable the waiting thread (in iw_destroy_cm_id) to + * get woken up, and return 1 if a thread is already waiting.
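+ *
+ * A provider that stores the cm_id pointer in its own state pins it
+ * through the add_ref/rem_ref methods installed by iw_create_cm_id()
+ * (sketch; ep is a hypothetical provider endpoint structure):
+ *
+ *	cm_id->add_ref(cm_id);
+ *	ep->cm_id = cm_id;
+ *	...
+ *	ep->cm_id = NULL;
+ *	cm_id->rem_ref(cm_id);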
+ */ +static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) +{ + mtx_lock(&cm_id_priv->lock); + PANIC_IF(atomic_load_acq_int(&cm_id_priv->refcount)==0); + if (atomic_fetchadd_int(&cm_id_priv->refcount, -1) == 1) { + PANIC_IF(!TAILQ_EMPTY(&cm_id_priv->work_list)); + wakeup(&cm_id_priv->destroy_comp); + mtx_unlock(&cm_id_priv->lock); + return 1; + } + mtx_unlock(&cm_id_priv->lock); + + return 0; +} + +static void add_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + mtx_lock(&cm_id_priv->lock); + atomic_add_int(&cm_id_priv->refcount, 1); + mtx_unlock(&cm_id_priv->lock); +} + +static void rem_ref(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + if (iwcm_deref_id(cm_id_priv) && + isset(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY)) { + PANIC_IF(!TAILQ_EMPTY(&cm_id_priv->work_list)); + free_cm_id(cm_id_priv); + } +} + +static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); + +struct iw_cm_id *iw_create_cm_id(struct ib_device *device, + struct socket *so, + iw_cm_handler cm_handler, + void *context) +{ + struct iwcm_id_private *cm_id_priv; + + KASSERT(so, ("iw_create_cm_id called with NULL socket!")); + cm_id_priv = malloc(sizeof(*cm_id_priv), M_DEVBUF, M_NOWAIT); + if (!cm_id_priv) + return ERR_PTR(ENOMEM); + bzero(cm_id_priv, sizeof *cm_id_priv); + + cm_id_priv->state = IW_CM_STATE_IDLE; + cm_id_priv->id.device = device; + cm_id_priv->id.cm_handler = cm_handler; + cm_id_priv->id.context = context; + cm_id_priv->id.event_handler = cm_event_handler; + cm_id_priv->id.add_ref = add_ref; + cm_id_priv->id.rem_ref = rem_ref; + cm_id_priv->id.so = so; + mtx_init(&cm_id_priv->lock, "cm_id_priv", NULL, MTX_DUPOK|MTX_DEF); + atomic_store_rel_int(&cm_id_priv->refcount, 1); + TAILQ_INIT(&cm_id_priv->work_list); + TAILQ_INIT(&cm_id_priv->work_free_list); + + return &cm_id_priv->id; +} + + +static int iwcm_modify_qp_err(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + if (!qp) + return (EINVAL); + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * This is really the RDMAC CLOSING state. It is most similar to the + * IB SQD QP state. + */ +static int iwcm_modify_qp_sqd(struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + + PANIC_IF(qp == NULL); + qp_attr.qp_state = IB_QPS_SQD; + return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); +} + +/* + * CM_ID <-- CLOSING + * + * Block if a passive or active connection is currently being processed. 
Then + * process the event as follows: + * - If we are ESTABLISHED, move to CLOSING and modify the QP state + * based on the abrupt flag + * - If the connection is already in the CLOSING or IDLE state, the peer is + * disconnecting concurrently with us and we've already seen the + * DISCONNECT event -- ignore the request and return 0 + * - Disconnect on a listening endpoint returns EINVAL + */ +int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) +{ + struct iwcm_id_private *cm_id_priv; + int ret = 0; + struct ib_qp *qp = NULL; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* Wait if we're currently in a connect or accept downcall */ + mtx_lock(&cm_id_priv->lock); + if (isset(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT)) + msleep(&cm_id_priv->connect_wait, &cm_id_priv->lock, 0, "iwcm connect1", 0); + + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_CLOSING; + + /* QP could be <nul> for user-mode client */ + if (cm_id_priv->qp) + qp = cm_id_priv->qp; + else + ret = EINVAL; + break; + case IW_CM_STATE_LISTEN: + ret = EINVAL; + break; + case IW_CM_STATE_CLOSING: + /* remote peer closed first */ + case IW_CM_STATE_IDLE: + /* accept or connect returned !0 */ + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called disconnect before/without calling accept after + * connect_request event delivered. + */ + break; + case IW_CM_STATE_CONN_SENT: + /* Can only get here if wait above fails */ + default: + panic("just cuz"); + } + mtx_unlock(&cm_id_priv->lock); + + if (qp) { + if (abrupt) + ret = iwcm_modify_qp_err(qp); + else + ret = iwcm_modify_qp_sqd(qp); + + /* + * If both sides are disconnecting the QP could + * already be in ERR or SQD states + */ + ret = 0; + } + + return ret; +} + +/* + * CM_ID <-- DESTROYING + * + * Clean up all resources associated with the connection and release + * the initial reference taken by iw_create_cm_id. + */ +static void destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + /* + * Wait if we're currently in a connect or accept downcall. A + * listening endpoint should never block here. + */ + mtx_lock(&cm_id_priv->lock); + if (isset(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT)) + msleep(&cm_id_priv->connect_wait, &cm_id_priv->lock, 0, "iwcm connect2", 0); + + switch (cm_id_priv->state) { + case IW_CM_STATE_LISTEN: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + mtx_unlock(&cm_id_priv->lock); + /* destroy the listening endpoint */ + ret = cm_id->device->iwcm->destroy_listen(cm_id); + mtx_lock(&cm_id_priv->lock); + break; + case IW_CM_STATE_ESTABLISHED: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + mtx_unlock(&cm_id_priv->lock); + /* Abrupt close of the connection */ + (void)iwcm_modify_qp_err(cm_id_priv->qp); + mtx_lock(&cm_id_priv->lock); + break; + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_RECV: + /* + * App called destroy before/without calling accept after + * receiving connection request event notification or + * returned non zero from the event callback function. + * In either case, must tell the provider to reject. 
+ */ + cm_id_priv->state = IW_CM_STATE_DESTROYING; + break; + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_DESTROYING: + default: + panic("just cuz"); + break; + } + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + mtx_unlock(&cm_id_priv->lock); + + (void)iwcm_deref_id(cm_id_priv); +} + +/* + * This function is only called by the application thread and cannot + * be called by the event thread. The function will wait for all + * references to be released on the cm_id and then free the cm_id + * object. + */ +void iw_destroy_cm_id(struct iw_cm_id *cm_id) +{ + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + PANIC_IF(isset(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY)); + + destroy_cm_id(cm_id); + + mtx_lock(&cm_id_priv->lock); + if (atomic_load_acq_int(&cm_id_priv->refcount)) + msleep(&cm_id_priv->destroy_comp, &cm_id_priv->lock, 0, "iwcm destroy", 0); + mtx_unlock(&cm_id_priv->lock); + + free_cm_id(cm_id_priv); +} + +/* + * CM_ID <-- LISTEN + * + * Start listening for connect requests. Generates one CONNECT_REQUEST + * event for each inbound connect request. + */ +int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, backlog); + if (ret) + return ret; + + mtx_lock(&cm_id_priv->lock); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + cm_id_priv->state = IW_CM_STATE_LISTEN; + mtx_unlock(&cm_id_priv->lock); + ret = cm_id->device->iwcm->create_listen(cm_id, backlog); + if (ret) + cm_id_priv->state = IW_CM_STATE_IDLE; + mtx_lock(&cm_id_priv->lock); + break; + default: + ret = EINVAL; + } + mtx_unlock(&cm_id_priv->lock); + + return ret; +} + +/* + * CM_ID <-- IDLE + * + * Rejects an inbound connection request. No events are generated. + */ +int iw_cm_reject(struct iw_cm_id *cm_id, + const void *private_data, + u8 private_data_len) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + setbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + + mtx_lock(&cm_id_priv->lock); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + wakeup(&cm_id_priv->connect_wait); + mtx_unlock(&cm_id_priv->lock); + return (EINVAL); + } + cm_id_priv->state = IW_CM_STATE_IDLE; + mtx_unlock(&cm_id_priv->lock); + + ret = cm_id->device->iwcm->reject(cm_id, private_data, + private_data_len); + + mtx_lock(&cm_id_priv->lock); + clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + wakeup(&cm_id_priv->connect_wait); + mtx_unlock(&cm_id_priv->lock); + + return ret; +} + +/* + * CM_ID <-- ESTABLISHED + * + * Accepts an inbound connection request and generates an ESTABLISHED + * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block + * until the ESTABLISHED event is received from the provider. 
+ */ +int iw_cm_accept(struct iw_cm_id *cm_id, + struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + struct ib_qp *qp; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + setbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + + mtx_lock(&cm_id_priv->lock); + if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { + clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + wakeup(&cm_id_priv->connect_wait); + mtx_unlock(&cm_id_priv->lock); + + return (EINVAL); + } + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + mtx_unlock(&cm_id_priv->lock); + return (EINVAL); + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + mtx_unlock(&cm_id_priv->lock); + + ret = cm_id->device->iwcm->accept(cm_id, iw_param); + if (ret) { + /* An error on accept precludes provider events */ + PANIC_IF(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_IDLE; + mtx_lock(&cm_id_priv->lock); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + wakeup(&cm_id_priv->connect_wait); + mtx_unlock(&cm_id_priv->lock); + } + + return ret; +} + +/* + * Active Side: CM_ID <-- CONN_SENT + * + * If successful, results in the generation of a CONNECT_REPLY + * event. iw_cm_disconnect and iw_cm_destroy will block until the + * CONNECT_REPLY event is received from the provider. + */ +int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + struct ib_qp *qp; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + ret = alloc_work_entries(cm_id_priv, 4); + if (ret) + return ret; + + setbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + mtx_lock(&cm_id_priv->lock); + + if (cm_id_priv->state != IW_CM_STATE_IDLE) { + clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + wakeup(&cm_id_priv->connect_wait); + mtx_unlock(&cm_id_priv->lock); + + return (EINVAL); + } + + /* Get the ib_qp given the QPN */ + qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); + if (!qp) { + mtx_unlock(&cm_id_priv->lock); + return (EINVAL); + } + cm_id->device->iwcm->add_ref(qp); + cm_id_priv->qp = qp; + cm_id_priv->state = IW_CM_STATE_CONN_SENT; + mtx_unlock(&cm_id_priv->lock); + + ret = cm_id->device->iwcm->connect(cm_id, iw_param); + if (ret) { + mtx_lock(&cm_id_priv->lock); + if (cm_id_priv->qp) { + cm_id->device->iwcm->rem_ref(qp); + cm_id_priv->qp = NULL; + } + PANIC_IF(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + cm_id_priv->state = IW_CM_STATE_IDLE; + clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + wakeup(&cm_id_priv->connect_wait); + mtx_unlock(&cm_id_priv->lock); + + } + + return ret; +} + +/* + * Passive Side: new CM_ID <-- CONN_RECV + * + * Handles an inbound connect request. The function creates a new + * iw_cm_id to represent the new connection and inherits the client + * callback function and other attributes from the listening parent. + * + * The work item contains a pointer to the listen_cm_id and the event. The + * listen_cm_id contains the client cm_handler, context and + * device. These are copied when the device is cloned. The event + * contains the new four tuple. + * + * An error on the child should not affect the parent, so this + * function does not return a value. 
+ */ +static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, + struct iw_cm_event *iw_event) +{ + struct iw_cm_id *cm_id; + struct iwcm_id_private *cm_id_priv; + int ret; + + /* + * The provider should never generate a connection request + * event with a bad status. + */ + PANIC_IF(iw_event->status); + + /* + * We could be destroying the listening id. If so, ignore this + * upcall. + */ + mtx_lock(&listen_id_priv->lock); + if (listen_id_priv->state != IW_CM_STATE_LISTEN) { + mtx_unlock(&listen_id_priv->lock); + goto out; + } + mtx_unlock(&listen_id_priv->lock); + + cm_id = iw_create_cm_id(listen_id_priv->id.device, + iw_event->so, + listen_id_priv->id.cm_handler, + listen_id_priv->id.context); + /* If the cm_id could not be created, ignore the request */ + if (IS_ERR(cm_id)) + goto out; + + cm_id->provider_data = iw_event->provider_data; + cm_id->local_addr = iw_event->local_addr; + cm_id->remote_addr = iw_event->remote_addr; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + cm_id_priv->state = IW_CM_STATE_CONN_RECV; + + ret = alloc_work_entries(cm_id_priv, 3); + if (ret) { + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + goto out; + } + + /* Call the client CM handler */ + ret = cm_id->cm_handler(cm_id, iw_event); + if (ret) { + iw_cm_reject(cm_id, NULL, 0); + setbit(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY); + + destroy_cm_id(cm_id); + if (atomic_load_acq_int(&cm_id_priv->refcount)==0) + free_cm_id(cm_id_priv); + } + +out: + if (iw_event->private_data_len) + free(iw_event->private_data, M_DEVBUF); +} + +/* + * Passive Side: CM_ID <-- ESTABLISHED + * + * The provider generated an ESTABLISHED event which means that + * the MPA negotiation has completed successfully and we are now in MPA + * FPDU mode. + * + * This event can only be received in the CONN_RECV state. If the + * remote peer closed, the ESTABLISHED event would be received followed + * by the CLOSE event. If the app closes, it will block until we wake + * it up after processing this event. + */ +static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + int ret; + + mtx_lock(&cm_id_priv->lock); + + /* + * We clear the CONNECT_WAIT bit here to allow the callback + * function to call iw_cm_disconnect. Calling iw_destroy_cm_id + * from a callback handler is not allowed. + */ + clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + PANIC_IF(cm_id_priv->state != IW_CM_STATE_CONN_RECV); + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + wakeup(&cm_id_priv->connect_wait); + mtx_unlock(&cm_id_priv->lock); + + return ret; +} + +/* + * Active Side: CM_ID <-- ESTABLISHED + * + * The app has called connect and is waiting for the established event to + * post its requests to the server. This event will wake up anyone + * blocked in iw_cm_disconnect or iw_destroy_cm_id.
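+ *
+ * Active-side sketch (qpn, pdata and plen are placeholders); the
+ * outcome arrives as an IW_CM_EVENT_CONNECT_REPLY handled by
+ * cm_conn_rep_handler() below:
+ *
+ *	struct iw_cm_conn_param param;
+ *
+ *	param.qpn = qpn;
+ *	param.private_data = pdata;
+ *	param.private_data_len = plen;
+ *	ret = iw_cm_connect(cm_id, &param);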
+ */ +static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + int ret; + + mtx_lock(&cm_id_priv->lock); + /* + * Clear the connect wait bit so a callback function calling + * iw_cm_disconnect will not wait and deadlock this thread. + */ + clrbit(&cm_id_priv->flags, IWCM_F_CONNECT_WAIT); + PANIC_IF(cm_id_priv->state != IW_CM_STATE_CONN_SENT); + if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { + cm_id_priv->id.local_addr = iw_event->local_addr; + cm_id_priv->id.remote_addr = iw_event->remote_addr; + cm_id_priv->state = IW_CM_STATE_ESTABLISHED; + } else { + /* REJECTED or RESET */ + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + cm_id_priv->state = IW_CM_STATE_IDLE; + } + mtx_unlock(&cm_id_priv->lock); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + + mtx_lock(&cm_id_priv->lock); + if (iw_event->private_data_len) + free(iw_event->private_data, M_DEVBUF); + + /* Wake up waiters on connect complete */ + wakeup(&cm_id_priv->connect_wait); + mtx_unlock(&cm_id_priv->lock); + + return ret; +} + +/* + * CM_ID <-- CLOSING + * + * If in the ESTABLISHED state, move to CLOSING. + */ +static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + + mtx_lock(&cm_id_priv->lock); + if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) + cm_id_priv->state = IW_CM_STATE_CLOSING; + mtx_unlock(&cm_id_priv->lock); +} + +/* + * CM_ID <-- IDLE + * + * If in the ESTABLISHED or CLOSING states, the QP will have been + * moved by the provider to the ERR state. Disassociate the CM_ID from + * the QP, move to IDLE, and remove the 'connected' reference. + * + * If in some other state, the cm_id was destroyed asynchronously. + * This is the last reference that will result in waking up + * the app thread blocked in iw_destroy_cm_id. + */ +static int cm_close_handler(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + int ret = 0; + mtx_lock(&cm_id_priv->lock); + + if (cm_id_priv->qp) { + cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); + cm_id_priv->qp = NULL; + } + switch (cm_id_priv->state) { + case IW_CM_STATE_ESTABLISHED: + case IW_CM_STATE_CLOSING: + cm_id_priv->state = IW_CM_STATE_IDLE; + mtx_unlock(&cm_id_priv->lock); + ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); + mtx_lock(&cm_id_priv->lock); + break; + case IW_CM_STATE_DESTROYING: + break; + default: + panic("just cuz"); + } + mtx_unlock(&cm_id_priv->lock); + + return ret; +} + +static int process_event(struct iwcm_id_private *cm_id_priv, + struct iw_cm_event *iw_event) +{ + int ret = 0; + + switch (iw_event->event) { + case IW_CM_EVENT_CONNECT_REQUEST: + cm_conn_req_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CONNECT_REPLY: + ret = cm_conn_rep_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_ESTABLISHED: + ret = cm_conn_est_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_DISCONNECT: + cm_disconnect_handler(cm_id_priv, iw_event); + break; + case IW_CM_EVENT_CLOSE: + ret = cm_close_handler(cm_id_priv, iw_event); + break; + default: + panic("just cuz"); + } + + return ret; +} + +/* + * Process events on the work_list for the cm_id. If the callback + * function requests that the cm_id be deleted, a flag is set in the + * cm_id flags to indicate that when the last reference is + * removed, the cm_id is to be destroyed.
This is necessary to + * distinguish between an object that will be destroyed by the app + * thread asleep on the destroy_comp list vs. an object destroyed + * here synchronously when the last reference is removed. + */ +static void cm_work_handler(void *context, int pending) +{ + struct iwcm_work *work = context; + struct iw_cm_event levent; + struct iwcm_id_private *cm_id_priv = work->cm_id; + int empty; + int ret = 0; + + mtx_lock(&cm_id_priv->lock); + empty = TAILQ_EMPTY(&cm_id_priv->work_list); + while (!empty) { + work = TAILQ_FIRST(&cm_id_priv->work_list); + TAILQ_REMOVE(&cm_id_priv->work_list, work, list); + empty = TAILQ_EMPTY(&cm_id_priv->work_list); + levent = work->event; + put_work(work); + mtx_unlock(&cm_id_priv->lock); + + ret = process_event(cm_id_priv, &levent); + if (ret) { + setbit(&cm_id_priv->flags, IWCM_F_CALLBACK_DESTROY); + destroy_cm_id(&cm_id_priv->id); + } + PANIC_IF(atomic_load_acq_int(&cm_id_priv->refcount)==0); + if (iwcm_deref_id(cm_id_priv)) { + if (isset(&cm_id_priv->flags, + IWCM_F_CALLBACK_DESTROY)) { + PANIC_IF(!TAILQ_EMPTY(&cm_id_priv->work_list)); + free_cm_id(cm_id_priv); + } + return; + } + mtx_lock(&cm_id_priv->lock); + } + mtx_unlock(&cm_id_priv->lock); +} + +/* + * This function is called on interrupt context. Schedule events on + * the iwcm_wq thread to allow callback functions to downcall into + * the CM and/or block. Events are queued to a per-CM_ID + * work_list. If this is the first event on the work_list, the work + * element is also queued on the iwcm_wq thread. + * + * Each event holds a reference on the cm_id. Until the last posted + * event has been delivered and processed, the cm_id cannot be + * deleted. + * + * Returns: + * 0 - the event was handled. + * ENOMEM - the event was not handled due to lack of resources. 
+ */ +static int cm_event_handler(struct iw_cm_id *cm_id, + struct iw_cm_event *iw_event) +{ + struct iwcm_work *work; + struct iwcm_id_private *cm_id_priv; + int ret = 0; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + + mtx_lock(&cm_id_priv->lock); + work = get_work(cm_id_priv); + if (!work) { + ret = ENOMEM; + goto out; + } + + TASK_INIT(&work->task, 0, cm_work_handler, work); + work->cm_id = cm_id_priv; + work->event = *iw_event; + + if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || + work->event.event == IW_CM_EVENT_CONNECT_REPLY) && + work->event.private_data_len) { + ret = copy_private_data(&work->event); + if (ret) { + put_work(work); + goto out; + } + } + + atomic_add_acq_int(&cm_id_priv->refcount, 1); + if (TAILQ_EMPTY(&cm_id_priv->work_list)) { + TAILQ_INSERT_TAIL(&cm_id_priv->work_list, work, list); + taskqueue_enqueue(iwcm_wq, &work->task); + } else + TAILQ_INSERT_TAIL(&cm_id_priv->work_list, work, list); +out: + mtx_unlock(&cm_id_priv->lock); + return ret; +} + +static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + int ret; + + mtx_lock(&cm_id_priv->lock); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE| + IB_ACCESS_REMOTE_READ; + ret = 0; + break; + default: + ret = EINVAL; + break; + } + mtx_unlock(&cm_id_priv->lock); + return ret; +} + +static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + int ret; + + mtx_lock(&cm_id_priv->lock); + switch (cm_id_priv->state) { + case IW_CM_STATE_IDLE: + case IW_CM_STATE_CONN_SENT: + case IW_CM_STATE_CONN_RECV: + case IW_CM_STATE_ESTABLISHED: + *qp_attr_mask = 0; + ret = 0; + break; + default: + ret = EINVAL; + break; + } + mtx_unlock(&cm_id_priv->lock); + return ret; +} + +int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask) +{ + struct iwcm_id_private *cm_id_priv; + int ret; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + switch (qp_attr->qp_state) { + case IB_QPS_INIT: + case IB_QPS_RTR: + ret = iwcm_init_qp_init_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + case IB_QPS_RTS: + ret = iwcm_init_qp_rts_attr(cm_id_priv, + qp_attr, qp_attr_mask); + break; + default: + ret = EINVAL; + break; + } + return ret; +} + +static int iw_cm_init(void) +{ + iwcm_wq = taskqueue_create("iw_cm_wq", M_NOWAIT, taskqueue_thread_enqueue, &iwcm_wq); + if (!iwcm_wq) + return (ENOMEM); + + taskqueue_start_threads(&iwcm_wq, 1, PI_NET, "iw_cm_wq thread"); + return 0; +} + +static void iw_cm_cleanup(void) +{ + taskqueue_free(iwcm_wq); +} + +static int +iw_cm_load(module_t mod, int cmd, void *arg) +{ + int err = 0; + + switch (cmd) { + case MOD_LOAD: + printf("Loading rdma_iwcm.\n"); + + iw_cm_init(); + break; + case MOD_QUIESCE: + break; + case MOD_UNLOAD: + printf("Unloading rdma_iwcm.\n"); + iw_cm_cleanup(); + break; + case MOD_SHUTDOWN: + break; + default: + err = EOPNOTSUPP; + break; + } + + return (err); +} + +static moduledata_t mod_data = { + "rdma_iwcm", + iw_cm_load, + 0 +}; + +MODULE_VERSION(rdma_iwcm, 1); +MODULE_DEPEND(rdma_iwcm, rdma_core, 1, 1, 1); +DECLARE_MODULE(rdma_iwcm, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); diff --git a/sys/contrib/rdma/rdma_user_cm.h 
b/sys/contrib/rdma/rdma_user_cm.h new file mode 100644 index 000000000000..0ffa4e5b3c24 --- /dev/null +++ b/sys/contrib/rdma/rdma_user_cm.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef RDMA_USER_CM_H +#define RDMA_USER_CM_H + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_user_sa.h> + +#define RDMA_USER_CM_ABI_VERSION 4 + +#define RDMA_MAX_PRIVATE_DATA 256 + +enum { + RDMA_USER_CM_CMD_CREATE_ID, + RDMA_USER_CM_CMD_DESTROY_ID, + RDMA_USER_CM_CMD_BIND_ADDR, + RDMA_USER_CM_CMD_RESOLVE_ADDR, + RDMA_USER_CM_CMD_RESOLVE_ROUTE, + RDMA_USER_CM_CMD_QUERY_ROUTE, + RDMA_USER_CM_CMD_CONNECT, + RDMA_USER_CM_CMD_LISTEN, + RDMA_USER_CM_CMD_ACCEPT, + RDMA_USER_CM_CMD_REJECT, + RDMA_USER_CM_CMD_DISCONNECT, + RDMA_USER_CM_CMD_INIT_QP_ATTR, + RDMA_USER_CM_CMD_GET_EVENT, + RDMA_USER_CM_CMD_GET_OPTION, + RDMA_USER_CM_CMD_SET_OPTION, + RDMA_USER_CM_CMD_NOTIFY, + RDMA_USER_CM_CMD_JOIN_MCAST, + RDMA_USER_CM_CMD_LEAVE_MCAST +}; + +/* + * command ABI structures. 
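+ *
+ * In the ABI these structures mirror (the Linux rdma_ucm write
+ * interface), each request begins with struct rdma_ucm_cmd_hdr giving
+ * the opcode and the sizes of the inbound payload and of the expected
+ * response. A hypothetical userspace sketch for
+ * RDMA_USER_CM_CMD_CREATE_ID:
+ *
+ *	struct {
+ *		struct rdma_ucm_cmd_hdr hdr;
+ *		struct rdma_ucm_create_id cmd;
+ *	} req;
+ *
+ *	req.hdr.cmd = RDMA_USER_CM_CMD_CREATE_ID;
+ *	req.hdr.in = sizeof req.cmd;
+ *	req.hdr.out = sizeof(struct rdma_ucm_create_id_resp);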
+ */ +struct rdma_ucm_cmd_hdr { + __u32 cmd; + __u16 in; + __u16 out; +}; + +struct rdma_ucm_create_id { + __u64 uid; + __u64 response; + __u16 ps; + __u8 reserved[6]; +}; + +struct rdma_ucm_create_id_resp { + __u32 id; +}; + +struct rdma_ucm_destroy_id { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_destroy_id_resp { + __u32 events_reported; +}; + +struct rdma_ucm_bind_addr { + __u64 response; + struct sockaddr_in6 addr; + __u32 id; +}; + +struct rdma_ucm_resolve_addr { + struct sockaddr_in6 src_addr; + struct sockaddr_in6 dst_addr; + __u32 id; + __u32 timeout_ms; +}; + +struct rdma_ucm_resolve_route { + __u32 id; + __u32 timeout_ms; +}; + +struct rdma_ucm_query_route { + __u64 response; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_query_route_resp { + __u64 node_guid; + struct ib_user_path_rec ib_route[2]; + struct sockaddr_in6 src_addr; + struct sockaddr_in6 dst_addr; + __u32 num_paths; + __u8 port_num; + __u8 reserved[3]; +}; + +struct rdma_ucm_conn_param { + __u32 qp_num; + __u32 reserved; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; + __u8 private_data_len; + __u8 srq; + __u8 responder_resources; + __u8 initiator_depth; + __u8 flow_control; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 valid; +}; + +struct rdma_ucm_ud_param { + __u32 qp_num; + __u32 qkey; + struct ib_uverbs_ah_attr ah_attr; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; + __u8 private_data_len; + __u8 reserved[7]; +}; + +struct rdma_ucm_connect { + struct rdma_ucm_conn_param conn_param; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_listen { + __u32 id; + __u32 backlog; +}; + +struct rdma_ucm_accept { + __u64 uid; + struct rdma_ucm_conn_param conn_param; + __u32 id; + __u32 reserved; +}; + +struct rdma_ucm_reject { + __u32 id; + __u8 private_data_len; + __u8 reserved[3]; + __u8 private_data[RDMA_MAX_PRIVATE_DATA]; +}; + +struct rdma_ucm_disconnect { + __u32 id; +}; + +struct rdma_ucm_init_qp_attr { + __u64 response; + __u32 id; + __u32 qp_state; +}; + +struct rdma_ucm_notify { + __u32 id; + __u32 event; +}; + +struct rdma_ucm_join_mcast { + __u64 response; /* rdma_ucm_create_id_resp */ + __u64 uid; + struct sockaddr_in6 addr; + __u32 id; +}; + +struct rdma_ucm_get_event { + __u64 response; +}; + +struct rdma_ucm_event_resp { + __u64 uid; + __u32 id; + __u32 event; + __u32 status; + union { + struct rdma_ucm_conn_param conn; + struct rdma_ucm_ud_param ud; + } param; +}; + +#endif /* RDMA_USER_CM_H */ diff --git a/sys/contrib/rdma/rdma_verbs.c b/sys/contrib/rdma/rdma_verbs.c new file mode 100644 index 000000000000..93821074b3c1 --- /dev/null +++ b/sys/contrib/rdma/rdma_verbs.c @@ -0,0 +1,822 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: verbs.c 1349 2004-12-16 21:09:43Z roland $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/libkern.h> +#include <sys/module.h> +#include <sys/endian.h> + +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_cache.h> + +int ib_rate_to_mult(enum ib_rate rate) +{ + switch (rate) { + case IB_RATE_2_5_GBPS: return 1; + case IB_RATE_5_GBPS: return 2; + case IB_RATE_10_GBPS: return 4; + case IB_RATE_20_GBPS: return 8; + case IB_RATE_30_GBPS: return 12; + case IB_RATE_40_GBPS: return 16; + case IB_RATE_60_GBPS: return 24; + case IB_RATE_80_GBPS: return 32; + case IB_RATE_120_GBPS: return 48; + default: return -1; + } +} + +enum ib_rate mult_to_ib_rate(int mult) +{ + switch (mult) { + case 1: return IB_RATE_2_5_GBPS; + case 2: return IB_RATE_5_GBPS; + case 4: return IB_RATE_10_GBPS; + case 8: return IB_RATE_20_GBPS; + case 12: return IB_RATE_30_GBPS; + case 16: return IB_RATE_40_GBPS; + case 24: return IB_RATE_60_GBPS; + case 32: return IB_RATE_80_GBPS; + case 48: return IB_RATE_120_GBPS; + default: return IB_RATE_PORT_CURRENT; + } +} + +enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type) +{ + switch (node_type) { + case RDMA_NODE_IB_CA: + case RDMA_NODE_IB_SWITCH: + case RDMA_NODE_IB_ROUTER: + return RDMA_TRANSPORT_IB; + case RDMA_NODE_RNIC: + return RDMA_TRANSPORT_IWARP; + default: + panic("bad condition"); + return 0; + } +} + +/* Protection domains */ + +struct ib_pd *ib_alloc_pd(struct ib_device *device) +{ + struct ib_pd *pd; + + pd = device->alloc_pd(device, NULL, NULL); + + if (!IS_ERR(pd)) { + pd->device = device; + pd->uobject = NULL; + atomic_store_rel_int(&pd->usecnt, 0); + } + + return pd; +} + +int ib_dealloc_pd(struct ib_pd *pd) +{ + if (atomic_load_acq_int(&pd->usecnt)) + return (EBUSY); + + return pd->device->dealloc_pd(pd); +} + +/* Address handles */ + +struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct ib_ah *ah; + + ah = pd->device->create_ah(pd, ah_attr); + + if (!IS_ERR(ah)) { + ah->device = pd->device; + ah->pd = pd; + ah->uobject = NULL; + atomic_add_acq_int(&pd->usecnt, 1); + } + + return ah; +} + +int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc 
*wc, + struct ib_grh *grh, struct ib_ah_attr *ah_attr) +{ + u32 flow_class; + u16 gid_index; + int ret; + + memset(ah_attr, 0, sizeof *ah_attr); + ah_attr->dlid = wc->slid; + ah_attr->sl = wc->sl; + ah_attr->src_path_bits = wc->dlid_path_bits; + ah_attr->port_num = port_num; + + if (wc->wc_flags & IB_WC_GRH) { + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.dgid = grh->sgid; + + ret = ib_find_cached_gid(device, &grh->dgid, &port_num, + &gid_index); + if (ret) + return ret; + + ah_attr->grh.sgid_index = (u8) gid_index; + flow_class = be32toh(grh->version_tclass_flow); + ah_attr->grh.flow_label = flow_class & 0xFFFFF; + ah_attr->grh.hop_limit = 0xFF; + ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; + } + return 0; +} + +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, + struct ib_grh *grh, u8 port_num) +{ + struct ib_ah_attr ah_attr; + int ret; + + ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr); + if (ret) + return ERR_PTR(ret); + + return ib_create_ah(pd, &ah_attr); +} + +int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + return ah->device->modify_ah ? + ah->device->modify_ah(ah, ah_attr) : + ENOSYS; +} + +int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + return ah->device->query_ah ? + ah->device->query_ah(ah, ah_attr) : + ENOSYS; +} + +int ib_destroy_ah(struct ib_ah *ah) +{ + struct ib_pd *pd; + int ret; + + pd = ah->pd; + ret = ah->device->destroy_ah(ah); + if (!ret) + atomic_subtract_acq_int(&pd->usecnt, 1); + + return ret; +} + +/* Shared receive queues */ + +struct ib_srq *ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr) +{ + struct ib_srq *srq; + + if (!pd->device->create_srq) + return ERR_PTR(ENOSYS); + + srq = pd->device->create_srq(pd, srq_init_attr, NULL); + + if (!IS_ERR(srq)) { + srq->device = pd->device; + srq->pd = pd; + srq->uobject = NULL; + srq->event_handler = srq_init_attr->event_handler; + srq->srq_context = srq_init_attr->srq_context; + atomic_add_acq_int(&pd->usecnt, 1); + atomic_store_rel_int(&srq->usecnt, 0); + } + + return srq; +} + +int ib_modify_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask) +{ + return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL); +} + +int ib_query_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr) +{ + return srq->device->query_srq ? 
+ srq->device->query_srq(srq, srq_attr) : ENOSYS; +} + +int ib_destroy_srq(struct ib_srq *srq) +{ + struct ib_pd *pd; + int ret; + + if (atomic_load_acq_int(&srq->usecnt)) + return (EBUSY); + + pd = srq->pd; + + ret = srq->device->destroy_srq(srq); + if (!ret) + atomic_subtract_acq_int(&pd->usecnt, 1); + + return ret; +} + +/* Queue pairs */ + +struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr) +{ + struct ib_qp *qp; + + qp = pd->device->create_qp(pd, qp_init_attr, NULL); + + if (!IS_ERR(qp)) { + qp->device = pd->device; + qp->pd = pd; + qp->send_cq = qp_init_attr->send_cq; + qp->recv_cq = qp_init_attr->recv_cq; + qp->srq = qp_init_attr->srq; + qp->uobject = NULL; + qp->event_handler = qp_init_attr->event_handler; + qp->qp_context = qp_init_attr->qp_context; + qp->qp_type = qp_init_attr->qp_type; + atomic_add_acq_int(&pd->usecnt, 1); + atomic_add_acq_int(&qp_init_attr->send_cq->usecnt, 1); + atomic_add_acq_int(&qp_init_attr->recv_cq->usecnt, 1); + if (qp_init_attr->srq) + atomic_add_acq_int(&qp_init_attr->srq->usecnt, 1); + } + + return qp; +} + +static const struct { + int valid; + enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1]; + enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1]; +} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .req_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + }, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_RC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + }, + [IB_QPS_RTR] = { + .valid = 1, + .req_param = { + [IB_QPT_UC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN), + [IB_QPT_RC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), + }, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_RC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + } + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .req_param = { + [IB_QPT_UD] = IB_QP_SQ_PSN, + [IB_QPT_UC] = IB_QP_SQ_PSN, + [IB_QPT_RC] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_SMI] = IB_QP_SQ_PSN, + [IB_QPT_GSI] = IB_QP_SQ_PSN, + }, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = 
(IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + } + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY + } + }, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_AV | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), + [IB_QPT_RC] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), + [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | + IB_QP_QKEY), + } + } + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = { + [IB_QPT_UD] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_UC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS), + [IB_QPT_SMI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + [IB_QPT_GSI] = (IB_QP_CUR_STATE | + IB_QP_QKEY), + } + } + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 } + } +}; + +int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, + enum ib_qp_type type, enum ib_qp_attr_mask mask) +{ + enum ib_qp_attr_mask req_param, opt_param; + + if (cur_state < 0 || cur_state > IB_QPS_ERR || + next_state < 0 || next_state > IB_QPS_ERR) + return 0; + + if (mask & IB_QP_CUR_STATE && + cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && + cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) + return 0; + + if (!qp_state_table[cur_state][next_state].valid) + return 0; + + req_param = qp_state_table[cur_state][next_state].req_param[type]; + opt_param = qp_state_table[cur_state][next_state].opt_param[type]; + + if ((mask & req_param) != req_param) + return 0; + + if (mask & ~(req_param | opt_param | IB_QP_STATE)) + return 0; + + return 1; +} + +int ib_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask) +{ + return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); +} + +int ib_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr 
*qp_init_attr) +{ + return qp->device->query_qp ? + qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : + ENOSYS; +} + +int ib_destroy_qp(struct ib_qp *qp) +{ + struct ib_pd *pd; + struct ib_cq *scq, *rcq; + struct ib_srq *srq; + int ret; + + pd = qp->pd; + scq = qp->send_cq; + rcq = qp->recv_cq; + srq = qp->srq; + + ret = qp->device->destroy_qp(qp); + if (!ret) { + atomic_subtract_acq_int(&pd->usecnt, 1); + atomic_subtract_acq_int(&scq->usecnt, 1); + atomic_subtract_acq_int(&rcq->usecnt, 1); + if (srq) + atomic_subtract_acq_int(&srq->usecnt, 1); + } + + return ret; +} + +/* Completion queues */ + +struct ib_cq *ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe, int comp_vector) +{ + struct ib_cq *cq; + + cq = device->create_cq(device, cqe, comp_vector, NULL, NULL); + + if (!IS_ERR(cq)) { + cq->device = device; + cq->uobject = NULL; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_store_rel_int(&cq->usecnt, 0); + } + + return cq; +} + +int ib_destroy_cq(struct ib_cq *cq) +{ + if (atomic_load_acq_int(&cq->usecnt)) + return (EBUSY); + + return cq->device->destroy_cq(cq); +} + +int ib_resize_cq(struct ib_cq *cq, int cqe) +{ + return cq->device->resize_cq ? + cq->device->resize_cq(cq, cqe, NULL) : ENOSYS; +} + +/* Memory regions */ + +struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *mr; + + mr = pd->device->get_dma_mr(pd, mr_access_flags); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + atomic_add_acq_int(&pd->usecnt, 1); + atomic_store_rel_int(&mr->usecnt, 0); + } + + return mr; +} + +struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *mr; + + mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + atomic_add_acq_int(&pd->usecnt, 1); + atomic_store_rel_int(&mr->usecnt, 0); + } + + return mr; +} + +int ib_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + struct ib_pd *old_pd; + int ret; + + if (!mr->device->rereg_phys_mr) + return (ENOSYS); + + if (atomic_load_acq_int(&mr->usecnt)) + return (EBUSY); + + old_pd = mr->pd; + + ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd, + phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + + if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) { + atomic_subtract_acq_int(&old_pd->usecnt, 1); + atomic_add_acq_int(&pd->usecnt, 1); + } + + return ret; +} + +int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + return mr->device->query_mr ? 
+ mr->device->query_mr(mr, mr_attr) : ENOSYS;
+}
+
+int ib_dereg_mr(struct ib_mr *mr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ if (atomic_load_acq_int(&mr->usecnt))
+ return (EBUSY);
+
+ pd = mr->pd;
+ ret = mr->device->dereg_mr(mr);
+ if (!ret)
+ atomic_subtract_acq_int(&pd->usecnt, 1);
+
+ return ret;
+}
+
+/* Memory windows */
+
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
+{
+ struct ib_mw *mw;
+
+ if (!pd->device->alloc_mw)
+ return ERR_PTR(-ENOSYS);
+
+ mw = pd->device->alloc_mw(pd);
+ if (!IS_ERR(mw)) {
+ mw->device = pd->device;
+ mw->pd = pd;
+ mw->uobject = NULL;
+ atomic_add_acq_int(&pd->usecnt, 1);
+ }
+
+ return mw;
+}
+
+int ib_dealloc_mw(struct ib_mw *mw)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ pd = mw->pd;
+ ret = mw->device->dealloc_mw(mw);
+ if (!ret)
+ atomic_subtract_acq_int(&pd->usecnt, 1);
+
+ return ret;
+}
+
+/* "Fast" memory regions */
+
+struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct ib_fmr *fmr;
+
+ if (!pd->device->alloc_fmr)
+ return ERR_PTR(-ENOSYS);
+
+ fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
+ if (!IS_ERR(fmr)) {
+ fmr->device = pd->device;
+ fmr->pd = pd;
+ atomic_add_acq_int(&pd->usecnt, 1);
+ }
+
+ return fmr;
+}
+
+int ib_unmap_fmr(struct ib_fmr_list_head *fmr_list)
+{
+ struct ib_fmr *fmr;
+
+ if (TAILQ_EMPTY(fmr_list))
+ return 0;
+
+ fmr = TAILQ_FIRST(fmr_list);
+ return fmr->device->unmap_fmr(fmr_list);
+}
+
+int ib_dealloc_fmr(struct ib_fmr *fmr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ pd = fmr->pd;
+ ret = fmr->device->dealloc_fmr(fmr);
+ if (!ret)
+ atomic_subtract_acq_int(&pd->usecnt, 1);
+
+ return ret;
+}
+
+/* Multicast groups */
+
+int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
+{
+ if (!qp->device->attach_mcast)
+ return (ENOSYS);
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ return (EINVAL);
+
+ return qp->device->attach_mcast(qp, gid, lid);
+}
+
+int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
+{
+ if (!qp->device->detach_mcast)
+ return (ENOSYS);
+ if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ return (EINVAL);
+
+ return qp->device->detach_mcast(qp, gid, lid);
+}
diff --git a/sys/contrib/rdma/types.h b/sys/contrib/rdma/types.h
new file mode 100644
index 000000000000..33a1a62bd905
--- /dev/null
+++ b/sys/contrib/rdma/types.h
@@ -0,0 +1,144 @@
+/*
+ * $FreeBSD$
+ */
+#ifndef __RDMA_TYPES_H_
+#define __RDMA_TYPES_H_
+#include <sys/types.h>
+#include <sys/malloc.h>
+
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef uint8_t __u8;
+typedef uint16_t __u16;
+typedef uint32_t __u32;
+typedef uint64_t __u64;
+typedef uint8_t __be8;
+typedef uint16_t __be16;
+typedef uint32_t __be32;
+typedef uint64_t __be64;
+
+typedef int32_t __s32;
+
+
+#define LINUX_TYPES_DEFINED
+/*
+ * Linux-style error pointers: ERR_PTR() must be handed a negative
+ * errno, or IS_ERR() will not recognize the result as an error.
+ */
+#define ERR_PTR(err) ((void *)((long)(err)))
+#define IS_ERR(ptr) ((unsigned long)(ptr) > (unsigned long)(-1000))
+#define PTR_ERR(ptr) ((long)(ptr))
+
+#define PANIC_IF(exp) do { \
+ if (exp) \
+ panic("BUG func %s line %u: %s", __FUNCTION__, __LINE__, #exp); \
+} while (0)
+
+#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field)))
+
+/*
+ * Return the index of the first clear bit, or a value >= max when every
+ * bit below max is set.
+ */
+static __inline int
+find_first_zero_bit(volatile void *p, int max)
+{
+ int b;
+ volatile int *ptr = (volatile int *)p;
+
+ for (b = 0; b < max; b += 32) {
+ if (ptr[b >> 5] != ~0) {
+ for (;;) {
+ if ((ptr[b >> 5] & (1 << (b & 0x1f))) == 0)
+ return (b);
+ b++;
+ }
+ }
+ }
+
+ return (max);
+}
+
+struct kvl {
+ struct kvl *next;
+ unsigned int key;
+ void *value;
+};
+
+#define DEFINE_KVL(x) struct kvl x;
+
+/* Return the value stored under key, or NULL when the key is absent. */
+static __inline void *
+kvl_lookup(struct kvl *x, uint32_t key)
+{
+ struct kvl *i;
+
+ for (i = x->next; i; i = i->next)
+ if (i->key == key)
+ return (i->value);
+ return (NULL);
+}
+
+/*
+ * Store ptr under the first free key >= starting_id, reporting the
+ * key chosen through *id (in the spirit of Linux idr_get_new_above()).
+ */
+static __inline int
+kvl_alloc_above(struct kvl *idp, void *ptr, int starting_id, int *id)
+{
+ int newid = starting_id;
+ struct kvl *i;
+
+restart:
+ for (i = idp->next; i; i = i->next)
+ if (i->key == newid) {
+ newid++;
+ goto restart;
+ }
+
+ i = malloc(sizeof(struct kvl), M_TEMP, M_NOWAIT);
+ if (i == NULL)
+ return (-ENOMEM);
+ i->key = newid;
+ i->value = ptr;
+ i->next = idp->next;
+ idp->next = i;
+ *id = newid;
+ return (0);
+}
+
+/* Unlink and free the entry with the given key, if present. */
+static __inline void
+kvl_delete(struct kvl *idp, int id)
+{
+ struct kvl *i, *prev = NULL;
+
+ for (i = idp->next; i; prev = i, i = i->next)
+ if (i->key == id) {
+ if (!prev)
+ idp->next = i->next;
+ else
+ prev->next = i->next;
+ free(i, M_TEMP);
+ return;
+ }
+}
+
+/* Release every entry hanging off the list head. */
+static __inline void
+kvl_free(struct kvl *idp)
+{
+ struct kvl *i, *tmp;
+
+ for (i = idp->next; i; i = tmp) {
+ tmp = i->next;
+ free(i, M_TEMP);
+ }
+ idp->next = NULL;
+}
+
+#endif /* __RDMA_TYPES_H_ */
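
For readers coming from Linux, the kvl_* helpers above stand in for the idr interface that the ported core code relied on. A minimal sketch of how a consumer might drive them; the example_* names and the use case are illustrative assumptions, not code from this commit:

/*
 * Hypothetical consumer of the kvl_* helpers from types.h, used the
 * way the ported core code uses a Linux idr: map small integer ids
 * to objects and hand out the ids on demand.
 */
#include <contrib/rdma/types.h>

static DEFINE_KVL(example_ids)	/* zero-initialized head; entries hang off ->next */

static int
example_insert(void *obj, int *id)
{
	/* Claim the first free key at or above 1; *id reports the key chosen. */
	return (kvl_alloc_above(&example_ids, obj, 1, id));
}

static void *
example_find(int id)
{
	/* NULL when no entry holds this key. */
	return (kvl_lookup(&example_ids, id));
}

static void
example_remove(int id)
{
	kvl_delete(&example_ids, id);	/* unlinks and frees the node */
}

The helpers do no locking of their own, so a real consumer would have to serialize all calls with its own mutex; lookups and allocations are linear scans of the singly linked list, which is adequate for the small id spaces used here.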