diff options
author | Edward Tomasz Napierala <trasz@FreeBSD.org> | 2013-09-14 15:29:06 +0000 |
---|---|---|
committer | Edward Tomasz Napierala <trasz@FreeBSD.org> | 2013-09-14 15:29:06 +0000 |
commit | 009ea47eb2d21856af4529aaaca32cd67748daea (patch) | |
tree | d04af1389a0e20c7613b9dccaf4f3176084e40cc /sys | |
parent | 196beb53597c7f328bf31034397007e494ce7421 (diff) | |
download | src-test2-009ea47eb2d21856af4529aaaca32cd67748daea.tar.gz src-test2-009ea47eb2d21856af4529aaaca32cd67748daea.zip |
Notes
Diffstat (limited to 'sys')
-rw-r--r-- | sys/cam/ctl/ctl.c | 22 | ||||
-rw-r--r-- | sys/cam/ctl/ctl_frontend_iscsi.c | 2638 | ||||
-rw-r--r-- | sys/cam/ctl/ctl_frontend_iscsi.h | 112 | ||||
-rw-r--r-- | sys/cam/ctl/ctl_ioctl.h | 169 | ||||
-rw-r--r-- | sys/conf/files | 4 | ||||
-rw-r--r-- | sys/dev/iscsi/icl.c | 1292 | ||||
-rw-r--r-- | sys/dev/iscsi/icl.h | 151 | ||||
-rw-r--r-- | sys/dev/iscsi/icl_proxy.c | 397 | ||||
-rw-r--r-- | sys/dev/iscsi/iscsi.c | 2109 | ||||
-rw-r--r-- | sys/dev/iscsi/iscsi.h | 135 | ||||
-rw-r--r-- | sys/dev/iscsi/iscsi_ioctl.h | 201 | ||||
-rw-r--r-- | sys/dev/iscsi/iscsi_proto.h | 439 | ||||
-rw-r--r-- | sys/dev/iscsi_initiator/iscsi.c | 6 | ||||
-rw-r--r-- | sys/modules/Makefile | 1 | ||||
-rw-r--r-- | sys/modules/ctl/Makefile | 3 | ||||
-rw-r--r-- | sys/modules/iscsi/Makefile | 24 |
16 files changed, 7698 insertions, 5 deletions
diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c index 215f1cd1074c..853a36de47d9 100644 --- a/sys/cam/ctl/ctl.c +++ b/sys/cam/ctl/ctl.c @@ -3148,6 +3148,28 @@ ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, sbuf_delete(sb); break; } + case CTL_ISCSI: { + struct ctl_iscsi *ci; + struct ctl_frontend *fe; + + ci = (struct ctl_iscsi *)addr; + + mtx_lock(&softc->ctl_lock); + STAILQ_FOREACH(fe, &softc->fe_list, links) { + if (strcmp(fe->port_name, "iscsi") == 0) + break; + } + mtx_unlock(&softc->ctl_lock); + + if (fe == NULL) { + ci->status = CTL_ISCSI_ERROR; + snprintf(ci->error_str, sizeof(ci->error_str), "Backend \"iscsi\" not found."); + break; + } + + retval = fe->ioctl(dev, cmd, addr, flag, td); + break; + } default: { /* XXX KDM should we fix this? */ #if 0 diff --git a/sys/cam/ctl/ctl_frontend_iscsi.c b/sys/cam/ctl/ctl_frontend_iscsi.c new file mode 100644 index 000000000000..34e2ead1c5a0 --- /dev/null +++ b/sys/cam/ctl/ctl_frontend_iscsi.c @@ -0,0 +1,2638 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * CTL frontend for the iSCSI protocol. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/capability.h> +#include <sys/condvar.h> +#include <sys/file.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/queue.h> +#include <sys/sbuf.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <sys/unistd.h> +#include <vm/uma.h> + +#include <cam/scsi/scsi_all.h> +#include <cam/scsi/scsi_da.h> +#include <cam/ctl/ctl_io.h> +#include <cam/ctl/ctl.h> +#include <cam/ctl/ctl_backend.h> +#include <cam/ctl/ctl_frontend.h> +#include <cam/ctl/ctl_frontend_internal.h> +#include <cam/ctl/ctl_debug.h> +#include <cam/ctl/ctl_ha.h> +#include <cam/ctl/ctl_ioctl.h> +#include <cam/ctl/ctl_private.h> + +#include "../../dev/iscsi/icl.h" +#include "../../dev/iscsi/iscsi_proto.h" +#include "ctl_frontend_iscsi.h" + +#ifdef ICL_KERNEL_PROXY +#include <sys/socketvar.h> +#endif + +static MALLOC_DEFINE(M_CFISCSI, "cfiscsi", "Memory used for CTL iSCSI frontend"); +static uma_zone_t cfiscsi_data_wait_zone; + +SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, iscsi, CTLFLAG_RD, 0, + "CAM Target Layer iSCSI Frontend"); +static int debug = 3; +TUNABLE_INT("kern.cam.ctl.iscsi.debug", &debug); +SYSCTL_INT(_kern_cam_ctl_iscsi, OID_AUTO, debug, CTLFLAG_RW, + &debug, 1, "Enable debug 
/*
 * Logging helpers.
 *
 * Output is gated by the file-scope "debug" variable: warnings are printed
 * when debug > 0, debug messages when debug > 1.  X must be a string literal
 * because it is pasted between the prefix and the trailing newline.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement; this keeps constructs such as
 *
 *	if (cond)
 *		CFISCSI_WARN("...");
 *	else
 *		...
 *
 * well-formed.  The session argument S is parenthesized so that any pointer
 * expression may be passed.
 */
#define	CFISCSI_DEBUG(X, ...)						\
	do {								\
		if (debug > 1) {					\
			printf("%s: " X "\n",				\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

#define	CFISCSI_WARN(X, ...)						\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s: " X "\n",			\
			    __func__, ## __VA_ARGS__);			\
		}							\
	} while (0)

#define	CFISCSI_SESSION_DEBUG(S, X, ...)				\
	do {								\
		if (debug > 1) {					\
			printf("%s: %s (%s): " X "\n",			\
			    __func__, (S)->cs_initiator_addr,		\
			    (S)->cs_initiator_name, ## __VA_ARGS__);	\
		}							\
	} while (0)

#define	CFISCSI_SESSION_WARN(S, X, ...)					\
	do {								\
		if (debug > 0) {					\
			printf("WARNING: %s (%s): " X "\n",		\
			    (S)->cs_initiator_addr,			\
			    (S)->cs_initiator_name, ## __VA_ARGS__);	\
		}							\
	} while (0)
cfiscsi_session_delete(struct cfiscsi_session *cs); + +static struct cfiscsi_softc cfiscsi_softc; +extern struct ctl_softc *control_softc; + +static int cfiscsi_module_event_handler(module_t, int /*modeventtype_t*/, void *); + +static moduledata_t cfiscsi_moduledata = { + "ctlcfiscsi", + cfiscsi_module_event_handler, + NULL +}; + +DECLARE_MODULE(ctlcfiscsi, cfiscsi_moduledata, SI_SUB_CONFIGURE, SI_ORDER_FOURTH); +MODULE_VERSION(ctlcfiscsi, 1); +MODULE_DEPEND(ctlcfiscsi, ctl, 1, 1, 1); +MODULE_DEPEND(ctlcfiscsi, icl, 1, 1, 1); + +static struct icl_pdu * +cfiscsi_pdu_new_response(struct icl_pdu *request, int flags) +{ + + return (icl_pdu_new_bhs(request->ip_conn, flags)); +} + +static void +cfiscsi_pdu_update_cmdsn(const struct icl_pdu *request) +{ + const struct iscsi_bhs_scsi_command *bhssc; + struct cfiscsi_session *cs; + uint32_t cmdsn, expstatsn; + + cs = PDU_SESSION(request); + + /* + * Every incoming PDU - not just NOP-Out - resets the ping timer. + * The purpose of the timeout is to reset the connection when it stalls; + * we don't want this to happen when NOP-In or NOP-Out ends up delayed + * in some queue. + * + * XXX: Locking? + */ + cs->cs_timeout = 0; + + /* + * Data-Out PDUs don't contain CmdSN. + */ + if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == + ISCSI_BHS_OPCODE_SCSI_DATA_OUT) + return; + + /* + * We're only using fields common for all the request + * (initiator -> target) PDUs. + */ + bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs; + cmdsn = ntohl(bhssc->bhssc_cmdsn); + expstatsn = ntohl(bhssc->bhssc_expstatsn); + + CFISCSI_SESSION_LOCK(cs); +#if 0 + if (expstatsn != cs->cs_statsn) { + CFISCSI_SESSION_DEBUG(cs, "received PDU with ExpStatSN %d, " + "while current StatSN is %d", expstatsn, + cs->cs_statsn); + } +#endif + + /* + * XXX: The target MUST silently ignore any non-immediate command + * outside of this range or non-immediate duplicates within + * the range. 
+ */ + if (cmdsn != cs->cs_cmdsn) { + CFISCSI_SESSION_WARN(cs, "received PDU with CmdSN %d, " + "while expected CmdSN was %d", cmdsn, cs->cs_cmdsn); + cs->cs_cmdsn = cmdsn + 1; + CFISCSI_SESSION_UNLOCK(cs); + return; + } + + /* + * XXX: The CmdSN of the rejected command PDU (if it is a non-immediate + * command) MUST NOT be considered received by the target + * (i.e., a command sequence gap must be assumed for the CmdSN) + */ + + if ((request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) + cs->cs_cmdsn++; + + CFISCSI_SESSION_UNLOCK(cs); +} + +static void +cfiscsi_pdu_handle(struct icl_pdu *request) +{ + struct cfiscsi_session *cs; + + cs = PDU_SESSION(request); + + cfiscsi_pdu_update_cmdsn(request); + + /* + * Handle the PDU; this includes e.g. receiving the remaining + * part of PDU and submitting the SCSI command to CTL + * or queueing a reply. The handling routine is responsible + * for freeing the PDU when it's no longer needed. + */ + switch (request->ip_bhs->bhs_opcode & + ~ISCSI_BHS_OPCODE_IMMEDIATE) { + case ISCSI_BHS_OPCODE_NOP_OUT: + cfiscsi_pdu_handle_nop_out(request); + break; + case ISCSI_BHS_OPCODE_SCSI_COMMAND: + cfiscsi_pdu_handle_scsi_command(request); + break; + case ISCSI_BHS_OPCODE_TASK_REQUEST: + cfiscsi_pdu_handle_task_request(request); + break; + case ISCSI_BHS_OPCODE_SCSI_DATA_OUT: + cfiscsi_pdu_handle_data_out(request); + break; + case ISCSI_BHS_OPCODE_LOGOUT_REQUEST: + cfiscsi_pdu_handle_logout_request(request); + break; + default: + CFISCSI_SESSION_WARN(cs, "received PDU with unsupported " + "opcode 0x%x; dropping connection", + request->ip_bhs->bhs_opcode); + cfiscsi_session_terminate(cs); + icl_pdu_free(request); + } + +} + +static void +cfiscsi_receive_callback(struct icl_pdu *request) +{ + struct cfiscsi_session *cs; + + cs = PDU_SESSION(request); + +#ifdef ICL_KERNEL_PROXY + if (cs->cs_waiting_for_ctld || cs->cs_login_phase) { + if (cs->cs_login_pdu == NULL) + cs->cs_login_pdu = request; + else + icl_pdu_free(request); + 
/*
 * Connection error hook (installed as ic_error on the session's connection).
 * Logs a warning and requests termination of the session that owns "ic".
 */
static void
cfiscsi_error_callback(struct icl_conn *ic)
{
	struct cfiscsi_session *cs = CONN_SESSION(ic);

	CFISCSI_SESSION_WARN(cs, "connection error; dropping connection");
	cfiscsi_session_terminate(cs);
}
/*
 * Convert the 8-byte LUN field of an iSCSI PDU into a flat LUN number.
 *
 * The LUN field in iSCSI PDUs may look like an ordinary 64 bit number, but
 * is in fact a multi-level structure defined in SCSI Architecture Model 5
 * (SAM-5), section 4.6.  "encoded" is interpreted as the raw bytes of that
 * field (it is copied byte-for-byte, never treated as an integer), and the
 * top two bits of the first byte select the addressing method:
 *
 *	0x00	peripheral device addressing: LUN is byte 1
 *	0x40	flat space addressing: LUN is 14 bits from bytes 0-1
 *	0xC0	extended flat space addressing (first byte must be 0xD2):
 *		LUN is 24 bits from bytes 1-3
 *
 * Any other method, or a recognized method with non-zero bytes where zeros
 * are required, is logged and reported as 0xffffffff.
 *
 * Returns the decoded LUN, or 0xffffffff if the field cannot be decoded.
 */
static uint32_t
cfiscsi_decode_lun(uint64_t encoded)
{
	uint8_t lun[8];
	uint32_t result;

	memcpy(lun, &encoded, sizeof(lun));
	switch (lun[0] & 0xC0) {
	case 0x00:
		if ((lun[0] & 0x3f) != 0 || lun[2] != 0 || lun[3] != 0 ||
		    lun[4] != 0 || lun[5] != 0 || lun[6] != 0 || lun[7] != 0) {
			CFISCSI_WARN("malformed LUN "
			    "(peripheral device addressing method): 0x%jx",
			    (uintmax_t)encoded);
			result = 0xffffffff;
			break;
		}
		result = lun[1];
		break;
	case 0x40:
		if (lun[2] != 0 || lun[3] != 0 || lun[4] != 0 || lun[5] != 0 ||
		    lun[6] != 0 || lun[7] != 0) {
			CFISCSI_WARN("malformed LUN "
			    "(flat address space addressing method): 0x%jx",
			    (uintmax_t)encoded);
			result = 0xffffffff;
			break;
		}
		result = ((lun[0] & 0x3f) << 8) + lun[1];
		break;
	case 0xC0:
		if (lun[0] != 0xD2 || lun[4] != 0 || lun[5] != 0 ||
		    lun[6] != 0 || lun[7] != 0) {
			CFISCSI_WARN("malformed LUN (extended flat "
			    "address space addressing method): 0x%jx",
			    (uintmax_t)encoded);
			result = 0xffffffff;
			break;
		}
		result = (lun[1] << 16) + (lun[2] << 8) + lun[3];
		/*
		 * Must not fall through: the default case would overwrite
		 * the LUN just decoded with the error value.
		 */
		break;
	default:
		CFISCSI_WARN("unsupported LUN format 0x%jx",
		    (uintmax_t)encoded);
		result = 0xffffffff;
		break;
	}

	return (result);
}
icl_pdu *request) +{ + struct cfiscsi_session *cs; + struct iscsi_bhs_nop_out *bhsno; + struct iscsi_bhs_nop_in *bhsni; + struct icl_pdu *response; + + cs = PDU_SESSION(request); + bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; + + if (bhsno->bhsno_initiator_task_tag == 0xffffffff) { + /* + * Nothing to do, iscsi_pdu_update_statsn() already + * zeroed the timeout. + */ + icl_pdu_free(request); + return; + } + + response = cfiscsi_pdu_new_response(request, M_NOWAIT); + if (response == NULL) { + icl_pdu_free(request); + return; + } + bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs; + bhsni->bhsni_opcode = ISCSI_BHS_OPCODE_NOP_IN; + bhsni->bhsni_flags = 0x80; + bhsni->bhsni_initiator_task_tag = bhsno->bhsno_initiator_task_tag; + bhsni->bhsni_target_transfer_tag = 0xffffffff; + +#if 0 + /* XXX */ + response->ip_data_len = request->ip_data_len; + response->ip_data_mbuf = request->ip_data_mbuf; + request->ip_data_len = 0; + request->ip_data_mbuf = NULL; +#endif + + icl_pdu_free(request); + cfiscsi_pdu_queue(response); +} + +static void +cfiscsi_pdu_handle_scsi_command(struct icl_pdu *request) +{ + struct iscsi_bhs_scsi_command *bhssc; + struct cfiscsi_session *cs; + union ctl_io *io; + int error; + + cs = PDU_SESSION(request); + bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; + //CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x", + // bhssc->bhssc_initiator_task_tag); + + if (request->ip_data_len > 0 && cs->cs_immediate_data == false) { + CFISCSI_SESSION_WARN(cs, "unsolicited data with " + "ImmediateData=No; dropping connection"); + cfiscsi_session_terminate(cs); + icl_pdu_free(request); + return; + } + io = ctl_alloc_io(cs->cs_target->ct_softc->fe.ctl_pool_ref); + if (io == NULL) { + CFISCSI_SESSION_WARN(cs, "can't allocate ctl_io"); + icl_pdu_free(request); + return; + } + ctl_zero_io(io); + io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = request; + io->io_hdr.io_type = CTL_IO_SCSI; + io->io_hdr.nexus.initid.id = cs->cs_ctl_initid; + 
io->io_hdr.nexus.targ_port = cs->cs_target->ct_softc->fe.targ_port; + io->io_hdr.nexus.targ_target.id = 0; + io->io_hdr.nexus.targ_lun = cfiscsi_decode_lun(bhssc->bhssc_lun); + io->io_hdr.nexus.lun_map_fn = cfiscsi_map_lun; + io->io_hdr.nexus.lun_map_arg = cs; + io->scsiio.tag_num = bhssc->bhssc_initiator_task_tag; + switch ((bhssc->bhssc_flags & BHSSC_FLAGS_ATTR)) { + case BHSSC_FLAGS_ATTR_UNTAGGED: + io->scsiio.tag_type = CTL_TAG_UNTAGGED; + break; + case BHSSC_FLAGS_ATTR_SIMPLE: + io->scsiio.tag_type = CTL_TAG_SIMPLE; + break; + case BHSSC_FLAGS_ATTR_ORDERED: + io->scsiio.tag_type = CTL_TAG_ORDERED; + break; + case BHSSC_FLAGS_ATTR_HOQ: + io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; + break; + case BHSSC_FLAGS_ATTR_ACA: + io->scsiio.tag_type = CTL_TAG_ACA; + break; + default: + io->scsiio.tag_type = CTL_TAG_UNTAGGED; + CFISCSI_SESSION_WARN(cs, "unhandled tag type %d", + bhssc->bhssc_flags & BHSSC_FLAGS_ATTR); + break; + } + io->scsiio.cdb_len = sizeof(bhssc->bhssc_cdb); /* Which is 16. 
*/ + memcpy(io->scsiio.cdb, bhssc->bhssc_cdb, sizeof(bhssc->bhssc_cdb)); + refcount_acquire(&cs->cs_outstanding_ctl_pdus); + error = ctl_queue(io); + if (error != CTL_RETVAL_COMPLETE) { + CFISCSI_SESSION_WARN(cs, "ctl_queue() failed; error %d", error); + ctl_free_io(io); + refcount_release(&cs->cs_outstanding_ctl_pdus); + icl_pdu_free(request); + } +} + +static void +cfiscsi_pdu_handle_task_request(struct icl_pdu *request) +{ + struct iscsi_bhs_task_management_request *bhstmr; + struct iscsi_bhs_task_management_response *bhstmr2; + struct icl_pdu *response; + struct cfiscsi_session *cs; + union ctl_io *io; + int error; + + cs = PDU_SESSION(request); + bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; + io = ctl_alloc_io(cs->cs_target->ct_softc->fe.ctl_pool_ref); + if (io == NULL) { + CFISCSI_SESSION_WARN(cs, "can't allocate ctl_io"); + icl_pdu_free(request); + return; + } + ctl_zero_io(io); + io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = request; + io->io_hdr.io_type = CTL_IO_TASK; + io->io_hdr.nexus.initid.id = cs->cs_ctl_initid; + io->io_hdr.nexus.targ_port = cs->cs_target->ct_softc->fe.targ_port; + io->io_hdr.nexus.targ_target.id = 0; + io->io_hdr.nexus.targ_lun = cfiscsi_decode_lun(bhstmr->bhstmr_lun); + io->io_hdr.nexus.lun_map_fn = cfiscsi_map_lun; + io->io_hdr.nexus.lun_map_arg = cs; + io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */ + + switch (bhstmr->bhstmr_function & ~0x80) { + case BHSTMR_FUNCTION_ABORT_TASK: +#if 0 + CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_ABORT_TASK"); +#endif + io->taskio.task_action = CTL_TASK_ABORT_TASK; + io->taskio.tag_num = bhstmr->bhstmr_referenced_task_tag; + break; + case BHSTMR_FUNCTION_LOGICAL_UNIT_RESET: +#if 0 + CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_LOGICAL_UNIT_RESET"); +#endif + io->taskio.task_action = CTL_TASK_LUN_RESET; + break; + case BHSTMR_FUNCTION_TARGET_COLD_RESET: +#if 0 + CFISCSI_SESSION_DEBUG(cs, "BHSTMR_FUNCTION_TARGET_COLD_RESET"); +#endif + io->taskio.task_action = 
CTL_TASK_BUS_RESET; + break; + default: + CFISCSI_SESSION_DEBUG(cs, "unsupported function 0x%x", + bhstmr->bhstmr_function & ~0x80); + ctl_free_io(io); + + response = cfiscsi_pdu_new_response(request, M_NOWAIT); + if (response == NULL) { + icl_pdu_free(request); + return; + } + bhstmr2 = (struct iscsi_bhs_task_management_response *) + response->ip_bhs; + bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE; + bhstmr2->bhstmr_flags = 0x80; + bhstmr2->bhstmr_response = + BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED; + bhstmr2->bhstmr_initiator_task_tag = + bhstmr->bhstmr_initiator_task_tag; + icl_pdu_free(request); + cfiscsi_pdu_queue(response); + return; + } + + refcount_acquire(&cs->cs_outstanding_ctl_pdus); + error = ctl_queue(io); + if (error != CTL_RETVAL_COMPLETE) { + CFISCSI_SESSION_WARN(cs, "ctl_queue() failed; error %d", error); + ctl_free_io(io); + refcount_release(&cs->cs_outstanding_ctl_pdus); + icl_pdu_free(request); + } +} + +static bool +cfiscsi_handle_data_segment(struct icl_pdu *request, struct cfiscsi_data_wait *cdw) +{ + struct iscsi_bhs_data_out *bhsdo; + struct cfiscsi_session *cs; + struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; + size_t copy_len, off, buffer_offset; + int ctl_sg_count; + union ctl_io *io; + + cs = PDU_SESSION(request); + + KASSERT((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == + ISCSI_BHS_OPCODE_SCSI_DATA_OUT || + (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == + ISCSI_BHS_OPCODE_SCSI_COMMAND, + ("bad opcode 0x%x", request->ip_bhs->bhs_opcode)); + + /* + * We're only using fields common for Data Out and SCSI Command PDUs. 
+ */ + bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; + + io = cdw->cdw_ctl_io; + KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN, + ("CTL_FLAG_DATA_IN")); + +#if 0 + CFISCSI_SESSION_DEBUG(cs, "received %zd bytes out of %d", + request->ip_data_len, io->scsiio.kern_total_len); +#endif + + if (io->scsiio.kern_sg_entries > 0) { + ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; + ctl_sg_count = io->scsiio.kern_sg_entries; + } else { + ctl_sglist = &ctl_sg_entry; + ctl_sglist->addr = io->scsiio.kern_data_ptr; + ctl_sglist->len = io->scsiio.kern_data_len; + ctl_sg_count = 1; + } +#if 0 + if (ctl_sg_count > 1) + CFISCSI_SESSION_DEBUG(cs, "ctl_sg_count = %d", ctl_sg_count); +#endif + + if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == + ISCSI_BHS_OPCODE_SCSI_DATA_OUT) + buffer_offset = ntohl(bhsdo->bhsdo_buffer_offset); + else + buffer_offset = 0; + + /* + * Make sure the offset, as sent by the initiator, matches the offset + * we're supposed to be at in the scatter-gather list. 
+ */ + if (buffer_offset != io->scsiio.ext_data_filled) { + CFISCSI_SESSION_WARN(cs, "received bad buffer offset %zd, " + "expected %zd", buffer_offset, + (size_t)io->scsiio.ext_data_filled); + cfiscsi_session_terminate(cs); + return (true); + } + + off = 0; + for (;;) { + KASSERT(cdw->cdw_sg_index < ctl_sg_count, + ("cdw->cdw_sg_index >= ctl_sg_count")); + if (cdw->cdw_sg_len == 0) { + cdw->cdw_sg_addr = ctl_sglist[cdw->cdw_sg_index].addr; + cdw->cdw_sg_len = ctl_sglist[cdw->cdw_sg_index].len; + } + copy_len = icl_pdu_data_segment_length(request) - off; + if (copy_len > cdw->cdw_sg_len) + copy_len = cdw->cdw_sg_len; + + icl_pdu_get_data(request, off, cdw->cdw_sg_addr, copy_len); + cdw->cdw_sg_addr += copy_len; + cdw->cdw_sg_len -= copy_len; + off += copy_len; + io->scsiio.ext_data_filled += copy_len; + + if (cdw->cdw_sg_len == 0) { + if (cdw->cdw_sg_index == ctl_sg_count - 1) + break; + cdw->cdw_sg_index++; + } + if (off == icl_pdu_data_segment_length(request)) + break; + } + + if (off < icl_pdu_data_segment_length(request)) { + CFISCSI_SESSION_WARN(cs, "received too much data: got %zd bytes, " + "expected %zd", icl_pdu_data_segment_length(request), off); + cfiscsi_session_terminate(cs); + return (true); + } + + if (bhsdo->bhsdo_flags & BHSDO_FLAGS_F || + io->scsiio.ext_data_filled == io->scsiio.kern_total_len) { + if ((bhsdo->bhsdo_flags & BHSDO_FLAGS_F) == 0) { + CFISCSI_SESSION_WARN(cs, "got the final packet without " + "the F flag; flags = 0x%x; dropping connection", + bhsdo->bhsdo_flags); + cfiscsi_session_terminate(cs); + return (true); + } + + if (io->scsiio.ext_data_filled != io->scsiio.kern_total_len) { + if ((request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == + ISCSI_BHS_OPCODE_SCSI_DATA_OUT) { + CFISCSI_SESSION_WARN(cs, "got the final packet, but the " + "transmitted size was %zd bytes instead of %d; " + "dropping connection", + (size_t)io->scsiio.ext_data_filled, + io->scsiio.kern_total_len); + cfiscsi_session_terminate(cs); + return (true); 
+ } else { + /* + * For SCSI Command PDU, this just means we need to + * solicit more data by sending R2T. + */ + return (false); + } + } +#if 0 + CFISCSI_SESSION_DEBUG(cs, "no longer expecting Data-Out with target " + "transfer tag 0x%x", cdw->cdw_target_transfer_tag); +#endif + + return (true); + } + + return (false); +} + +static void +cfiscsi_pdu_handle_data_out(struct icl_pdu *request) +{ + struct iscsi_bhs_data_out *bhsdo; + struct cfiscsi_session *cs; + struct cfiscsi_data_wait *cdw = NULL; + union ctl_io *io; + bool done; + + cs = PDU_SESSION(request); + bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; + + CFISCSI_SESSION_LOCK(cs); + TAILQ_FOREACH(cdw, &cs->cs_waiting_for_data_out, cdw_next) { +#if 0 + CFISCSI_SESSION_DEBUG(cs, "have ttt 0x%x, itt 0x%x; looking for " + "ttt 0x%x, itt 0x%x", + bhsdo->bhsdo_target_transfer_tag, + bhsdo->bhsdo_initiator_task_tag, + cdw->cdw_target_transfer_tag, cdw->cdw_initiator_task_tag)); +#endif + if (bhsdo->bhsdo_target_transfer_tag == + cdw->cdw_target_transfer_tag) + break; + } + CFISCSI_SESSION_UNLOCK(cs); + if (cdw == NULL) { + CFISCSI_SESSION_WARN(cs, "data transfer tag 0x%x, initiator task tag " + "0x%x, not found", bhsdo->bhsdo_target_transfer_tag, + bhsdo->bhsdo_initiator_task_tag); + icl_pdu_free(request); + cfiscsi_session_terminate(cs); + return; + } + + io = cdw->cdw_ctl_io; + KASSERT((io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_IN, + ("CTL_FLAG_DATA_IN")); + + done = cfiscsi_handle_data_segment(request, cdw); + if (done) { + CFISCSI_SESSION_LOCK(cs); + TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); + CFISCSI_SESSION_UNLOCK(cs); + uma_zfree(cfiscsi_data_wait_zone, cdw); + io->scsiio.be_move_done(io); + } + + icl_pdu_free(request); +} + +static void +cfiscsi_pdu_handle_logout_request(struct icl_pdu *request) +{ + struct iscsi_bhs_logout_request *bhslr; + struct iscsi_bhs_logout_response *bhslr2; + struct icl_pdu *response; + struct cfiscsi_session *cs; + + cs = PDU_SESSION(request); 
+ bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs; + switch (bhslr->bhslr_reason & 0x7f) { + case BHSLR_REASON_CLOSE_SESSION: + case BHSLR_REASON_CLOSE_CONNECTION: + response = cfiscsi_pdu_new_response(request, M_NOWAIT); + if (response == NULL) { + icl_pdu_free(request); + cfiscsi_session_terminate(cs); + return; + } + bhslr2 = (struct iscsi_bhs_logout_response *)response->ip_bhs; + bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_RESPONSE; + bhslr2->bhslr_flags = 0x80; + bhslr2->bhslr_response = BHSLR_RESPONSE_CLOSED_SUCCESSFULLY; + bhslr2->bhslr_initiator_task_tag = + bhslr->bhslr_initiator_task_tag; + icl_pdu_free(request); + cfiscsi_pdu_queue(response); + cfiscsi_session_terminate(cs); + break; + case BHSLR_REASON_REMOVE_FOR_RECOVERY: + response = cfiscsi_pdu_new_response(request, M_NOWAIT); + if (response == NULL) { + icl_pdu_free(request); + cfiscsi_session_terminate(cs); + return; + } + bhslr2 = (struct iscsi_bhs_logout_response *)response->ip_bhs; + bhslr2->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_RESPONSE; + bhslr2->bhslr_flags = 0x80; + bhslr2->bhslr_response = BHSLR_RESPONSE_RECOVERY_NOT_SUPPORTED; + bhslr2->bhslr_initiator_task_tag = + bhslr->bhslr_initiator_task_tag; + icl_pdu_free(request); + cfiscsi_pdu_queue(response); + break; + default: + CFISCSI_SESSION_WARN(cs, "invalid reason 0%x; dropping connection", + bhslr->bhslr_reason); + icl_pdu_free(request); + cfiscsi_session_terminate(cs); + break; + } +} + +static void +cfiscsi_callout(void *context) +{ + struct icl_pdu *cp; + struct iscsi_bhs_nop_in *bhsni; + struct cfiscsi_session *cs; + + cs = context; + + if (cs->cs_terminating) + return; + + callout_schedule(&cs->cs_callout, 1 * hz); + + CFISCSI_SESSION_LOCK(cs); + cs->cs_timeout++; + CFISCSI_SESSION_UNLOCK(cs); + +#ifdef ICL_KERNEL_PROXY + if (cs->cs_waiting_for_ctld || cs->cs_login_phase) { + if (cs->cs_timeout > login_timeout) { + CFISCSI_SESSION_WARN(cs, "login timed out after " + "%d seconds; dropping connection", cs->cs_timeout); + 
cfiscsi_session_terminate(cs); + } + return; + } +#endif + + if (cs->cs_timeout >= ping_timeout) { + CFISCSI_SESSION_WARN(cs, "no ping reply (NOP-Out) after %d seconds; " + "dropping connection", ping_timeout); + cfiscsi_session_terminate(cs); + return; + } + + /* + * If the ping was reset less than one second ago - which means + * that we've received some PDU during the last second - assume + * the traffic flows correctly and don't bother sending a NOP-Out. + * + * (It's 2 - one for one second, and one for incrementing is_timeout + * earlier in this routine.) + */ + if (cs->cs_timeout < 2) + return; + + cp = icl_pdu_new_bhs(cs->cs_conn, M_WAITOK); + bhsni = (struct iscsi_bhs_nop_in *)cp->ip_bhs; + bhsni->bhsni_opcode = ISCSI_BHS_OPCODE_NOP_IN; + bhsni->bhsni_flags = 0x80; + bhsni->bhsni_initiator_task_tag = 0xffffffff; + + cfiscsi_pdu_queue(cp); +} + +static void +cfiscsi_session_terminate_tasks(struct cfiscsi_session *cs) +{ + struct cfiscsi_data_wait *cdw, *tmpcdw; + union ctl_io *io; + int error; + +#ifdef notyet + io = ctl_alloc_io(cs->cs_target->ct_softc->fe.ctl_pool_ref); + if (io == NULL) { + CFISCSI_SESSION_WARN(cs, "can't allocate ctl_io"); + return; + } + ctl_zero_io(io); + io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = NULL; + io->io_hdr.io_type = CTL_IO_TASK; + io->io_hdr.nexus.initid.id = cs->cs_ctl_initid; + io->io_hdr.nexus.targ_port = cs->cs_target->ct_softc->fe.targ_port; + io->io_hdr.nexus.targ_target.id = 0; + io->io_hdr.nexus.targ_lun = lun; + io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */ + io->taskio.task_action = CTL_TASK_ABORT_TASK_SET; + error = ctl_queue(io); + if (error != CTL_RETVAL_COMPLETE) { + CFISCSI_SESSION_WARN(cs, "ctl_queue() failed; error %d", error); + ctl_free_io(io); + } +#else + /* + * CTL doesn't currently support CTL_TASK_ABORT_TASK_SET, so instead + * just iterate over tasks that are waiting for something - data - and + * terminate those. 
+ */ + CFISCSI_SESSION_LOCK(cs); + TAILQ_FOREACH_SAFE(cdw, + &cs->cs_waiting_for_data_out, cdw_next, tmpcdw) { + io = ctl_alloc_io(cs->cs_target->ct_softc->fe.ctl_pool_ref); + if (io == NULL) { + CFISCSI_SESSION_WARN(cs, "can't allocate ctl_io"); + return; + } + ctl_zero_io(io); + io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = NULL; + io->io_hdr.io_type = CTL_IO_TASK; + io->io_hdr.nexus.initid.id = cs->cs_ctl_initid; + io->io_hdr.nexus.targ_port = + cs->cs_target->ct_softc->fe.targ_port; + io->io_hdr.nexus.targ_target.id = 0; + //io->io_hdr.nexus.targ_lun = lun; /* Not needed? */ + io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX */ + io->taskio.task_action = CTL_TASK_ABORT_TASK; + io->taskio.tag_num = cdw->cdw_initiator_task_tag; + error = ctl_queue(io); + if (error != CTL_RETVAL_COMPLETE) { + CFISCSI_SESSION_WARN(cs, "ctl_queue() failed; error %d", error); + ctl_free_io(io); + return; + } +#if 0 + CFISCSI_SESSION_DEBUG(cs, "removing csw for initiator task tag " + "0x%x", cdw->cdw_initiator_task_tag); +#endif + cdw->cdw_ctl_io->scsiio.be_move_done(cdw->cdw_ctl_io); + TAILQ_REMOVE(&cs->cs_waiting_for_data_out, cdw, cdw_next); + uma_zfree(cfiscsi_data_wait_zone, cdw); + } + CFISCSI_SESSION_UNLOCK(cs); +#endif +} + +static void +cfiscsi_maintenance_thread(void *arg) +{ + struct cfiscsi_session *cs; + + cs = arg; + + for (;;) { + CFISCSI_SESSION_LOCK(cs); + if (cs->cs_terminating == false) + cv_wait(&cs->cs_maintenance_cv, &cs->cs_lock); + CFISCSI_SESSION_UNLOCK(cs); + + if (cs->cs_terminating) { + cfiscsi_session_terminate_tasks(cs); + callout_drain(&cs->cs_callout); + + icl_conn_shutdown(cs->cs_conn); + icl_conn_close(cs->cs_conn); + + cs->cs_terminating++; + + /* + * XXX: We used to wait up to 30 seconds to deliver queued PDUs + * to the initiator. We also tried hard to deliver SCSI Responses + * for the aborted PDUs. We don't do that anymore. We might need + * to revisit that. 
+ */ + + cfiscsi_session_delete(cs); + kthread_exit(); + return; + } + CFISCSI_SESSION_DEBUG(cs, "nothing to do"); + } +} + +static void +cfiscsi_session_terminate(struct cfiscsi_session *cs) +{ + + if (cs->cs_terminating != 0) + return; + cs->cs_terminating = 1; + cv_signal(&cs->cs_maintenance_cv); +} + +static int +cfiscsi_session_register_initiator(struct cfiscsi_session *cs) +{ + int error, i; + struct cfiscsi_softc *softc; + + KASSERT(cs->cs_ctl_initid == -1, ("already registered")); + + softc = &cfiscsi_softc; + + mtx_lock(&softc->lock); + for (i = 0; i < softc->max_initiators; i++) { + if (softc->ctl_initids[i] == 0) + break; + } + if (i == softc->max_initiators) { + CFISCSI_SESSION_WARN(cs, "too many concurrent sessions (%d)", + softc->max_initiators); + mtx_unlock(&softc->lock); + return (1); + } + softc->ctl_initids[i] = 1; + mtx_unlock(&softc->lock); + +#if 0 + CFISCSI_SESSION_DEBUG(cs, "adding initiator id %d, max %d", + i, softc->max_initiators); +#endif + cs->cs_ctl_initid = i; + error = ctl_add_initiator(0x0, softc->fe.targ_port, cs->cs_ctl_initid); + if (error != 0) { + CFISCSI_SESSION_WARN(cs, "ctl_add_initiator failed with error %d", error); + mtx_lock(&softc->lock); + softc->ctl_initids[cs->cs_ctl_initid] = 0; + mtx_unlock(&softc->lock); + cs->cs_ctl_initid = -1; + return (1); + } + + return (0); +} + +static void +cfiscsi_session_unregister_initiator(struct cfiscsi_session *cs) +{ + int error; + struct cfiscsi_softc *softc; + + if (cs->cs_ctl_initid == -1) + return; + + softc = &cfiscsi_softc; + + error = ctl_remove_initiator(softc->fe.targ_port, cs->cs_ctl_initid); + if (error != 0) { + CFISCSI_SESSION_WARN(cs, "ctl_remove_initiator failed with error %d", + error); + } + mtx_lock(&softc->lock); + softc->ctl_initids[cs->cs_ctl_initid] = 0; + mtx_unlock(&softc->lock); + cs->cs_ctl_initid = -1; +} + +static struct cfiscsi_session * +cfiscsi_session_new(struct cfiscsi_softc *softc) +{ + struct cfiscsi_session *cs; + int error; + + cs = 
malloc(sizeof(*cs), M_CFISCSI, M_NOWAIT | M_ZERO); + if (cs == NULL) { + CFISCSI_WARN("malloc failed"); + return (NULL); + } + cs->cs_ctl_initid = -1; + + refcount_init(&cs->cs_outstanding_ctl_pdus, 0); + TAILQ_INIT(&cs->cs_waiting_for_data_out); + mtx_init(&cs->cs_lock, "cfiscsi_lock", NULL, MTX_DEF); + cv_init(&cs->cs_maintenance_cv, "cfiscsi_mt"); +#ifdef ICL_KERNEL_PROXY + cv_init(&cs->cs_login_cv, "cfiscsi_login"); +#endif + + cs->cs_conn = icl_conn_new(); + cs->cs_conn->ic_receive = cfiscsi_receive_callback; + cs->cs_conn->ic_error = cfiscsi_error_callback; + cs->cs_conn->ic_prv0 = cs; + + error = kthread_add(cfiscsi_maintenance_thread, cs, NULL, NULL, 0, 0, "cfiscsimt"); + if (error != 0) { + CFISCSI_SESSION_WARN(cs, "kthread_add(9) failed with error %d", error); + free(cs, M_CFISCSI); + return (NULL); + } + + mtx_lock(&softc->lock); + cs->cs_id = softc->last_session_id + 1; + softc->last_session_id++; + mtx_unlock(&softc->lock); + + mtx_lock(&softc->lock); + TAILQ_INSERT_TAIL(&softc->sessions, cs, cs_next); + mtx_unlock(&softc->lock); + + /* + * Start pinging the initiator. 
+ */ + callout_init(&cs->cs_callout, 1); + callout_reset(&cs->cs_callout, 1 * hz, cfiscsi_callout, cs); + + return (cs); +} + +static void +cfiscsi_session_delete(struct cfiscsi_session *cs) +{ + struct cfiscsi_softc *softc; + + softc = &cfiscsi_softc; + + KASSERT(cs->cs_outstanding_ctl_pdus == 0, + ("destroying session with outstanding CTL pdus")); + KASSERT(TAILQ_EMPTY(&cs->cs_waiting_for_data_out), + ("destroying session with non-empty queue")); + + cfiscsi_session_unregister_initiator(cs); + if (cs->cs_target != NULL) + cfiscsi_target_release(cs->cs_target); + icl_conn_close(cs->cs_conn); + icl_conn_free(cs->cs_conn); + + mtx_lock(&softc->lock); + TAILQ_REMOVE(&softc->sessions, cs, cs_next); + mtx_unlock(&softc->lock); + + free(cs, M_CFISCSI); +} + +int +cfiscsi_init(void) +{ + struct cfiscsi_softc *softc; + struct ctl_frontend *fe; + int retval; + + softc = &cfiscsi_softc; + retval = 0; + bzero(softc, sizeof(*softc)); + mtx_init(&softc->lock, "cfiscsi", NULL, MTX_DEF); + +#ifdef ICL_KERNEL_PROXY + cv_init(&softc->accept_cv, "cfiscsi_accept"); +#endif + TAILQ_INIT(&softc->sessions); + TAILQ_INIT(&softc->targets); + + fe = &softc->fe; + fe->port_type = CTL_PORT_ISCSI; + /* XXX KDM what should the real number be here? */ + fe->num_requested_ctl_io = 4096; + snprintf(softc->port_name, sizeof(softc->port_name), "iscsi"); + fe->port_name = softc->port_name; + fe->port_online = cfiscsi_online; + fe->port_offline = cfiscsi_offline; + fe->onoff_arg = softc; + fe->targ_enable = cfiscsi_targ_enable; + fe->targ_disable = cfiscsi_targ_disable; + fe->lun_enable = cfiscsi_lun_enable; + fe->lun_disable = cfiscsi_lun_disable; + fe->targ_lun_arg = softc; + fe->ioctl = cfiscsi_ioctl; + fe->devid = cfiscsi_devid; + fe->fe_datamove = cfiscsi_datamove; + fe->fe_done = cfiscsi_done; + + /* XXX KDM what should we report here? */ + /* XXX These should probably be fetched from CTL. 
*/ + fe->max_targets = 1; + fe->max_target_id = 15; + + retval = ctl_frontend_register(fe, /*master_SC*/ 1); + if (retval != 0) { + CFISCSI_WARN("ctl_frontend_register() failed with error %d", + retval); + retval = 1; + goto bailout; + } + + softc->max_initiators = fe->max_initiators; + + cfiscsi_data_wait_zone = uma_zcreate("cfiscsi_data_wait", + sizeof(struct cfiscsi_data_wait), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + + return (0); + +bailout: + return (retval); +} + +static int +cfiscsi_module_event_handler(module_t mod, int what, void *arg) +{ + + switch (what) { + case MOD_LOAD: + return (cfiscsi_init()); + case MOD_UNLOAD: + return (EBUSY); + default: + return (EOPNOTSUPP); + } +} + +#ifdef ICL_KERNEL_PROXY +static void +cfiscsi_accept(struct socket *so) +{ + struct cfiscsi_session *cs; + + cs = cfiscsi_session_new(&cfiscsi_softc); + if (cs == NULL) { + CFISCSI_WARN("failed to create session"); + return; + } + + icl_conn_handoff_sock(cs->cs_conn, so); + cs->cs_waiting_for_ctld = true; + cv_signal(&cfiscsi_softc.accept_cv); +} +#endif + +static void +cfiscsi_online(void *arg) +{ + struct cfiscsi_softc *softc; + + softc = (struct cfiscsi_softc *)arg; + + softc->online = 1; +#ifdef ICL_KERNEL_PROXY + if (softc->listener != NULL) + icl_listen_free(softc->listener); + softc->listener = icl_listen_new(cfiscsi_accept); +#endif +} + +static void +cfiscsi_offline(void *arg) +{ + struct cfiscsi_softc *softc; + struct cfiscsi_session *cs; + + softc = (struct cfiscsi_softc *)arg; + + softc->online = 0; + + mtx_lock(&softc->lock); + TAILQ_FOREACH(cs, &softc->sessions, cs_next) + cfiscsi_session_terminate(cs); + mtx_unlock(&softc->lock); + +#ifdef ICL_KERNEL_PROXY + icl_listen_free(softc->listener); + softc->listener = NULL; +#endif +} + +static int +cfiscsi_targ_enable(void *arg, struct ctl_id targ_id) +{ + + return (0); +} + +static int +cfiscsi_targ_disable(void *arg, struct ctl_id targ_id) +{ + + return (0); +} + +static void 
+cfiscsi_ioctl_handoff(struct ctl_iscsi *ci) +{ + struct cfiscsi_softc *softc; + struct cfiscsi_session *cs; + struct cfiscsi_target *ct; + struct ctl_iscsi_handoff_params *cihp; +#ifndef ICL_KERNEL_PROXY + int error; +#endif + + cihp = (struct ctl_iscsi_handoff_params *)&(ci->data); + softc = &cfiscsi_softc; + + CFISCSI_DEBUG("new connection from %s (%s) to %s", + cihp->initiator_name, cihp->initiator_addr, + cihp->target_name); + + if (softc->online == 0) { + ci->status = CTL_ISCSI_ERROR; + snprintf(ci->error_str, sizeof(ci->error_str), + "%s: port offline", __func__); + return; + } + + ct = cfiscsi_target_find(softc, cihp->target_name); + if (ct == NULL) { + ci->status = CTL_ISCSI_ERROR; + snprintf(ci->error_str, sizeof(ci->error_str), + "%s: target not found", __func__); + return; + } + +#ifdef ICL_KERNEL_PROXY + mtx_lock(&cfiscsi_softc.lock); + TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { + if (cs->cs_id == cihp->socket) + break; + } + if (cs == NULL) { + mtx_unlock(&cfiscsi_softc.lock); + snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); + ci->status = CTL_ISCSI_ERROR; + return; + } + mtx_unlock(&cfiscsi_softc.lock); +#else + cs = cfiscsi_session_new(softc); + if (cs == NULL) { + ci->status = CTL_ISCSI_ERROR; + snprintf(ci->error_str, sizeof(ci->error_str), + "%s: cfiscsi_session_new failed", __func__); + cfiscsi_target_release(ct); + return; + } +#endif + cs->cs_target = ct; + + /* + * First PDU of Full Feature phase has the same CmdSN as the last + * PDU from the Login Phase received from the initiator. Thus, + * the -1 below. 
+ */ + cs->cs_portal_group_tag = cihp->portal_group_tag; + cs->cs_cmdsn = cihp->cmdsn; + cs->cs_statsn = cihp->statsn; + cs->cs_max_data_segment_length = cihp->max_recv_data_segment_length; + cs->cs_max_burst_length = cihp->max_burst_length; + cs->cs_immediate_data = !!cihp->immediate_data; + if (cihp->header_digest == CTL_ISCSI_DIGEST_CRC32C) + cs->cs_conn->ic_header_crc32c = true; + if (cihp->data_digest == CTL_ISCSI_DIGEST_CRC32C) + cs->cs_conn->ic_data_crc32c = true; + + strlcpy(cs->cs_initiator_name, + cihp->initiator_name, sizeof(cs->cs_initiator_name)); + strlcpy(cs->cs_initiator_addr, + cihp->initiator_addr, sizeof(cs->cs_initiator_addr)); + strlcpy(cs->cs_initiator_alias, + cihp->initiator_alias, sizeof(cs->cs_initiator_alias)); + +#ifdef ICL_KERNEL_PROXY + cs->cs_login_phase = false; +#else + error = icl_conn_handoff(cs->cs_conn, cihp->socket); + if (error != 0) { + cfiscsi_session_delete(cs); + ci->status = CTL_ISCSI_ERROR; + snprintf(ci->error_str, sizeof(ci->error_str), + "%s: icl_conn_handoff failed with error %d", + __func__, error); + return; + } +#endif + + /* + * Register initiator with CTL. + */ + cfiscsi_session_register_initiator(cs); + +#ifdef ICL_KERNEL_PROXY + /* + * First PDU of the Full Feature phase has likely already arrived. + * We have to pick it up and execute properly. 
+ */ + if (cs->cs_login_pdu != NULL) { + CFISCSI_SESSION_DEBUG(cs, "picking up first PDU"); + cfiscsi_pdu_handle(cs->cs_login_pdu); + cs->cs_login_pdu = NULL; + } +#endif + + ci->status = CTL_ISCSI_OK; +} + +static void +cfiscsi_ioctl_list(struct ctl_iscsi *ci) +{ + struct ctl_iscsi_list_params *cilp; + struct cfiscsi_session *cs; + struct cfiscsi_softc *softc; + struct sbuf *sb; + int error; + + cilp = (struct ctl_iscsi_list_params *)&(ci->data); + softc = &cfiscsi_softc; + + sb = sbuf_new(NULL, NULL, cilp->alloc_len, SBUF_FIXEDLEN); + if (sb == NULL) { + ci->status = CTL_ISCSI_ERROR; + snprintf(ci->error_str, sizeof(ci->error_str), + "Unable to allocate %d bytes for iSCSI session list", + cilp->alloc_len); + return; + } + + sbuf_printf(sb, "<ctlislist>\n"); + mtx_lock(&softc->lock); + TAILQ_FOREACH(cs, &softc->sessions, cs_next) { +#ifdef ICL_KERNEL_PROXY + if (cs->cs_target == NULL) + continue; +#endif + error = sbuf_printf(sb, "<connection id=\"%d\">" + "<initiator>%s</initiator>" + "<initiator_addr>%s</initiator_addr>" + "<initiator_alias>%s</initiator_alias>" + "<target>%s</target>" + "<target_alias>%s</target_alias>" + "<header_digest>%s</header_digest>" + "<data_digest>%s</data_digest>" + "<max_data_segment_length>%zd</max_data_segment_length>" + "<immediate_data>%d</immediate_data>" + "<iser>%d</iser>" + "</connection>\n", + cs->cs_id, + cs->cs_initiator_name, cs->cs_initiator_addr, cs->cs_initiator_alias, + cs->cs_target->ct_name, cs->cs_target->ct_alias, + cs->cs_conn->ic_header_crc32c ? "CRC32C" : "None", + cs->cs_conn->ic_data_crc32c ? 
"CRC32C" : "None", + cs->cs_max_data_segment_length, + cs->cs_immediate_data, + cs->cs_conn->ic_iser); + if (error != 0) + break; + } + mtx_unlock(&softc->lock); + error = sbuf_printf(sb, "</ctlislist>\n"); + if (error != 0) { + sbuf_delete(sb); + ci->status = CTL_ISCSI_LIST_NEED_MORE_SPACE; + snprintf(ci->error_str, sizeof(ci->error_str), + "Out of space, %d bytes is too small", cilp->alloc_len); + return; + } + sbuf_finish(sb); + + error = copyout(sbuf_data(sb), cilp->conn_xml, sbuf_len(sb) + 1); + cilp->fill_len = sbuf_len(sb) + 1; + ci->status = CTL_ISCSI_OK; + sbuf_delete(sb); +} + +static void +cfiscsi_ioctl_terminate(struct ctl_iscsi *ci) +{ + struct icl_pdu *response; + struct iscsi_bhs_asynchronous_message *bhsam; + struct ctl_iscsi_terminate_params *citp; + struct cfiscsi_session *cs; + struct cfiscsi_softc *softc; + int found = 0; + + citp = (struct ctl_iscsi_terminate_params *)&(ci->data); + softc = &cfiscsi_softc; + + mtx_lock(&softc->lock); + TAILQ_FOREACH(cs, &softc->sessions, cs_next) { + if (citp->all == 0 && cs->cs_id != citp->connection_id && + strcmp(cs->cs_initiator_name, citp->initiator_name) != 0 && + strcmp(cs->cs_initiator_addr, citp->initiator_addr) != 0) + continue; + + response = icl_pdu_new_bhs(cs->cs_conn, M_NOWAIT); + if (response == NULL) { + /* + * Oh well. Just terminate the connection. 
+ */ + } else { + bhsam = (struct iscsi_bhs_asynchronous_message *) + response->ip_bhs; + bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE; + bhsam->bhsam_flags = 0x80; + bhsam->bhsam_0xffffffff = 0xffffffff; + bhsam->bhsam_async_event = + BHSAM_EVENT_TARGET_TERMINATES_SESSION; + cfiscsi_pdu_queue(response); + } + cfiscsi_session_terminate(cs); + found++; + } + mtx_unlock(&softc->lock); + + if (found == 0) { + ci->status = CTL_ISCSI_SESSION_NOT_FOUND; + snprintf(ci->error_str, sizeof(ci->error_str), + "No matching connections found"); + return; + } + + ci->status = CTL_ISCSI_OK; +} + +static void +cfiscsi_ioctl_logout(struct ctl_iscsi *ci) +{ + struct icl_pdu *response; + struct iscsi_bhs_asynchronous_message *bhsam; + struct ctl_iscsi_logout_params *cilp; + struct cfiscsi_session *cs; + struct cfiscsi_softc *softc; + int found = 0; + + cilp = (struct ctl_iscsi_logout_params *)&(ci->data); + softc = &cfiscsi_softc; + + mtx_lock(&softc->lock); + TAILQ_FOREACH(cs, &softc->sessions, cs_next) { + if (cilp->all == 0 && cs->cs_id != cilp->connection_id && + strcmp(cs->cs_initiator_name, cilp->initiator_name) != 0 && + strcmp(cs->cs_initiator_addr, cilp->initiator_addr) != 0) + continue; + + response = icl_pdu_new_bhs(cs->cs_conn, M_NOWAIT); + if (response == NULL) { + ci->status = CTL_ISCSI_ERROR; + snprintf(ci->error_str, sizeof(ci->error_str), + "Unable to allocate memory"); + mtx_unlock(&softc->lock); + return; + } + bhsam = + (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; + bhsam->bhsam_opcode = ISCSI_BHS_OPCODE_ASYNC_MESSAGE; + bhsam->bhsam_flags = 0x80; + bhsam->bhsam_async_event = BHSAM_EVENT_TARGET_REQUESTS_LOGOUT; + bhsam->bhsam_parameter3 = htons(10); + cfiscsi_pdu_queue(response); + found++; + } + mtx_unlock(&softc->lock); + + if (found == 0) { + ci->status = CTL_ISCSI_SESSION_NOT_FOUND; + snprintf(ci->error_str, sizeof(ci->error_str), + "No matching connections found"); + return; + } + + ci->status = CTL_ISCSI_OK; +} + +#ifdef ICL_KERNEL_PROXY 
+static void +cfiscsi_ioctl_listen(struct ctl_iscsi *ci) +{ + struct ctl_iscsi_listen_params *cilp; + struct sockaddr *sa; + int error; + + cilp = (struct ctl_iscsi_listen_params *)&(ci->data); + + if (cfiscsi_softc.listener == NULL) { + CFISCSI_DEBUG("no listener"); + snprintf(ci->error_str, sizeof(ci->error_str), "no listener"); + ci->status = CTL_ISCSI_ERROR; + return; + } + + error = getsockaddr(&sa, (void *)cilp->addr, cilp->addrlen); + if (error != 0) { + CFISCSI_DEBUG("getsockaddr, error %d", error); + snprintf(ci->error_str, sizeof(ci->error_str), "getsockaddr failed"); + ci->status = CTL_ISCSI_ERROR; + return; + } + + error = icl_listen_add(cfiscsi_softc.listener, cilp->iser, cilp->domain, + cilp->socktype, cilp->protocol, sa); + if (error != 0) { + free(sa, M_SONAME); + CFISCSI_DEBUG("icl_listen_add, error %d", error); + snprintf(ci->error_str, sizeof(ci->error_str), + "icl_listen_add failed, error %d", error); + ci->status = CTL_ISCSI_ERROR; + return; + } + + ci->status = CTL_ISCSI_OK; +} + +static void +cfiscsi_ioctl_accept(struct ctl_iscsi *ci) +{ + struct ctl_iscsi_accept_params *ciap; + struct cfiscsi_session *cs; + int error; + + ciap = (struct ctl_iscsi_accept_params *)&(ci->data); + + mtx_lock(&cfiscsi_softc.lock); + for (;;) { + TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { + if (cs->cs_waiting_for_ctld) + break; + } + if (cs != NULL) + break; + error = cv_wait_sig(&cfiscsi_softc.accept_cv, &cfiscsi_softc.lock); + if (error != 0) { + mtx_unlock(&cfiscsi_softc.lock); + snprintf(ci->error_str, sizeof(ci->error_str), "interrupted"); + ci->status = CTL_ISCSI_ERROR; + return; + } + } + mtx_unlock(&cfiscsi_softc.lock); + + cs->cs_waiting_for_ctld = false; + cs->cs_login_phase = true; + + ciap->connection_id = cs->cs_id; + ci->status = CTL_ISCSI_OK; +} + +static void +cfiscsi_ioctl_send(struct ctl_iscsi *ci) +{ + struct ctl_iscsi_send_params *cisp; + struct cfiscsi_session *cs; + struct icl_pdu *ip; + size_t datalen; + void *data; + int error; + 
+ cisp = (struct ctl_iscsi_send_params *)&(ci->data); + + mtx_lock(&cfiscsi_softc.lock); + TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { + if (cs->cs_id == cisp->connection_id) + break; + } + if (cs == NULL) { + mtx_unlock(&cfiscsi_softc.lock); + snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); + ci->status = CTL_ISCSI_ERROR; + return; + } + mtx_unlock(&cfiscsi_softc.lock); + +#if 0 + if (cs->cs_login_phase == false) + return (EBUSY); +#endif + + if (cs->cs_terminating) { + snprintf(ci->error_str, sizeof(ci->error_str), "connection is terminating"); + ci->status = CTL_ISCSI_ERROR; + return; + } + + datalen = cisp->data_segment_len; + /* + * XXX + */ + //if (datalen > CFISCSI_MAX_DATA_SEGMENT_LENGTH) { + if (datalen > 65535) { + snprintf(ci->error_str, sizeof(ci->error_str), "data segment too big"); + ci->status = CTL_ISCSI_ERROR; + return; + } + if (datalen > 0) { + data = malloc(datalen, M_CFISCSI, M_WAITOK); + error = copyin(cisp->data_segment, data, datalen); + if (error != 0) { + free(data, M_CFISCSI); + snprintf(ci->error_str, sizeof(ci->error_str), "copyin error %d", error); + ci->status = CTL_ISCSI_ERROR; + return; + } + } + + ip = icl_pdu_new_bhs(cs->cs_conn, M_WAITOK); + memcpy(ip->ip_bhs, cisp->bhs, sizeof(*ip->ip_bhs)); + if (datalen > 0) { + icl_pdu_append_data(ip, data, datalen, M_WAITOK); + free(data, M_CFISCSI); + } + icl_pdu_queue(ip); + ci->status = CTL_ISCSI_OK; +} + +static void +cfiscsi_ioctl_receive(struct ctl_iscsi *ci) +{ + struct ctl_iscsi_receive_params *cirp; + struct cfiscsi_session *cs; + struct icl_pdu *ip; + void *data; + + cirp = (struct ctl_iscsi_receive_params *)&(ci->data); + + mtx_lock(&cfiscsi_softc.lock); + TAILQ_FOREACH(cs, &cfiscsi_softc.sessions, cs_next) { + if (cs->cs_id == cirp->connection_id) + break; + } + if (cs == NULL) { + mtx_unlock(&cfiscsi_softc.lock); + snprintf(ci->error_str, sizeof(ci->error_str), "connection not found"); + ci->status = CTL_ISCSI_ERROR; + return; + } + 
mtx_unlock(&cfiscsi_softc.lock); + +#if 0 + if (is->is_login_phase == false) + return (EBUSY); +#endif + + CFISCSI_SESSION_LOCK(cs); + while (cs->cs_login_pdu == NULL && + cs->cs_terminating == false) + cv_wait(&cs->cs_login_cv, &cs->cs_lock); + if (cs->cs_terminating) { + CFISCSI_SESSION_UNLOCK(cs); + snprintf(ci->error_str, sizeof(ci->error_str), "connection terminating"); + ci->status = CTL_ISCSI_ERROR; + return; + } + ip = cs->cs_login_pdu; + cs->cs_login_pdu = NULL; + CFISCSI_SESSION_UNLOCK(cs); + + if (ip->ip_data_len > cirp->data_segment_len) { + icl_pdu_free(ip); + snprintf(ci->error_str, sizeof(ci->error_str), "data segment too big"); + ci->status = CTL_ISCSI_ERROR; + return; + } + + copyout(ip->ip_bhs, cirp->bhs, sizeof(*ip->ip_bhs)); + if (ip->ip_data_len > 0) { + data = malloc(ip->ip_data_len, M_CFISCSI, M_WAITOK); + icl_pdu_get_data(ip, 0, data, ip->ip_data_len); + copyout(data, cirp->data_segment, ip->ip_data_len); + free(data, M_CFISCSI); + } + + icl_pdu_free(ip); + ci->status = CTL_ISCSI_OK; +} + +static void +cfiscsi_ioctl_close(struct ctl_iscsi *ci) +{ + /* + * XXX + */ +} +#endif /* !ICL_KERNEL_PROXY */ + +static int +cfiscsi_ioctl(struct cdev *dev, + u_long cmd, caddr_t addr, int flag, struct thread *td) +{ + struct ctl_iscsi *ci; + + if (cmd != CTL_ISCSI) + return (ENOTTY); + + ci = (struct ctl_iscsi *)addr; + switch (ci->type) { + case CTL_ISCSI_HANDOFF: + cfiscsi_ioctl_handoff(ci); + break; + case CTL_ISCSI_LIST: + cfiscsi_ioctl_list(ci); + break; + case CTL_ISCSI_TERMINATE: + cfiscsi_ioctl_terminate(ci); + break; + case CTL_ISCSI_LOGOUT: + cfiscsi_ioctl_logout(ci); + break; +#ifdef ICL_KERNEL_PROXY + case CTL_ISCSI_LISTEN: + cfiscsi_ioctl_listen(ci); + break; + case CTL_ISCSI_ACCEPT: + cfiscsi_ioctl_accept(ci); + break; + case CTL_ISCSI_SEND: + cfiscsi_ioctl_send(ci); + break; + case CTL_ISCSI_RECEIVE: + cfiscsi_ioctl_receive(ci); + break; + case CTL_ISCSI_CLOSE: + cfiscsi_ioctl_close(ci); + break; +#endif /* ICL_KERNEL_PROXY */ + default: + 
ci->status = CTL_ISCSI_ERROR; + snprintf(ci->error_str, sizeof(ci->error_str), + "%s: invalid iSCSI request type %d", __func__, ci->type); + break; + } + + return (0); +} + +static int +cfiscsi_devid(struct ctl_scsiio *ctsio, int alloc_len) +{ + struct cfiscsi_session *cs; + struct scsi_vpd_device_id *devid_ptr; + struct scsi_vpd_id_descriptor *desc, *desc1; + struct scsi_vpd_id_descriptor *desc2, *desc3; /* for types 4h and 5h */ + struct scsi_vpd_id_t10 *t10id; + struct ctl_lun *lun; + const struct icl_pdu *request; + size_t devid_len, wwpn_len; + + lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; + request = ctsio->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr; + cs = PDU_SESSION(request); + + wwpn_len = strlen(cs->cs_target->ct_name); + wwpn_len += strlen(",t,0x01"); + wwpn_len += 1; /* '\0' */ + if ((wwpn_len % 4) != 0) + wwpn_len += (4 - (wwpn_len % 4)); + + devid_len = sizeof(struct scsi_vpd_device_id) + + sizeof(struct scsi_vpd_id_descriptor) + + sizeof(struct scsi_vpd_id_t10) + CTL_DEVID_LEN + + sizeof(struct scsi_vpd_id_descriptor) + wwpn_len + + sizeof(struct scsi_vpd_id_descriptor) + + sizeof(struct scsi_vpd_id_rel_trgt_port_id) + + sizeof(struct scsi_vpd_id_descriptor) + + sizeof(struct scsi_vpd_id_trgt_port_grp_id); + + ctsio->kern_data_ptr = malloc(devid_len, M_CTL, M_WAITOK | M_ZERO); + devid_ptr = (struct scsi_vpd_device_id *)ctsio->kern_data_ptr; + ctsio->kern_sg_entries = 0; + + if (devid_len < alloc_len) { + ctsio->residual = alloc_len - devid_len; + ctsio->kern_data_len = devid_len; + ctsio->kern_total_len = devid_len; + } else { + ctsio->residual = 0; + ctsio->kern_data_len = alloc_len; + ctsio->kern_total_len = alloc_len; + } + ctsio->kern_data_resid = 0; + ctsio->kern_rel_offset = 0; + ctsio->kern_sg_entries = 0; + + desc = (struct scsi_vpd_id_descriptor *)devid_ptr->desc_list; + t10id = (struct scsi_vpd_id_t10 *)&desc->identifier[0]; + desc1 = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + + sizeof(struct 
scsi_vpd_id_t10) + CTL_DEVID_LEN); + desc2 = (struct scsi_vpd_id_descriptor *)(&desc1->identifier[0] + + wwpn_len); + desc3 = (struct scsi_vpd_id_descriptor *)(&desc2->identifier[0] + + sizeof(struct scsi_vpd_id_rel_trgt_port_id)); + + if (lun != NULL) + devid_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | + lun->be_lun->lun_type; + else + devid_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; + + devid_ptr->page_code = SVPD_DEVICE_ID; + + scsi_ulto2b(devid_len - 4, devid_ptr->length); + + /* + * We're using a LUN association here. i.e., this device ID is a + * per-LUN identifier. + */ + desc->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_ASCII; + desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_T10; + desc->length = sizeof(*t10id) + CTL_DEVID_LEN; + strncpy((char *)t10id->vendor, CTL_VENDOR, sizeof(t10id->vendor)); + + /* + * If we've actually got a backend, copy the device id from the + * per-LUN data. Otherwise, set it to all spaces. + */ + if (lun != NULL) { + /* + * Copy the backend's LUN ID. + */ + strncpy((char *)t10id->vendor_spec_id, + (char *)lun->be_lun->device_id, CTL_DEVID_LEN); + } else { + /* + * No backend, set this to spaces. + */ + memset(t10id->vendor_spec_id, 0x20, CTL_DEVID_LEN); + } + + /* + * desc1 is for the WWPN which is a port asscociation. 
+ */ + desc1->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_UTF8; + desc1->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | + SVPD_ID_TYPE_SCSI_NAME; + desc1->length = wwpn_len; + snprintf(desc1->identifier, wwpn_len, "%s,t,0x%x", + cs->cs_target->ct_name, cs->cs_portal_group_tag); + + /* + * desc2 is for the Relative Target Port(type 4h) identifier + */ + desc2->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_BINARY; + desc2->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | + SVPD_ID_TYPE_RELTARG; + desc2->length = 4; + desc2->identifier[3] = 1; + + /* + * desc3 is for the Target Port Group(type 5h) identifier + */ + desc3->proto_codeset = (SCSI_PROTO_ISCSI << 4) | SVPD_ID_CODESET_BINARY; + desc3->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | + SVPD_ID_TYPE_TPORTGRP; + desc3->length = 4; + desc3->identifier[3] = 1; + + ctsio->scsi_status = SCSI_STATUS_OK; + + ctsio->be_move_done = ctl_config_move_done; + ctl_datamove((union ctl_io *)ctsio); + + return (CTL_RETVAL_COMPLETE); +} + +static void +cfiscsi_target_hold(struct cfiscsi_target *ct) +{ + + refcount_acquire(&ct->ct_refcount); +} + +static void +cfiscsi_target_release(struct cfiscsi_target *ct) +{ + int old; + struct cfiscsi_softc *softc; + + softc = ct->ct_softc; + + old = ct->ct_refcount; + if (old > 1 && atomic_cmpset_int(&ct->ct_refcount, old, old - 1)) + return; + + mtx_lock(&softc->lock); + if (refcount_release(&ct->ct_refcount)) { + TAILQ_REMOVE(&softc->targets, ct, ct_next); + mtx_unlock(&softc->lock); + free(ct, M_CFISCSI); + + return; + } + mtx_unlock(&softc->lock); +} + +static struct cfiscsi_target * +cfiscsi_target_find(struct cfiscsi_softc *softc, const char *name) +{ + struct cfiscsi_target *ct; + + mtx_lock(&softc->lock); + TAILQ_FOREACH(ct, &softc->targets, ct_next) { + if (strcmp(name, ct->ct_name) != 0) + continue; + cfiscsi_target_hold(ct); + mtx_unlock(&softc->lock); + return (ct); + } + mtx_unlock(&softc->lock); + + return (NULL); +} + +static struct cfiscsi_target * 
+cfiscsi_target_find_or_create(struct cfiscsi_softc *softc, const char *name, + const char *alias) +{ + struct cfiscsi_target *ct, *newct; + int i; + + if (name[0] == '\0' || strlen(name) >= CTL_ISCSI_NAME_LEN) + return (NULL); + + newct = malloc(sizeof(*newct), M_CFISCSI, M_WAITOK | M_ZERO); + + mtx_lock(&softc->lock); + TAILQ_FOREACH(ct, &softc->targets, ct_next) { + if (strcmp(name, ct->ct_name) != 0) + continue; + cfiscsi_target_hold(ct); + mtx_unlock(&softc->lock); + free(newct, M_CFISCSI); + return (ct); + } + + for (i = 0; i < CTL_MAX_LUNS; i++) + newct->ct_luns[i] = -1; + + strlcpy(newct->ct_name, name, sizeof(newct->ct_name)); + if (alias != NULL) + strlcpy(newct->ct_alias, alias, sizeof(newct->ct_alias)); + refcount_init(&newct->ct_refcount, 1); + newct->ct_softc = softc; + TAILQ_INSERT_TAIL(&softc->targets, newct, ct_next); + mtx_unlock(&softc->lock); + + return (newct); +} + +/* + * Takes LUN from the target space and returns LUN from the CTL space. + */ +static uint32_t +cfiscsi_map_lun(void *arg, uint32_t lun) +{ + struct cfiscsi_session *cs; + + cs = arg; + + if (lun >= CTL_MAX_LUNS) { + CFISCSI_DEBUG("requested lun number %d is higher " + "than maximum %d", lun, CTL_MAX_LUNS - 1); + return (0xffffffff); + } + + if (cs->cs_target->ct_luns[lun] < 0) + return (0xffffffff); + + return (cs->cs_target->ct_luns[lun]); +} + +static int +cfiscsi_target_set_lun(struct cfiscsi_target *ct, + unsigned long lun_id, unsigned long ctl_lun_id) +{ + + if (lun_id >= CTL_MAX_LUNS) { + CFISCSI_WARN("requested lun number %ld is higher " + "than maximum %d", lun_id, CTL_MAX_LUNS - 1); + return (-1); + } + + if (ct->ct_luns[lun_id] >= 0) { + /* + * CTL calls cfiscsi_lun_enable() twice for each LUN - once + * when the LUN is created, and a second time just before + * the port is brought online; don't emit warnings + * for that case. 
+ */ + if (ct->ct_luns[lun_id] == ctl_lun_id) + return (0); + CFISCSI_WARN("lun %ld already allocated", lun_id); + return (-1); + } + +#if 0 + CFISCSI_DEBUG("adding mapping for lun %ld, target %s " + "to ctl lun %ld", lun_id, ct->ct_name, ctl_lun_id); +#endif + + ct->ct_luns[lun_id] = ctl_lun_id; + cfiscsi_target_hold(ct); + + return (0); +} + +static int +cfiscsi_target_unset_lun(struct cfiscsi_target *ct, unsigned long lun_id) +{ + + if (ct->ct_luns[lun_id] < 0) { + CFISCSI_WARN("lun %ld not allocated", lun_id); + return (-1); + } + + ct->ct_luns[lun_id] = -1; + cfiscsi_target_release(ct); + + return (0); +} + +static int +cfiscsi_lun_enable(void *arg, struct ctl_id target_id, int lun_id) +{ + struct cfiscsi_softc *softc; + struct cfiscsi_target *ct; + struct ctl_be_lun_option *opt; + const char *target = NULL, *target_alias = NULL; + const char *lun = NULL; + unsigned long tmp; + + softc = (struct cfiscsi_softc *)arg; + + STAILQ_FOREACH(opt, + &control_softc->ctl_luns[lun_id]->be_lun->options, links) { + if (strcmp(opt->name, "cfiscsi_target") == 0) + target = opt->value; + else if (strcmp(opt->name, "cfiscsi_target_alias") == 0) + target_alias = opt->value; + else if (strcmp(opt->name, "cfiscsi_lun") == 0) + lun = opt->value; + } + + if (target == NULL && lun == NULL) + return (0); + + if (target == NULL || lun == NULL) { + CFISCSI_WARN("lun added with cfiscsi_target, but without " + "cfiscsi_lun, or the other way around; ignoring"); + return (0); + } + + ct = cfiscsi_target_find_or_create(softc, target, target_alias); + if (ct == NULL) { + CFISCSI_WARN("failed to create target \"%s\"", target); + return (0); + } + + tmp = strtoul(lun, NULL, 10); + cfiscsi_target_set_lun(ct, tmp, lun_id); + return (0); +} + +static int +cfiscsi_lun_disable(void *arg, struct ctl_id target_id, int lun_id) +{ + struct cfiscsi_softc *softc; + struct cfiscsi_target *ct; + int i; + + softc = (struct cfiscsi_softc *)arg; + + mtx_lock(&softc->lock); + TAILQ_FOREACH(ct, &softc->targets, 
ct_next) { + for (i = 0; i < CTL_MAX_LUNS; i++) { + if (ct->ct_luns[i] < 0) + continue; + if (ct->ct_luns[i] != lun_id) + continue; + cfiscsi_target_unset_lun(ct, i); + break; + } + } + mtx_unlock(&softc->lock); + return (0); +} + +static void +cfiscsi_datamove(union ctl_io *io) +{ + struct cfiscsi_session *cs; + struct icl_pdu *request, *response; + const struct iscsi_bhs_scsi_command *bhssc; + struct iscsi_bhs_data_in *bhsdi; + struct iscsi_bhs_r2t *bhsr2t; + struct cfiscsi_data_wait *cdw; + struct ctl_sg_entry ctl_sg_entry, *ctl_sglist; + size_t copy_len, len, off; + const char *addr; + int ctl_sg_count, i; + uint32_t target_transfer_tag; + bool done; + + request = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr; + cs = PDU_SESSION(request); + + bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs; + KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == + ISCSI_BHS_OPCODE_SCSI_COMMAND, + ("bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_COMMAND")); + + if (io->scsiio.kern_sg_entries > 0) { + ctl_sglist = (struct ctl_sg_entry *)io->scsiio.kern_data_ptr; + ctl_sg_count = io->scsiio.kern_sg_entries; + } else { + ctl_sglist = &ctl_sg_entry; + ctl_sglist->addr = io->scsiio.kern_data_ptr; + ctl_sglist->len = io->scsiio.kern_data_len; + ctl_sg_count = 1; + } + + /* + * We need to record it so that we can properly report + * underflow/underflow. + */ + PDU_TOTAL_TRANSFER_LEN(request) = io->scsiio.kern_total_len; + + if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) { +#if 0 + if (ctl_sg_count > 1) + CFISCSI_SESSION_DEBUG(cs, "ctl_sg_count = %d", ctl_sg_count); +#endif + + /* + * This is the offset within the current SCSI command; + * i.e. for the first call of datamove(), it will be 0, + * and for subsequent ones it will be the sum of lengths + * of previous ones. 
+ */ + off = htonl(io->scsiio.kern_rel_offset); + if (off > 1) + CFISCSI_SESSION_DEBUG(cs, "off = %zd", off); + + i = 0; + addr = NULL; + len = 0; + response = NULL; + bhsdi = NULL; + for (;;) { + KASSERT(i < ctl_sg_count, ("i >= ctl_sg_count")); + if (response == NULL) { + response = + cfiscsi_pdu_new_response(request, M_WAITOK); + bhsdi = (struct iscsi_bhs_data_in *) + response->ip_bhs; + bhsdi->bhsdi_opcode = + ISCSI_BHS_OPCODE_SCSI_DATA_IN; + bhsdi->bhsdi_initiator_task_tag = + bhssc->bhssc_initiator_task_tag; + bhsdi->bhsdi_datasn = + htonl(PDU_EXPDATASN(request)); + PDU_EXPDATASN(request)++; + bhsdi->bhsdi_buffer_offset = htonl(off); + } + + if (len == 0) { + addr = ctl_sglist[i].addr; + len = ctl_sglist[i].len; + KASSERT(len > 0, ("len <= 0")); + } + + copy_len = len; + if (response->ip_data_len + copy_len > + cs->cs_max_data_segment_length) + copy_len = cs->cs_max_data_segment_length - + response->ip_data_len; + KASSERT(copy_len <= len, ("copy_len > len")); + icl_pdu_append_data(response, addr, copy_len, M_WAITOK); + addr += copy_len; + len -= copy_len; + off += copy_len; + io->scsiio.ext_data_filled += copy_len; + + if (len == 0) { + /* + * End of scatter-gather segment; + * proceed to the next one... + */ + if (i == ctl_sg_count - 1) { + /* + * ... unless this was the last one. + */ + break; + } + i++; + } + + if (response->ip_data_len == + cs->cs_max_data_segment_length) { + /* + * Can't stuff more data into the current PDU; + * queue it. Note that's not enough to check + * for kern_data_resid == 0 instead; there + * may be several Data-In PDUs for the final + * call to cfiscsi_datamove(), and we want + * to set the F flag only on the last of them. 
+ */ + if (off == io->scsiio.kern_total_len) + bhsdi->bhsdi_flags |= BHSDI_FLAGS_F; + KASSERT(response->ip_data_len > 0, + ("sending empty Data-In")); + cfiscsi_pdu_queue(response); + response = NULL; + bhsdi = NULL; + } + } + KASSERT(i == ctl_sg_count - 1, ("missed SG segment")); + KASSERT(len == 0, ("missed data from SG segment")); + if (response != NULL) { + if (off == io->scsiio.kern_total_len) { + bhsdi->bhsdi_flags |= BHSDI_FLAGS_F; + } else { + CFISCSI_SESSION_DEBUG(cs, "not setting the F flag; " + "have %zd, need %zd", off, + (size_t)io->scsiio.kern_total_len); + } + KASSERT(response->ip_data_len > 0, + ("sending empty Data-In")); + cfiscsi_pdu_queue(response); + } + + io->scsiio.be_move_done(io); + } else { + CFISCSI_SESSION_LOCK(cs); + target_transfer_tag = cs->cs_target_transfer_tag; + cs->cs_target_transfer_tag++; + CFISCSI_SESSION_UNLOCK(cs); + +#if 0 + CFISCSI_SESSION_DEBUG(cs, "expecting Data-Out with initiator " + "task tag 0x%x, target transfer tag 0x%x", + bhssc->bhssc_initiator_task_tag, target_transfer_tag); +#endif + cdw = uma_zalloc(cfiscsi_data_wait_zone, M_WAITOK | M_ZERO); + cdw->cdw_ctl_io = io; + cdw->cdw_target_transfer_tag = htonl(target_transfer_tag); + cdw->cdw_initiator_task_tag = bhssc->bhssc_initiator_task_tag; + + if (cs->cs_immediate_data && + icl_pdu_data_segment_length(request) > 0) { + done = cfiscsi_handle_data_segment(request, cdw); + if (done) { + uma_zfree(cfiscsi_data_wait_zone, cdw); + io->scsiio.be_move_done(io); + return; + } + +#if 0 + if (io->scsiio.ext_data_filled != 0) + CFISCSI_SESSION_DEBUG(cs, "got %zd bytes of immediate data, need %zd", + io->scsiio.ext_data_filled, io->scsiio.kern_data_len); +#endif + } + + CFISCSI_SESSION_LOCK(cs); + TAILQ_INSERT_TAIL(&cs->cs_waiting_for_data_out, cdw, cdw_next); + CFISCSI_SESSION_UNLOCK(cs); + + /* + * XXX: We should limit the number of outstanding R2T PDUs + * per task to MaxOutstandingR2T. 
+ */ + response = cfiscsi_pdu_new_response(request, M_WAITOK); + bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; + bhsr2t->bhsr2t_opcode = ISCSI_BHS_OPCODE_R2T; + bhsr2t->bhsr2t_flags = 0x80; + bhsr2t->bhsr2t_lun = bhssc->bhssc_lun; + bhsr2t->bhsr2t_initiator_task_tag = + bhssc->bhssc_initiator_task_tag; + bhsr2t->bhsr2t_target_transfer_tag = + htonl(target_transfer_tag); + /* + * XXX: Here we assume that cfiscsi_datamove() won't ever + * be running concurrently on several CPUs for a given + * command. + */ + bhsr2t->bhsr2t_r2tsn = htonl(PDU_R2TSN(request)); + PDU_R2TSN(request)++; + /* + * This is the offset within the current SCSI command; + * i.e. for the first call of datamove(), it will be 0, + * and for subsequent ones it will be the sum of lengths + * of previous ones. + * + * The ext_data_filled is to account for unsolicited + * (immediate) data that might have already arrived. + */ + bhsr2t->bhsr2t_buffer_offset = + htonl(io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled); + /* + * This is the total length (sum of S/G lengths) this call + * to cfiscsi_datamove() is supposed to handle. + * + * XXX: Limit it to MaxBurstLength. 
+ */ + bhsr2t->bhsr2t_desired_data_transfer_length = + htonl(io->scsiio.kern_data_len - io->scsiio.ext_data_filled); + cfiscsi_pdu_queue(response); + } +} + +static void +cfiscsi_scsi_command_done(union ctl_io *io) +{ + struct icl_pdu *request, *response; + struct iscsi_bhs_scsi_command *bhssc; + struct iscsi_bhs_scsi_response *bhssr; +#ifdef DIAGNOSTIC + struct cfiscsi_data_wait *cdw; +#endif + struct cfiscsi_session *cs; + uint16_t sense_length; + + request = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr; + cs = PDU_SESSION(request); + bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; + KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == + ISCSI_BHS_OPCODE_SCSI_COMMAND, + ("replying to wrong opcode 0x%x", bhssc->bhssc_opcode)); + + //CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x", + // bhssc->bhssc_initiator_task_tag); + +#ifdef DIAGNOSTIC + CFISCSI_SESSION_LOCK(cs); + TAILQ_FOREACH(cdw, &cs->cs_waiting_for_data_out, cdw_next) + KASSERT(bhssc->bhssc_initiator_task_tag != + cdw->cdw_initiator_task_tag, ("dangling cdw")); + CFISCSI_SESSION_UNLOCK(cs); +#endif + + response = cfiscsi_pdu_new_response(request, M_WAITOK); + bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; + bhssr->bhssr_opcode = ISCSI_BHS_OPCODE_SCSI_RESPONSE; + bhssr->bhssr_flags = 0x80; + /* + * XXX: We don't deal with bidirectional under/overflows; + * does anything actually support those? 
+ */ + if (PDU_TOTAL_TRANSFER_LEN(request) < + ntohl(bhssc->bhssc_expected_data_transfer_length)) { + bhssr->bhssr_flags |= BHSSR_FLAGS_RESIDUAL_UNDERFLOW; + bhssr->bhssr_residual_count = + htonl(ntohl(bhssc->bhssc_expected_data_transfer_length) - + PDU_TOTAL_TRANSFER_LEN(request)); + //CFISCSI_SESSION_DEBUG(cs, "underflow; residual count %d", + // ntohl(bhssr->bhssr_residual_count)); + } else if (PDU_TOTAL_TRANSFER_LEN(request) > + ntohl(bhssc->bhssc_expected_data_transfer_length)) { + bhssr->bhssr_flags |= BHSSR_FLAGS_RESIDUAL_OVERFLOW; + bhssr->bhssr_residual_count = + htonl(PDU_TOTAL_TRANSFER_LEN(request) - + ntohl(bhssc->bhssc_expected_data_transfer_length)); + //CFISCSI_SESSION_DEBUG(cs, "overflow; residual count %d", + // ntohl(bhssr->bhssr_residual_count)); + } + bhssr->bhssr_response = BHSSR_RESPONSE_COMMAND_COMPLETED; + bhssr->bhssr_status = io->scsiio.scsi_status; + bhssr->bhssr_initiator_task_tag = bhssc->bhssc_initiator_task_tag; + bhssr->bhssr_expdatasn = htonl(PDU_EXPDATASN(request)); + + if (io->scsiio.sense_len > 0) { +#if 0 + CFISCSI_SESSION_DEBUG(cs, "returning %d bytes of sense data", + io->scsiio.sense_len); +#endif + sense_length = htons(io->scsiio.sense_len); + icl_pdu_append_data(response, + &sense_length, sizeof(sense_length), M_WAITOK); + icl_pdu_append_data(response, + &io->scsiio.sense_data, io->scsiio.sense_len, M_WAITOK); + } + + ctl_free_io(io); + icl_pdu_free(request); + cfiscsi_pdu_queue(response); +} + +static void +cfiscsi_task_management_done(union ctl_io *io) +{ + struct icl_pdu *request, *response; + struct iscsi_bhs_task_management_request *bhstmr; + struct iscsi_bhs_task_management_response *bhstmr2; + struct cfiscsi_data_wait *cdw, *tmpcdw; + struct cfiscsi_session *cs; + + request = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr; + cs = PDU_SESSION(request); + bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs; + KASSERT((bhstmr->bhstmr_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) == + 
ISCSI_BHS_OPCODE_TASK_REQUEST, + ("replying to wrong opcode 0x%x", bhstmr->bhstmr_opcode)); + +#if 0 + CFISCSI_SESSION_DEBUG(cs, "initiator task tag 0x%x; referenced task tag 0x%x", + bhstmr->bhstmr_initiator_task_tag, + bhstmr->bhstmr_referenced_task_tag); +#endif + + if ((bhstmr->bhstmr_function & ~0x80) == + BHSTMR_FUNCTION_ABORT_TASK) { + /* + * Make sure we no longer wait for Data-Out for this command. + */ + CFISCSI_SESSION_LOCK(cs); + TAILQ_FOREACH_SAFE(cdw, + &cs->cs_waiting_for_data_out, cdw_next, tmpcdw) { + if (bhstmr->bhstmr_referenced_task_tag != + cdw->cdw_initiator_task_tag) + continue; + +#if 0 + CFISCSI_SESSION_DEBUG(cs, "removing csw for initiator task " + "tag 0x%x", bhstmr->bhstmr_initiator_task_tag); +#endif + TAILQ_REMOVE(&cs->cs_waiting_for_data_out, + cdw, cdw_next); + cdw->cdw_ctl_io->scsiio.be_move_done(cdw->cdw_ctl_io); + uma_zfree(cfiscsi_data_wait_zone, cdw); + } + CFISCSI_SESSION_UNLOCK(cs); + } + + response = cfiscsi_pdu_new_response(request, M_WAITOK); + bhstmr2 = (struct iscsi_bhs_task_management_response *) + response->ip_bhs; + bhstmr2->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_RESPONSE; + bhstmr2->bhstmr_flags = 0x80; + if (io->io_hdr.status == CTL_SUCCESS) { + bhstmr2->bhstmr_response = BHSTMR_RESPONSE_FUNCTION_COMPLETE; + } else { + /* + * XXX: How to figure out what exactly went wrong? iSCSI spec + * expects us to provide detailed error, e.g. "Task does + * not exist" or "LUN does not exist". 
+ */ + CFISCSI_SESSION_DEBUG(cs, "BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED"); + bhstmr2->bhstmr_response = + BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED; + } + bhstmr2->bhstmr_initiator_task_tag = bhstmr->bhstmr_initiator_task_tag; + + ctl_free_io(io); + icl_pdu_free(request); + cfiscsi_pdu_queue(response); +} + +static void +cfiscsi_done(union ctl_io *io) +{ + struct icl_pdu *request; + struct cfiscsi_session *cs; + + KASSERT(((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE), + ("invalid CTL status %#x", io->io_hdr.status)); + + request = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr; + if (request == NULL) { + /* + * Implicit task termination has just completed; nothing to do. + */ + return; + } + + cs = PDU_SESSION(request); + refcount_release(&cs->cs_outstanding_ctl_pdus); + + switch (request->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) { + case ISCSI_BHS_OPCODE_SCSI_COMMAND: + cfiscsi_scsi_command_done(io); + break; + case ISCSI_BHS_OPCODE_TASK_REQUEST: + cfiscsi_task_management_done(io); + break; + default: + panic("cfiscsi_done called with wrong opcode 0x%x", + request->ip_bhs->bhs_opcode); + } +} diff --git a/sys/cam/ctl/ctl_frontend_iscsi.h b/sys/cam/ctl/ctl_frontend_iscsi.h new file mode 100644 index 000000000000..164294480e71 --- /dev/null +++ b/sys/cam/ctl/ctl_frontend_iscsi.h @@ -0,0 +1,112 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef CTL_FRONTEND_ISCSI_H +#define CTL_FRONTEND_ISCSI_H + +struct cfiscsi_target { + TAILQ_ENTRY(cfiscsi_target) ct_next; + int ct_luns[CTL_MAX_LUNS]; + struct cfiscsi_softc *ct_softc; + volatile u_int ct_refcount; + char ct_name[CTL_ISCSI_NAME_LEN]; + char ct_alias[CTL_ISCSI_ALIAS_LEN]; +}; + +struct cfiscsi_data_wait { + TAILQ_ENTRY(cfiscsi_data_wait) cdw_next; + union ctl_io *cdw_ctl_io; + uint32_t cdw_target_transfer_tag; + uint32_t cdw_initiator_task_tag; + int cdw_sg_index; + char *cdw_sg_addr; + size_t cdw_sg_len; +}; + +#define CFISCSI_SESSION_STATE_INVALID 0 +#define CFISCSI_SESSION_STATE_BHS 1 +#define CFISCSI_SESSION_STATE_AHS 2 +#define CFISCSI_SESSION_STATE_HEADER_DIGEST 3 +#define CFISCSI_SESSION_STATE_DATA 4 +#define CFISCSI_SESSION_STATE_DATA_DIGEST 5 + +struct cfiscsi_session { + TAILQ_ENTRY(cfiscsi_session) cs_next; + struct mtx cs_lock; + struct icl_conn *cs_conn; + uint32_t cs_cmdsn; + uint32_t cs_statsn; + uint32_t cs_target_transfer_tag; 
+ volatile u_int cs_outstanding_ctl_pdus; + TAILQ_HEAD(, cfiscsi_data_wait) cs_waiting_for_data_out; + struct cfiscsi_target *cs_target; + struct callout cs_callout; + int cs_timeout; + int cs_portal_group_tag; + struct cv cs_maintenance_cv; + int cs_terminating; + size_t cs_max_data_segment_length; + size_t cs_max_burst_length; + bool cs_immediate_data; + char cs_initiator_name[CTL_ISCSI_NAME_LEN]; + char cs_initiator_addr[CTL_ISCSI_ADDR_LEN]; + char cs_initiator_alias[CTL_ISCSI_ALIAS_LEN]; + unsigned int cs_id; + int cs_ctl_initid; +#ifdef ICL_KERNEL_PROXY + bool cs_login_phase; + bool cs_waiting_for_ctld; + struct cv cs_login_cv; + struct icl_pdu *cs_login_pdu; +#endif +}; + +#ifdef ICL_KERNEL_PROXY +struct icl_listen; +#endif + +struct cfiscsi_softc { + struct ctl_frontend fe; + struct mtx lock; + char port_name[32]; + int online; + unsigned int last_session_id; + TAILQ_HEAD(, cfiscsi_target) targets; + TAILQ_HEAD(, cfiscsi_session) sessions; + char ctl_initids[CTL_MAX_INIT_PER_PORT]; + int max_initiators; +#ifdef ICL_KERNEL_PROXY + struct icl_listen *listener; + struct cv accept_cv; +#endif +}; + +#endif /* !CTL_FRONTEND_ISCSI_H */ diff --git a/sys/cam/ctl/ctl_ioctl.h b/sys/cam/ctl/ctl_ioctl.h index bc6342a0e409..e9b0181aaa71 100644 --- a/sys/cam/ctl/ctl_ioctl.h +++ b/sys/cam/ctl/ctl_ioctl.h @@ -40,6 +40,12 @@ #ifndef _CTL_IOCTL_H_ #define _CTL_IOCTL_H_ +#ifdef ICL_KERNEL_PROXY +#include <sys/socket.h> +#endif + +#include <sys/ioccom.h> + #define CTL_DEFAULT_DEV "/dev/cam/ctl" /* * Maximum number of targets we support. @@ -588,6 +594,168 @@ struct ctl_lun_list { /* passed to userland */ }; +/* + * iSCSI status + * + * OK: Request completed successfully. + * + * ERROR: An error occurred, look at the error string for a + * description of the error. + * + * CTL_ISCSI_LIST_NEED_MORE_SPACE: + * User has to pass larger buffer for CTL_ISCSI_LIST ioctl. 
+ */ +typedef enum { + CTL_ISCSI_OK, + CTL_ISCSI_ERROR, + CTL_ISCSI_LIST_NEED_MORE_SPACE, + CTL_ISCSI_SESSION_NOT_FOUND +} ctl_iscsi_status; + +typedef enum { + CTL_ISCSI_HANDOFF, + CTL_ISCSI_LIST, + CTL_ISCSI_LOGOUT, + CTL_ISCSI_TERMINATE, +#ifdef ICL_KERNEL_PROXY + CTL_ISCSI_LISTEN, + CTL_ISCSI_ACCEPT, + CTL_ISCSI_SEND, + CTL_ISCSI_RECEIVE, + CTL_ISCSI_CLOSE, +#endif +} ctl_iscsi_type; + +typedef enum { + CTL_ISCSI_DIGEST_NONE, + CTL_ISCSI_DIGEST_CRC32C +} ctl_iscsi_digest; + +#define CTL_ISCSI_NAME_LEN 224 /* 223 bytes, by RFC 3720, + '\0' */ +#define CTL_ISCSI_ADDR_LEN 47 /* INET6_ADDRSTRLEN + '\0' */ +#define CTL_ISCSI_ALIAS_LEN 128 /* Arbitrary. */ + +struct ctl_iscsi_handoff_params { + char initiator_name[CTL_ISCSI_NAME_LEN]; + char initiator_addr[CTL_ISCSI_ADDR_LEN]; + char initiator_alias[CTL_ISCSI_ALIAS_LEN]; + char target_name[CTL_ISCSI_NAME_LEN]; +#ifdef ICL_KERNEL_PROXY + int connection_id; + /* + * XXX + */ + int socket; +#else + int socket; +#endif + int portal_group_tag; + + /* + * Connection parameters negotiated by ctld(8). 
+ */ + ctl_iscsi_digest header_digest; + ctl_iscsi_digest data_digest; + uint32_t cmdsn; + uint32_t statsn; + uint32_t max_recv_data_segment_length; + uint32_t max_burst_length; + uint32_t first_burst_length; + uint32_t immediate_data; +}; + +struct ctl_iscsi_list_params { + uint32_t alloc_len; /* passed to kernel */ + char *conn_xml; /* filled in kernel */ + uint32_t fill_len; /* passed to userland */ +}; + +struct ctl_iscsi_logout_params { + int connection_id; /* passed to kernel */ + char initiator_name[CTL_ISCSI_NAME_LEN]; + /* passed to kernel */ + char initiator_addr[CTL_ISCSI_ADDR_LEN]; + /* passed to kernel */ + int all; /* passed to kernel */ +}; + +struct ctl_iscsi_terminate_params { + int connection_id; /* passed to kernel */ + char initiator_name[CTL_ISCSI_NAME_LEN]; + /* passed to kernel */ + char initiator_addr[CTL_ISCSI_NAME_LEN]; + /* passed to kernel */ + int all; /* passed to kernel */ +}; + +#ifdef ICL_KERNEL_PROXY +struct ctl_iscsi_listen_params { + int iser; + int domain; + int socktype; + int protocol; + struct sockaddr *addr; + socklen_t addrlen; +}; + +struct ctl_iscsi_accept_params { + int connection_id; +}; + +struct ctl_iscsi_send_params { + int connection_id; + void *bhs; + size_t spare; + void *spare2; + size_t data_segment_len; + void *data_segment; +}; + +struct ctl_iscsi_receive_params { + int connection_id; + void *bhs; + size_t spare; + void *spare2; + size_t data_segment_len; + void *data_segment; +}; + +struct ctl_iscsi_close_params { + int connection_id; +}; +#endif /* ICL_KERNEL_PROXY */ + +union ctl_iscsi_data { + struct ctl_iscsi_handoff_params handoff; + struct ctl_iscsi_list_params list; + struct ctl_iscsi_logout_params logout; + struct ctl_iscsi_terminate_params terminate; +#ifdef ICL_KERNEL_PROXY + struct ctl_iscsi_listen_params listen; + struct ctl_iscsi_accept_params accept; + struct ctl_iscsi_send_params send; + struct ctl_iscsi_receive_params receive; + struct ctl_iscsi_close_params close; +#endif +}; + +/* + * iSCSI 
interface + * + * status: The status of the request. See above for the + * description of the values of this field. + * + * error_str: If the status indicates an error, this string will + * be filled in to describe the error. + */ +struct ctl_iscsi { + ctl_iscsi_type type; /* passed to kernel */ + union ctl_iscsi_data data; /* passed to kernel */ + ctl_iscsi_status status; /* passed to userland */ + char error_str[CTL_ERROR_STR_LEN]; + /* passed to userland */ +}; + #define CTL_IO _IOWR(CTL_MINOR, 0x00, union ctl_io) #define CTL_ENABLE_PORT _IOW(CTL_MINOR, 0x04, struct ctl_port_entry) #define CTL_DISABLE_PORT _IOW(CTL_MINOR, 0x05, struct ctl_port_entry) @@ -612,6 +780,7 @@ struct ctl_lun_list { #define CTL_LUN_LIST _IOWR(CTL_MINOR, 0x22, struct ctl_lun_list) #define CTL_ERROR_INJECT_DELETE _IOW(CTL_MINOR, 0x23, struct ctl_error_desc) #define CTL_SET_PORT_WWNS _IOW(CTL_MINOR, 0x24, struct ctl_port_entry) +#define CTL_ISCSI _IOWR(CTL_MINOR, 0x25, struct ctl_iscsi) #endif /* _CTL_IOCTL_H_ */ diff --git a/sys/conf/files b/sys/conf/files index f790acb62faa..8cee6e737ac1 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -124,6 +124,7 @@ cam/ctl/ctl_cmd_table.c optional ctl cam/ctl/ctl_frontend.c optional ctl cam/ctl/ctl_frontend_cam_sim.c optional ctl cam/ctl/ctl_frontend_internal.c optional ctl +cam/ctl/ctl_frontend_iscsi.c optional ctl cam/ctl/ctl_mem_pool.c optional ctl cam/ctl/ctl_scsi_all.c optional ctl cam/ctl/ctl_error.c optional ctl @@ -1531,6 +1532,9 @@ ipw_monitor.fw optional ipwmonitorfw | ipwfw \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_monitor.fw" +dev/iscsi/icl.c optional iscsi | ctl +dev/iscsi/icl_proxy.c optional iscsi | ctl +dev/iscsi/iscsi.c optional iscsi scbus dev/iscsi_initiator/iscsi.c optional iscsi_initiator scbus dev/iscsi_initiator/iscsi_subr.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_cam.c optional iscsi_initiator scbus diff --git a/sys/dev/iscsi/icl.c b/sys/dev/iscsi/icl.c new file mode 100644 index 
000000000000..eb9cf4efcd3a --- /dev/null +++ b/sys/dev/iscsi/icl.c @@ -0,0 +1,1292 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * iSCSI Common Layer. It's used by both the initiator and target to send + * and receive iSCSI PDUs. 
+ */ + +#include <sys/param.h> +#include <sys/capability.h> +#include <sys/condvar.h> +#include <sys/conf.h> +#include <sys/file.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/lock.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/module.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/sx.h> +#include <sys/uio.h> +#include <vm/uma.h> +#include <netinet/in.h> +#include <netinet/tcp.h> + +#include "icl.h" +#include "iscsi_proto.h" + +SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer"); +static int debug = 1; +TUNABLE_INT("kern.icl.debug", &debug); +SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RW, + &debug, 1, "Enable debug messages"); +static int partial_receive_len = 1 * 1024; /* XXX: More? */ +TUNABLE_INT("kern.icl.partial_receive_len", &partial_receive_len); +SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RW, + &partial_receive_len, 1 * 1024, "Minimum read size for partially received " + "data segment"); + +static uma_zone_t icl_conn_zone; +static uma_zone_t icl_pdu_zone; + +static volatile u_int icl_ncons; + +/* + * Logging macros. They are wrapped in do { } while (0) so that each + * expands to exactly one statement and stays safe inside unbraced + * if/else bodies. + */ +#define ICL_DEBUG(X, ...) \ + do { \ + if (debug > 1) { \ + printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ + } \ + } while (0) + +#define ICL_WARN(X, ...) 
\ + do { \ + if (debug > 0) { \ + printf("WARNING: %s: " X "\n", \ + __func__, ## __VA_ARGS__); \ + } \ + } while (0) + +#define ICL_CONN_LOCK(X) mtx_lock(&(X)->ic_lock) +#define ICL_CONN_UNLOCK(X) mtx_unlock(&(X)->ic_lock) +#define ICL_CONN_LOCK_ASSERT(X) mtx_assert(&(X)->ic_lock, MA_OWNED) + +static void +icl_conn_fail(struct icl_conn *ic) +{ + if (ic->ic_socket == NULL) + return; + + /* + * XXX + */ + ic->ic_socket->so_error = EDOOFUS; + (ic->ic_error)(ic); +} + +static struct mbuf * +icl_conn_receive(struct icl_conn *ic, size_t len) +{ + struct uio uio; + struct socket *so; + struct mbuf *m; + int error, flags; + + so = ic->ic_socket; + + memset(&uio, 0, sizeof(uio)); + uio.uio_resid = len; + + flags = MSG_DONTWAIT; + error = soreceive(so, NULL, &uio, &m, NULL, &flags); + if (error != 0) { + ICL_DEBUG("soreceive error %d", error); + return (NULL); + } + if (uio.uio_resid != 0) { + m_freem(m); + ICL_DEBUG("short read"); + return (NULL); + } + + return (m); +} + +static struct icl_pdu * +icl_pdu_new(struct icl_conn *ic, int flags) +{ + struct icl_pdu *ip; + + refcount_acquire(&ic->ic_outstanding_pdus); + ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); + if (ip == NULL) { + ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); + refcount_release(&ic->ic_outstanding_pdus); + return (NULL); + } + + ip->ip_conn = ic; + + return (ip); +} + +void +icl_pdu_free(struct icl_pdu *ip) +{ + struct icl_conn *ic; + + ic = ip->ip_conn; + + m_freem(ip->ip_bhs_mbuf); + m_freem(ip->ip_ahs_mbuf); + m_freem(ip->ip_data_mbuf); + uma_zfree(icl_pdu_zone, ip); + refcount_release(&ic->ic_outstanding_pdus); +} + +/* + * Allocate icl_pdu with empty BHS to fill up by the caller. 
+ */ +struct icl_pdu * +icl_pdu_new_bhs(struct icl_conn *ic, int flags) +{ + struct icl_pdu *ip; + + ip = icl_pdu_new(ic, flags); + if (ip == NULL) + return (NULL); + + ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), + flags, MT_DATA, M_PKTHDR); + if (ip->ip_bhs_mbuf == NULL) { + ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); + icl_pdu_free(ip); + return (NULL); + } + ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); + memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); + ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); + + return (ip); +} + +static int +icl_pdu_ahs_length(const struct icl_pdu *request) +{ + + return (request->ip_bhs->bhs_total_ahs_len * 4); +} + +size_t +icl_pdu_data_segment_length(const struct icl_pdu *request) +{ + uint32_t len = 0; + + len += request->ip_bhs->bhs_data_segment_len[0]; + len <<= 8; + len += request->ip_bhs->bhs_data_segment_len[1]; + len <<= 8; + len += request->ip_bhs->bhs_data_segment_len[2]; + + return (len); +} + +static void +icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) +{ + + response->ip_bhs->bhs_data_segment_len[2] = len; + response->ip_bhs->bhs_data_segment_len[1] = len >> 8; + response->ip_bhs->bhs_data_segment_len[0] = len >> 16; +} + +static size_t +icl_pdu_padding(const struct icl_pdu *ip) +{ + + if ((ip->ip_data_len % 4) != 0) + return (4 - (ip->ip_data_len % 4)); + + return (0); +} + +static size_t +icl_pdu_size(const struct icl_pdu *response) +{ + size_t len; + + KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); + + len = sizeof(struct iscsi_bhs) + response->ip_data_len + + icl_pdu_padding(response); + if (response->ip_conn->ic_header_crc32c) + len += ISCSI_HEADER_DIGEST_SIZE; + if (response->ip_conn->ic_data_crc32c) + len += ISCSI_DATA_DIGEST_SIZE; + + return (len); +} + +static int +icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) +{ + struct mbuf *m; + + m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs)); + if (m == NULL) 
{ + ICL_DEBUG("failed to receive BHS"); + return (-1); + } + + request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs)); + if (request->ip_bhs_mbuf == NULL) { + ICL_WARN("m_pullup failed"); + return (-1); + } + request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *); + + /* + * XXX: For architectures with strict alignment requirements + * we may need to allocate ip_bhs and copy the data into it. + * For some reason, though, not doing this doesn't seem + * to cause problems; tested on sparc64. + */ + + *availablep -= sizeof(struct iscsi_bhs); + return (0); +} + +static int +icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) +{ + + request->ip_ahs_len = icl_pdu_ahs_length(request); + if (request->ip_ahs_len == 0) + return (0); + + request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, + request->ip_ahs_len); + if (request->ip_ahs_mbuf == NULL) { + ICL_DEBUG("failed to receive AHS"); + return (-1); + } + + *availablep -= request->ip_ahs_len; + return (0); +} + +static uint32_t +icl_mbuf_to_crc32c(const struct mbuf *m0) +{ + uint32_t digest = 0xffffffff; + const struct mbuf *m; + + for (m = m0; m != NULL; m = m->m_next) + digest = calculate_crc32c(digest, + mtod(m, const void *), m->m_len); + + digest = digest ^ 0xffffffff; + + return (digest); +} + +static int +icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) +{ + struct mbuf *m; + uint32_t received_digest, valid_digest; + + if (request->ip_conn->ic_header_crc32c == false) + return (0); + + m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE); + if (m == NULL) { + ICL_DEBUG("failed to receive header digest"); + return (-1); + } + + CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); + memcpy(&received_digest, mtod(m, void *), ISCSI_HEADER_DIGEST_SIZE); + m_freem(m); + + *availablep -= ISCSI_HEADER_DIGEST_SIZE; + + /* + * XXX: Handle AHS. 
+ */ + valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); + if (received_digest != valid_digest) { + ICL_WARN("header digest check failed; got 0x%x, " + "should be 0x%x", received_digest, valid_digest); + return (-1); + } + + return (0); +} + +/* + * Return the number of bytes that should be waiting in the receive socket + * before icl_pdu_receive_data_segment() gets called. + */ +static size_t +icl_pdu_data_segment_receive_len(const struct icl_pdu *request) +{ + size_t len; + + len = icl_pdu_data_segment_length(request); + if (len == 0) + return (0); + + /* + * Account for the parts of data segment already read from + * the socket buffer. + */ + KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); + len -= request->ip_data_len; + + /* + * Don't always wait for the full data segment to be delivered + * to the socket; this might badly affect performance due to + * TCP window scaling. + */ + if (len > partial_receive_len) { +#if 0 + ICL_DEBUG("need %zd bytes of data, limiting to %zd", + len, partial_receive_len)); +#endif + len = partial_receive_len; + + return (len); + } + + /* + * Account for padding. Note that due to the way code is written, + * the icl_pdu_receive_data_segment() must always receive padding + * along with the last part of data segment, because it would be + * impossible to tell whether we've already received the full data + * segment including padding, or without it. 
+ */ + if ((len % 4) != 0) + len += 4 - (len % 4); + +#if 0 + ICL_DEBUG("need %zd bytes of data", len)); +#endif + + return (len); +} + +static int +icl_pdu_receive_data_segment(struct icl_pdu *request, + size_t *availablep, bool *more_neededp) +{ + struct icl_conn *ic; + size_t len, padding = 0; + struct mbuf *m; + + ic = request->ip_conn; + + *more_neededp = false; + ic->ic_receive_len = 0; + + len = icl_pdu_data_segment_length(request); + if (len == 0) + return (0); + + if ((len % 4) != 0) + padding = 4 - (len % 4); + + /* + * Account for already received parts of data segment. + */ + KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); + len -= request->ip_data_len; + + if (len + padding > *availablep) { + /* + * Not enough data in the socket buffer. Receive as much + * as we can. Don't receive padding, since, obviously, it's + * not the end of data segment yet. + */ +#if 0 + ICL_DEBUG("limited from %zd to %zd", + len + padding, *availablep - padding)); +#endif + len = *availablep - padding; + *more_neededp = true; + padding = 0; + } + + /* + * Must not try to receive padding without at least one byte + * of actual data segment. 
+ */ + if (len > 0) { + m = icl_conn_receive(request->ip_conn, len + padding); + if (m == NULL) { + ICL_DEBUG("failed to receive data segment"); + return (-1); + } + + if (request->ip_data_mbuf == NULL) + request->ip_data_mbuf = m; + else + m_cat(request->ip_data_mbuf, m); + + request->ip_data_len += len; + *availablep -= len + padding; + } else + ICL_DEBUG("len 0"); + + if (*more_neededp) + ic->ic_receive_len = + icl_pdu_data_segment_receive_len(request); + + return (0); +} + +static int +icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) +{ + struct mbuf *m; + uint32_t received_digest, valid_digest; + + if (request->ip_conn->ic_data_crc32c == false) + return (0); + + if (request->ip_data_len == 0) + return (0); + + m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE); + if (m == NULL) { + ICL_DEBUG("failed to receive data digest"); + return (-1); + } + + CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); + memcpy(&received_digest, mtod(m, void *), ISCSI_DATA_DIGEST_SIZE); + m_freem(m); + + *availablep -= ISCSI_DATA_DIGEST_SIZE; + + /* + * Note that ip_data_mbuf also contains padding; since digest + * calculation is supposed to include that, we iterate over + * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. + */ + valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); + if (received_digest != valid_digest) { + ICL_WARN("data digest check failed; got 0x%x, " + "should be 0x%x", received_digest, valid_digest); + return (-1); + } + + return (0); +} + +/* + * Somewhat contrary to the name, this attempts to receive only one + * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 
+ */ +static struct icl_pdu * +icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) +{ + struct icl_pdu *request; + struct socket *so; + size_t len; + int error; + bool more_needed; + + so = ic->ic_socket; + + if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { + KASSERT(ic->ic_receive_pdu == NULL, + ("ic->ic_receive_pdu != NULL")); + request = icl_pdu_new(ic, M_NOWAIT); + if (request == NULL) { + ICL_DEBUG("failed to allocate PDU; " + "dropping connection"); + icl_conn_fail(ic); + return (NULL); + } + ic->ic_receive_pdu = request; + } else { + KASSERT(ic->ic_receive_pdu != NULL, + ("ic->ic_receive_pdu == NULL")); + request = ic->ic_receive_pdu; + } + + if (*availablep < ic->ic_receive_len) { +#if 0 + ICL_DEBUG("not enough data; need %zd, " + "have %zd", ic->ic_receive_len, *availablep); +#endif + return (NULL); + } + + switch (ic->ic_receive_state) { + case ICL_CONN_STATE_BHS: + //ICL_DEBUG("receiving BHS"); + error = icl_pdu_receive_bhs(request, availablep); + if (error != 0) { + ICL_DEBUG("failed to receive BHS; " + "dropping connection"); + break; + } + + /* + * We don't enforce any limit for AHS length; + * its length is stored in 8 bit field. 
+ */ + + len = icl_pdu_data_segment_length(request); + if (len > ic->ic_max_data_segment_length) { + ICL_WARN("received data segment " + "length %zd is larger than negotiated " + "MaxDataSegmentLength %zd; " + "dropping connection", + len, ic->ic_max_data_segment_length); + break; + } + + ic->ic_receive_state = ICL_CONN_STATE_AHS; + ic->ic_receive_len = icl_pdu_ahs_length(request); + break; + + case ICL_CONN_STATE_AHS: + //ICL_DEBUG("receiving AHS"); + error = icl_pdu_receive_ahs(request, availablep); + if (error != 0) { + ICL_DEBUG("failed to receive AHS; " + "dropping connection"); + break; + } + ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; + if (ic->ic_header_crc32c == false) + ic->ic_receive_len = 0; + else + ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; + break; + + case ICL_CONN_STATE_HEADER_DIGEST: + //ICL_DEBUG("receiving header digest"); + error = icl_pdu_check_header_digest(request, availablep); + if (error != 0) { + ICL_DEBUG("header digest failed; " + "dropping connection"); + break; + } + + ic->ic_receive_state = ICL_CONN_STATE_DATA; + ic->ic_receive_len = + icl_pdu_data_segment_receive_len(request); + break; + + case ICL_CONN_STATE_DATA: + //ICL_DEBUG("receiving data segment"); + error = icl_pdu_receive_data_segment(request, availablep, + &more_needed); + if (error != 0) { + ICL_DEBUG("failed to receive data segment;" + "dropping connection"); + break; + } + + if (more_needed) + break; + + ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; + if (ic->ic_data_crc32c == false) + ic->ic_receive_len = 0; + else + ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; + break; + + case ICL_CONN_STATE_DATA_DIGEST: + //ICL_DEBUG("receiving data digest"); + error = icl_pdu_check_data_digest(request, availablep); + if (error != 0) { + ICL_DEBUG("data digest failed; " + "dropping connection"); + break; + } + + /* + * We've received complete PDU; reset the receive state machine + * and return the PDU. 
+ */ + ic->ic_receive_state = ICL_CONN_STATE_BHS; + ic->ic_receive_len = sizeof(struct iscsi_bhs); + ic->ic_receive_pdu = NULL; + return (request); + + default: + panic("invalid ic_receive_state %d\n", ic->ic_receive_state); + } + + if (error != 0) { + icl_pdu_free(request); + icl_conn_fail(ic); + } + + return (NULL); +} + +static void +icl_conn_receive_pdus(struct icl_conn *ic, size_t available) +{ + struct icl_pdu *response; + struct socket *so; + + so = ic->ic_socket; + + /* + * This can never happen; we're careful to only mess with ic->ic_socket + * pointer when the send/receive threads are not running. + */ + KASSERT(so != NULL, ("NULL socket")); + + for (;;) { + if (ic->ic_disconnecting) + return; + + if (so->so_error != 0) { + ICL_DEBUG("connection error %d; " + "dropping connection", so->so_error); + icl_conn_fail(ic); + return; + } + + /* + * Loop until we have a complete PDU or there is not enough + * data in the socket buffer. + */ + if (available < ic->ic_receive_len) { +#if 0 + ICL_DEBUG("not enough data; have %zd, " + "need %zd", available, + ic->ic_receive_len); +#endif + return; + } + + response = icl_conn_receive_pdu(ic, &available); + if (response == NULL) + continue; + + if (response->ip_ahs_len > 0) { + ICL_WARN("received PDU with unsupported " + "AHS; opcode 0x%x; dropping connection", + response->ip_bhs->bhs_opcode); + icl_pdu_free(response); + icl_conn_fail(ic); + return; + } + + (ic->ic_receive)(response); + } +} + +static void +icl_receive_thread(void *arg) +{ + struct icl_conn *ic; + size_t available; + struct socket *so; + + ic = arg; + so = ic->ic_socket; + + ICL_CONN_LOCK(ic); + ic->ic_receive_running = true; + ICL_CONN_UNLOCK(ic); + + for (;;) { + if (ic->ic_disconnecting) { + //ICL_DEBUG("terminating"); + ICL_CONN_LOCK(ic); + ic->ic_receive_running = false; + ICL_CONN_UNLOCK(ic); + kthread_exit(); + return; + } + + SOCKBUF_LOCK(&so->so_rcv); + available = so->so_rcv.sb_cc; + if (available < ic->ic_receive_len) { + so->so_rcv.sb_lowat 
= ic->ic_receive_len; + cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); + } + SOCKBUF_UNLOCK(&so->so_rcv); + + icl_conn_receive_pdus(ic, available); + } +} + +static int +icl_soupcall_receive(struct socket *so, void *arg, int waitflag) +{ + struct icl_conn *ic; + + ic = arg; + cv_signal(&ic->ic_receive_cv); + return (SU_OK); +} + +static int +icl_pdu_send(struct icl_pdu *request) +{ + size_t padding, pdu_len; + uint32_t digest, zero = 0; + int error, ok; + struct socket *so; + struct icl_conn *ic; + + ic = request->ip_conn; + so = request->ip_conn->ic_socket; + + ICL_CONN_LOCK_ASSERT(ic); + + icl_pdu_set_data_segment_length(request, request->ip_data_len); + + pdu_len = icl_pdu_size(request); + + if (ic->ic_header_crc32c) { + digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); + ok = m_append(request->ip_bhs_mbuf, sizeof(digest), + (void *)&digest); + if (ok != 1) { + ICL_WARN("failed to append header digest"); + return (1); + } + } + + if (request->ip_data_len != 0) { + padding = icl_pdu_padding(request); + if (padding > 0) { + ok = m_append(request->ip_data_mbuf, padding, + (void *)&zero); + if (ok != 1) { + ICL_WARN("failed to append padding"); + return (1); + } + } + + if (ic->ic_data_crc32c) { + digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); + + ok = m_append(request->ip_data_mbuf, sizeof(digest), + (void *)&digest); + if (ok != 1) { + ICL_WARN("failed to append header digest"); + return (1); + } + } + + m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); + request->ip_data_mbuf = NULL; + } + + request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; + + error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, + NULL, MSG_DONTWAIT, curthread); + request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
*/ + if (error != 0) { + ICL_DEBUG("sosend error %d", error); + return (error); + } + + return (0); +} + +static void +icl_conn_send_pdus(struct icl_conn *ic) +{ + struct icl_pdu *request; + struct socket *so; + size_t available, size; + int error; + + ICL_CONN_LOCK_ASSERT(ic); + + so = ic->ic_socket; + + SOCKBUF_LOCK(&so->so_snd); + available = sbspace(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); + + while (!TAILQ_EMPTY(&ic->ic_to_send)) { + if (ic->ic_disconnecting) + return; + + request = TAILQ_FIRST(&ic->ic_to_send); + size = icl_pdu_size(request); + if (available < size) { + /* + * Set the low watermark on the socket, + * to avoid waking up until there is enough + * space. + */ + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_lowat = size; + SOCKBUF_UNLOCK(&so->so_snd); +#if 1 + ICL_DEBUG("no space to send; " + "have %zd, need %zd", + available, size); +#endif + return; + } + available -= size; + TAILQ_REMOVE(&ic->ic_to_send, request, ip_next); + error = icl_pdu_send(request); + if (error != 0) { + ICL_DEBUG("failed to send PDU; " + "dropping connection"); + icl_conn_fail(ic); + return; + } + icl_pdu_free(request); + } +} + +static void +icl_send_thread(void *arg) +{ + struct icl_conn *ic; + + ic = arg; + + ICL_CONN_LOCK(ic); + ic->ic_send_running = true; + ICL_CONN_UNLOCK(ic); + + for (;;) { + ICL_CONN_LOCK(ic); + if (ic->ic_disconnecting) { + //ICL_DEBUG("terminating"); + ic->ic_send_running = false; + ICL_CONN_UNLOCK(ic); + kthread_exit(); + return; + } + if (TAILQ_EMPTY(&ic->ic_to_send)) + cv_wait(&ic->ic_send_cv, &ic->ic_lock); + icl_conn_send_pdus(ic); + ICL_CONN_UNLOCK(ic); + } +} + +static int +icl_soupcall_send(struct socket *so, void *arg, int waitflag) +{ + struct icl_conn *ic; + + ic = arg; + cv_signal(&ic->ic_send_cv); + return (SU_OK); +} + +int +icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, int flags) +{ + struct mbuf *mb, *newmb; + size_t copylen, off = 0; + + KASSERT(len > 0, ("len == 0")); + + newmb = m_getm2(NULL, 
len, flags, MT_DATA, M_PKTHDR); + if (newmb == NULL) { + ICL_WARN("failed to allocate mbuf for %zd bytes", len); + return (ENOMEM); + } + + for (mb = newmb; mb != NULL; mb = mb->m_next) { + copylen = min(M_TRAILINGSPACE(mb), len - off); + memcpy(mtod(mb, char *), (const char *)addr + off, copylen); + mb->m_len = copylen; + off += copylen; + } + KASSERT(off == len, ("%s: off != len", __func__)); + + if (request->ip_data_mbuf == NULL) { + request->ip_data_mbuf = newmb; + request->ip_data_len = len; + } else { + m_cat(request->ip_data_mbuf, newmb); + request->ip_data_len += len; + } + + return (0); +} + +void +icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) +{ + + m_copydata(ip->ip_data_mbuf, off, len, addr); +} + +void +icl_pdu_queue(struct icl_pdu *ip) +{ + struct icl_conn *ic; + + ic = ip->ip_conn; + + ICL_CONN_LOCK(ic); + if (ic->ic_disconnecting || ic->ic_socket == NULL) { + ICL_DEBUG("icl_pdu_queue on closed connection"); + ICL_CONN_UNLOCK(ic); + return; + } + TAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); + ICL_CONN_UNLOCK(ic); + cv_signal(&ic->ic_send_cv); +} + +struct icl_conn * +icl_conn_new(void) +{ + struct icl_conn *ic; + + refcount_acquire(&icl_ncons); + + ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO); + + TAILQ_INIT(&ic->ic_to_send); + mtx_init(&ic->ic_lock, "icl_lock", NULL, MTX_DEF); + cv_init(&ic->ic_send_cv, "icl_tx"); + cv_init(&ic->ic_receive_cv, "icl_rx"); + refcount_init(&ic->ic_outstanding_pdus, 0); + ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; + + return (ic); +} + +void +icl_conn_free(struct icl_conn *ic) +{ + + mtx_destroy(&ic->ic_lock); + cv_destroy(&ic->ic_send_cv); + cv_destroy(&ic->ic_receive_cv); + uma_zfree(icl_conn_zone, ic); + refcount_release(&icl_ncons); +} + +static int +icl_conn_start(struct icl_conn *ic) +{ + size_t bufsize; + struct sockopt opt; + int error, one = 1; + + ICL_CONN_LOCK(ic); + + /* + * XXX: Ugly hack. 
+ */ + if (ic->ic_socket == NULL) { + ICL_CONN_UNLOCK(ic); + return (EINVAL); + } + + ic->ic_receive_state = ICL_CONN_STATE_BHS; + ic->ic_receive_len = sizeof(struct iscsi_bhs); + ic->ic_disconnecting = false; + + ICL_CONN_UNLOCK(ic); + + /* + * Use max available sockbuf size for sending. Do it manually + * instead of sbreserve(9) to work around resource limits. + * + * XXX: This kind of sucks. On one hand, we don't currently support + * sending a part of data segment; we always do it in one piece, + * so we have to make sure it can fit in the socket buffer. + * Once I've implemented partial send, we'll get rid of this + * and use autoscaling. + */ + bufsize = (sizeof(struct iscsi_bhs) + + ic->ic_max_data_segment_length) * 8; + error = soreserve(ic->ic_socket, bufsize, bufsize); + if (error != 0) { + ICL_WARN("soreserve failed with error %d", error); + icl_conn_close(ic); + return (error); + } + + /* + * Disable Nagle. + */ + bzero(&opt, sizeof(opt)); + opt.sopt_dir = SOPT_SET; + opt.sopt_level = IPPROTO_TCP; + opt.sopt_name = TCP_NODELAY; + opt.sopt_val = &one; + opt.sopt_valsize = sizeof(one); + error = sosetopt(ic->ic_socket, &opt); + if (error != 0) { + ICL_WARN("disabling TCP_NODELAY failed with error %d", error); + icl_conn_close(ic); + return (error); + } + + /* + * Start threads. + */ + error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "icltx"); + if (error != 0) { + ICL_WARN("kthread_add(9) failed with error %d", error); + icl_conn_close(ic); + return (error); + } + + error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "iclrx"); + if (error != 0) { + ICL_WARN("kthread_add(9) failed with error %d", error); + icl_conn_close(ic); + return (error); + } + + /* + * Register socket upcall, to get notified about incoming PDUs + * and free space to send outgoing ones. 
+ */ + SOCKBUF_LOCK(&ic->ic_socket->so_snd); + soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); + SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); + SOCKBUF_LOCK(&ic->ic_socket->so_rcv); + soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); + SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); + + return (0); +} + +int +icl_conn_handoff(struct icl_conn *ic, int fd) +{ + struct file *fp; + struct socket *so; + cap_rights_t rights; + int error; + + /* + * Steal the socket from userland. + */ + error = fget(curthread, fd, + cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); + if (error != 0) + return (error); + if (fp->f_type != DTYPE_SOCKET) { + fdrop(fp, curthread); + return (EINVAL); + } + so = fp->f_data; + if (so->so_type != SOCK_STREAM) { + fdrop(fp, curthread); + return (EINVAL); + } + + ICL_CONN_LOCK(ic); + + if (ic->ic_socket != NULL) { + ICL_CONN_UNLOCK(ic); + fdrop(fp, curthread); + return (EBUSY); + } + + ic->ic_socket = fp->f_data; + fp->f_ops = &badfileops; + fp->f_data = NULL; + fdrop(fp, curthread); + ICL_CONN_UNLOCK(ic); + + error = icl_conn_start(ic); + + return (error); +} + +void +icl_conn_shutdown(struct icl_conn *ic) +{ + + ICL_CONN_LOCK(ic); + if (ic->ic_socket == NULL) { + ICL_CONN_UNLOCK(ic); + return; + } + ICL_CONN_UNLOCK(ic); + + soshutdown(ic->ic_socket, SHUT_RDWR); +} + +void +icl_conn_close(struct icl_conn *ic) +{ + struct icl_pdu *pdu; + + ICL_CONN_LOCK(ic); + if (ic->ic_socket == NULL) { + ICL_CONN_UNLOCK(ic); + return; + } + + ic->ic_disconnecting = true; + + /* + * Wake up the threads, so they can properly terminate. 
+ */ + cv_signal(&ic->ic_receive_cv); + cv_signal(&ic->ic_send_cv); + while (ic->ic_receive_running || ic->ic_send_running) { + //ICL_DEBUG("waiting for send/receive threads to terminate"); + ICL_CONN_UNLOCK(ic); + cv_signal(&ic->ic_receive_cv); + cv_signal(&ic->ic_send_cv); + pause("icl_close", 1 * hz); + ICL_CONN_LOCK(ic); + } + //ICL_DEBUG("send/receive threads terminated"); + + soclose(ic->ic_socket); + ic->ic_socket = NULL; + + if (ic->ic_receive_pdu != NULL) { + //ICL_DEBUG("freeing partially received PDU"); + icl_pdu_free(ic->ic_receive_pdu); + ic->ic_receive_pdu = NULL; + } + + /* + * Remove any outstanding PDUs from the send queue. + */ + while (!TAILQ_EMPTY(&ic->ic_to_send)) { + pdu = TAILQ_FIRST(&ic->ic_to_send); + TAILQ_REMOVE(&ic->ic_to_send, pdu, ip_next); + icl_pdu_free(pdu); + } + + KASSERT(TAILQ_EMPTY(&ic->ic_to_send), + ("destroying session with non-empty send queue")); + /* + * XXX + */ +#if 0 + KASSERT(ic->ic_outstanding_pdus == 0, + ("destroying session with %d outstanding PDUs", + ic->ic_outstanding_pdus)); +#endif + ICL_CONN_UNLOCK(ic); +} + +bool +icl_conn_connected(struct icl_conn *ic) +{ + + ICL_CONN_LOCK(ic); + if (ic->ic_socket == NULL) { + ICL_CONN_UNLOCK(ic); + return (false); + } + if (ic->ic_socket->so_error != 0) { + ICL_CONN_UNLOCK(ic); + return (false); + } + ICL_CONN_UNLOCK(ic); + return (true); +} + +#ifdef ICL_KERNEL_PROXY +int +icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so) +{ + int error; + + if (so->so_type != SOCK_STREAM) + return (EINVAL); + + ICL_CONN_LOCK(ic); + if (ic->ic_socket != NULL) { + ICL_CONN_UNLOCK(ic); + return (EBUSY); + } + ic->ic_socket = so; + ICL_CONN_UNLOCK(ic); + + error = icl_conn_start(ic); + + return (error); +} +#endif /* ICL_KERNEL_PROXY */ + +static int +icl_unload(void) +{ + + if (icl_ncons != 0) + return (EBUSY); + + uma_zdestroy(icl_conn_zone); + uma_zdestroy(icl_pdu_zone); + + return (0); +} + +static void +icl_load(void) +{ + + icl_conn_zone = uma_zcreate("icl_conn", + 
sizeof(struct icl_conn), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + icl_pdu_zone = uma_zcreate("icl_pdu", + sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + + refcount_init(&icl_ncons, 0); +} + +static int +icl_modevent(module_t mod, int what, void *arg) +{ + + switch (what) { + case MOD_LOAD: + icl_load(); + return (0); + case MOD_UNLOAD: + return (icl_unload()); + default: + return (EINVAL); + } +} + +moduledata_t icl_data = { + "icl", + icl_modevent, + 0 +}; + +DECLARE_MODULE(icl, icl_data, SI_SUB_DRIVERS, SI_ORDER_FIRST); +MODULE_VERSION(icl, 1); diff --git a/sys/dev/iscsi/icl.h b/sys/dev/iscsi/icl.h new file mode 100644 index 000000000000..87a565269562 --- /dev/null +++ b/sys/dev/iscsi/icl.h @@ -0,0 +1,151 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef ICL_H +#define ICL_H + +/* + * iSCSI Common Layer. It's used by both the initiator and target to send + * and receive iSCSI PDUs. + */ + +struct icl_conn; + +struct icl_pdu { + TAILQ_ENTRY(icl_pdu) ip_next; + struct icl_conn *ip_conn; + struct iscsi_bhs *ip_bhs; + struct mbuf *ip_bhs_mbuf; + size_t ip_ahs_len; + struct mbuf *ip_ahs_mbuf; + size_t ip_data_len; + struct mbuf *ip_data_mbuf; + + /* + * User (initiator or provider) private fields. 
+ */ + uint32_t ip_prv0; + uint32_t ip_prv1; + uint32_t ip_prv2; +}; + +struct icl_pdu *icl_pdu_new_bhs(struct icl_conn *ic, int flags); +size_t icl_pdu_data_segment_length(const struct icl_pdu *ip); +int icl_pdu_append_data(struct icl_pdu *ip, const void *addr, size_t len, int flags); +void icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len); +void icl_pdu_queue(struct icl_pdu *ip); +void icl_pdu_free(struct icl_pdu *ip); + +#define ICL_CONN_STATE_INVALID 0 +#define ICL_CONN_STATE_BHS 1 +#define ICL_CONN_STATE_AHS 2 +#define ICL_CONN_STATE_HEADER_DIGEST 3 +#define ICL_CONN_STATE_DATA 4 +#define ICL_CONN_STATE_DATA_DIGEST 5 + +#define ICL_MAX_DATA_SEGMENT_LENGTH (128 * 1024) + +struct icl_conn { + struct mtx ic_lock; + struct socket *ic_socket; + volatile u_int ic_outstanding_pdus; + TAILQ_HEAD(, icl_pdu) ic_to_send; + size_t ic_receive_len; + int ic_receive_state; + struct icl_pdu *ic_receive_pdu; + struct cv ic_send_cv; + struct cv ic_receive_cv; + bool ic_header_crc32c; + bool ic_data_crc32c; + bool ic_send_running; + bool ic_receive_running; + size_t ic_max_data_segment_length; + bool ic_disconnecting; + bool ic_iser; + + void (*ic_receive)(struct icl_pdu *); + void (*ic_error)(struct icl_conn *); + + /* + * User (initiator or provider) private fields. + */ + void *ic_prv0; +}; + +struct icl_conn *icl_conn_new(void); +void icl_conn_free(struct icl_conn *ic); +int icl_conn_handoff(struct icl_conn *ic, int fd); +void icl_conn_shutdown(struct icl_conn *ic); +void icl_conn_close(struct icl_conn *ic); +bool icl_conn_connected(struct icl_conn *ic); + +#ifdef ICL_KERNEL_PROXY + +struct sockaddr; +struct icl_listen; + +struct icl_listen_sock { + TAILQ_ENTRY(icl_listen_sock) ils_next; + struct icl_listen *ils_listen; + struct socket *ils_socket; + bool ils_running; + bool ils_disconnecting; +}; + +struct icl_listen { + TAILQ_HEAD(, icl_listen_sock) il_sockets; + struct sx il_lock; + void (*il_accept)(struct socket *); +}; + +/* + * Initiator part. 
+ */ +int icl_conn_connect(struct icl_conn *ic, bool rdma, + int domain, int socktype, int protocol, + struct sockaddr *from_sa, struct sockaddr *to_sa); +/* + * Target part. + */ +struct icl_listen *icl_listen_new(void (*accept_cb)(struct socket *)); +void icl_listen_free(struct icl_listen *il); +int icl_listen_add(struct icl_listen *il, bool rdma, int domain, + int socktype, int protocol, struct sockaddr *sa); +int icl_listen_remove(struct icl_listen *il, struct sockaddr *sa); + +/* + * This one is not a public API; only to be used by icl_proxy.c. + */ +int icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so); + +#endif /* ICL_KERNEL_PROXY */ + +#endif /* !ICL_H */ diff --git a/sys/dev/iscsi/icl_proxy.c b/sys/dev/iscsi/icl_proxy.c new file mode 100644 index 000000000000..41e64c352a9e --- /dev/null +++ b/sys/dev/iscsi/icl_proxy.c @@ -0,0 +1,397 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +/*- + * Copyright (c) 1982, 1986, 1989, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * sendfile(2) and related extensions: + * Copyright (c) 1998, David Greenman. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 + */ + +/* + * iSCSI Common Layer, kernel proxy part. + */ + +#ifdef ICL_KERNEL_PROXY + +#include <sys/param.h> +#include <sys/capability.h> +#include <sys/condvar.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/malloc.h> +#include <sys/proc.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sx.h> +#include <sys/systm.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <linux/types.h> +#include <rdma/rdma_cm.h> + +#include "icl.h" + +static int debug = 1; + +#define ICL_DEBUG(X, ...) \ + if (debug > 1) { \ + printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ + } while (0) + +#define ICL_WARN(X, ...) 
\ + if (debug > 0) { \ + printf("WARNING: %s: " X "\n", \ + __func__, ## __VA_ARGS__); \ + } while (0) + +static MALLOC_DEFINE(M_ICL_PROXY, "ICL_PROXY", "iSCSI common layer proxy"); + +#ifdef ICL_RDMA +static int icl_conn_connect_rdma(struct icl_conn *ic, int domain, int socktype, + int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa); +static int icl_listen_add_rdma(struct icl_listen *il, int domain, int socktype, int protocol, + struct sockaddr *sa); +#endif /* ICL_RDMA */ + +static int +icl_conn_connect_tcp(struct icl_conn *ic, int domain, int socktype, + int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) +{ + struct socket *so; + int error; + int interrupted = 0; + + error = socreate(domain, &so, socktype, protocol, + curthread->td_ucred, curthread); + if (error != 0) + return (error); + + if (from_sa != NULL) { + error = sobind(so, from_sa, curthread); + if (error != 0) { + soclose(so); + return (error); + } + } + + error = soconnect(so, to_sa, curthread); + if (error != 0) { + soclose(so); + return (error); + } + + SOCK_LOCK(so); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, + "icl_connect", 0); + if (error) { + if (error == EINTR || error == ERESTART) + interrupted = 1; + break; + } + } + if (error == 0) { + error = so->so_error; + so->so_error = 0; + } + SOCK_UNLOCK(so); + + if (error != 0) { + soclose(so); + return (error); + } + + error = icl_conn_handoff_sock(ic, so); + if (error != 0) + soclose(so); + + return (error); +} + +int +icl_conn_connect(struct icl_conn *ic, bool rdma, int domain, int socktype, + int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) +{ + + if (rdma) { +#ifdef ICL_RDMA + return (icl_conn_connect_rdma(ic, domain, socktype, protocol, from_sa, to_sa)); +#else + ICL_DEBUG("RDMA not supported"); + return (EOPNOTSUPP); +#endif + } + + return (icl_conn_connect_tcp(ic, domain, socktype, protocol, from_sa, to_sa)); +} + 
+struct icl_listen * +icl_listen_new(void (*accept_cb)(struct socket *)) +{ + struct icl_listen *il; + + il = malloc(sizeof(*il), M_ICL_PROXY, M_ZERO | M_WAITOK); + TAILQ_INIT(&il->il_sockets); + sx_init(&il->il_lock, "icl_listen"); + il->il_accept = accept_cb; + + return (il); +} + +void +icl_listen_free(struct icl_listen *il) +{ + struct icl_listen_sock *ils; + + sx_xlock(&il->il_lock); + while (!TAILQ_EMPTY(&il->il_sockets)) { + ils = TAILQ_FIRST(&il->il_sockets); + while (ils->ils_running) { + ICL_DEBUG("waiting for accept thread to terminate"); + sx_xunlock(&il->il_lock); + ils->ils_disconnecting = true; + wakeup(&ils->ils_socket->so_timeo); + pause("icl_unlisten", 1 * hz); + sx_xlock(&il->il_lock); + } + + TAILQ_REMOVE(&il->il_sockets, ils, ils_next); + soclose(ils->ils_socket); + free(ils, M_ICL_PROXY); + } + sx_xunlock(&il->il_lock); + + free(il, M_ICL_PROXY); +} + +/* + * XXX: Doing accept in a separate thread in each socket might not be the best way + * to do stuff, but it's pretty clean and debuggable - and you probably won't + * have hundreds of listening sockets anyway. 
+ */ +static void +icl_accept_thread(void *arg) +{ + struct icl_listen_sock *ils; + struct socket *head, *so; + struct sockaddr *sa; + int error; + + ils = arg; + head = ils->ils_socket; + + ils->ils_running = true; + + for (;;) { + ACCEPT_LOCK(); + while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) { + if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { + head->so_error = ECONNABORTED; + break; + } + error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, + "accept", 0); + if (error) { + ACCEPT_UNLOCK(); + ICL_WARN("msleep failed with error %d", error); + continue; + } + if (ils->ils_disconnecting) { + ACCEPT_UNLOCK(); + ICL_DEBUG("terminating"); + ils->ils_running = false; + kthread_exit(); + return; + } + } + if (head->so_error) { + error = head->so_error; + head->so_error = 0; + ACCEPT_UNLOCK(); + ICL_WARN("socket error %d", error); + continue; + } + so = TAILQ_FIRST(&head->so_comp); + KASSERT(so != NULL, ("NULL so")); + KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); + KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); + + /* + * Before changing the flags on the socket, we have to bump the + * reference count. Otherwise, if the protocol calls sofree(), + * the socket will be released due to a zero refcount. 
+ */ + SOCK_LOCK(so); /* soref() and so_state update */ + soref(so); /* file descriptor reference */ + + TAILQ_REMOVE(&head->so_comp, so, so_list); + head->so_qlen--; + so->so_state |= (head->so_state & SS_NBIO); + so->so_qstate &= ~SQ_COMP; + so->so_head = NULL; + + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + + sa = NULL; + error = soaccept(so, &sa); + if (error != 0) { + ICL_WARN("soaccept error %d", error); + if (sa != NULL) + free(sa, M_SONAME); + soclose(so); + } + + (ils->ils_listen->il_accept)(so); + } +} + +static int +icl_listen_add_tcp(struct icl_listen *il, int domain, int socktype, int protocol, + struct sockaddr *sa) +{ + struct icl_listen_sock *ils; + struct socket *so; + struct sockopt sopt; + int error, one = 1; + + error = socreate(domain, &so, socktype, protocol, + curthread->td_ucred, curthread); + if (error != 0) { + ICL_WARN("socreate failed with error %d", error); + return (error); + } + + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_REUSEADDR; + sopt.sopt_val = &one; + sopt.sopt_valsize = sizeof(one); + sopt.sopt_td = NULL; + error = sosetopt(so, &sopt); + if (error != 0) { + ICL_WARN("failed to set SO_REUSEADDR with error %d", error); + soclose(so); + return (error); + } + + error = sobind(so, sa, curthread); + if (error != 0) { + ICL_WARN("sobind failed with error %d", error); + soclose(so); + return (error); + } + + error = solisten(so, -1, curthread); + if (error != 0) { + ICL_WARN("solisten failed with error %d", error); + soclose(so); + return (error); + } + + ils = malloc(sizeof(*ils), M_ICL_PROXY, M_ZERO | M_WAITOK); + ils->ils_listen = il; + ils->ils_socket = so; + + error = kthread_add(icl_accept_thread, ils, NULL, NULL, 0, 0, "iclacc"); + if (error != 0) { + ICL_WARN("kthread_add failed with error %d", error); + soclose(so); + free(ils, M_ICL_PROXY); + + return (error); + } + + sx_xlock(&il->il_lock); + TAILQ_INSERT_TAIL(&il->il_sockets, ils, ils_next); + sx_xunlock(&il->il_lock); + + return (0); +} + 
+int +icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype, int protocol, + struct sockaddr *sa) +{ + + if (rdma) { +#ifndef ICL_RDMA + ICL_DEBUG("RDMA not supported"); + return (EOPNOTSUPP); +#else + return (icl_listen_add_rdma(il, domain, socktype, protocol, sa)); +#endif + } + + + return (icl_listen_add_tcp(il, domain, socktype, protocol, sa)); +} + +int +icl_listen_remove(struct icl_listen *il, struct sockaddr *sa) +{ + + /* + * XXX + */ + + return (EOPNOTSUPP); +} + +#endif /* ICL_KERNEL_PROXY */ diff --git a/sys/dev/iscsi/iscsi.c b/sys/dev/iscsi/iscsi.c new file mode 100644 index 000000000000..0a956e832a94 --- /dev/null +++ b/sys/dev/iscsi/iscsi.c @@ -0,0 +1,2109 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/condvar.h> +#include <sys/conf.h> +#include <sys/eventhandler.h> +#include <sys/file.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/module.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/sx.h> +#include <vm/uma.h> + +#include <cam/cam.h> +#include <cam/cam_ccb.h> +#include <cam/cam_xpt.h> +#include <cam/cam_debug.h> +#include <cam/cam_sim.h> +#include <cam/cam_xpt_sim.h> +#include <cam/cam_xpt_periph.h> +#include <cam/cam_periph.h> +#include <cam/scsi/scsi_all.h> +#include <cam/scsi/scsi_message.h> + +#include "iscsi_ioctl.h" +#include "iscsi.h" +#include "icl.h" +#include "iscsi_proto.h" + +#ifdef ICL_KERNEL_PROXY +#include <sys/socketvar.h> +#endif + +/* + * XXX: This is global so the iscsi_unload() can access it. + * Think about how to do this properly. 
+ */ +static struct iscsi_softc *sc; + +SYSCTL_NODE(_kern, OID_AUTO, iscsi, CTLFLAG_RD, 0, "iSCSI initiator"); +static int debug = 1; +TUNABLE_INT("kern.iscsi.debug", &debug); +SYSCTL_INT(_kern_iscsi, OID_AUTO, debug, CTLFLAG_RW, + &debug, 2, "Enable debug messages"); +static int ping_timeout = 5; +TUNABLE_INT("kern.iscsi.ping_timeout", &ping_timeout); +SYSCTL_INT(_kern_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RW, &ping_timeout, + 5, "Timeout for ping (NOP-Out) requests, in seconds"); +static int iscsid_timeout = 60; +TUNABLE_INT("kern.iscsi.iscsid_timeout", &iscsid_timeout); +SYSCTL_INT(_kern_iscsi, OID_AUTO, iscsid_timeout, CTLFLAG_RW, &iscsid_timeout, + 60, "Time to wait for iscsid(8) to handle reconnection, in seconds"); +static int login_timeout = 60; +TUNABLE_INT("kern.iscsi.login_timeout", &login_timeout); +SYSCTL_INT(_kern_iscsi, OID_AUTO, login_timeout, CTLFLAG_RW, &login_timeout, + 60, "Time to wait for iscsid(8) to finish Login Phase, in seconds"); +static int maxtags = 255; +TUNABLE_INT("kern.iscsi.maxtags", &maxtags); +SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RW, &maxtags, + 255, "Max number of IO requests queued"); + +static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator"); +static uma_zone_t iscsi_outstanding_zone; + +/* + * Map an ICL connection, or a PDU belonging to one, back to its session. + */ +#define CONN_SESSION(X) ((struct iscsi_session *)(X)->ic_prv0) +#define PDU_SESSION(X) (CONN_SESSION((X)->ip_conn)) + +/* + * Logging macros. Output is gated on the "debug" variable above: warnings + * are printed when debug > 0, debug messages when debug > 1. Each macro + * expands to a single do { ... } while (0) statement, so it can be used + * as the body of an unbraced if/else. + */ +#define ISCSI_DEBUG(X, ...) \ + do { \ + if (debug > 1) { \ + printf("%s: " X "\n", \ + __func__, ## __VA_ARGS__); \ + } \ + } while (0) + +#define ISCSI_WARN(X, ...) \ + do { \ + if (debug > 0) { \ + printf("WARNING: %s: " X "\n", \ + __func__, ## __VA_ARGS__); \ + } \ + } while (0) + +#define ISCSI_SESSION_DEBUG(S, X, ...) \ + do { \ + if (debug > 1) { \ + printf("%s: %s (%s): " X "\n", \ + __func__, (S)->is_conf.isc_target_addr, \ + (S)->is_conf.isc_target, ## __VA_ARGS__); \ + } \ + } while (0) + +#define ISCSI_SESSION_WARN(S, X, ...) 
\ + do { \ + if (debug > 0) { \ + printf("WARNING: %s (%s): " X "\n", \ + (S)->is_conf.isc_target_addr, \ + (S)->is_conf.isc_target, ## __VA_ARGS__); \ + } \ + } while (0) + +/* Per-session mutex helpers. */ +#define ISCSI_SESSION_LOCK(X) mtx_lock(&(X)->is_lock) +#define ISCSI_SESSION_UNLOCK(X) mtx_unlock(&(X)->is_lock) +#define ISCSI_SESSION_LOCK_ASSERT(X) mtx_assert(&(X)->is_lock, MA_OWNED) + +static int iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, + int mode, struct thread *td); + +static struct cdevsw iscsi_cdevsw = { + .d_version = D_VERSION, + .d_ioctl = iscsi_ioctl, + .d_name = "iscsi", +}; + +static void iscsi_pdu_queue_locked(struct icl_pdu *request); +static void iscsi_pdu_queue(struct icl_pdu *request); +static void iscsi_pdu_update_statsn(const struct icl_pdu *response); +static void iscsi_pdu_handle_nop_in(struct icl_pdu *response); +static void iscsi_pdu_handle_scsi_response(struct icl_pdu *response); +static void iscsi_pdu_handle_data_in(struct icl_pdu *response); +static void iscsi_pdu_handle_logout_response(struct icl_pdu *response); +static void iscsi_pdu_handle_r2t(struct icl_pdu *response); +static void iscsi_pdu_handle_async_message(struct icl_pdu *response); +static void iscsi_pdu_handle_reject(struct icl_pdu *response); +static void iscsi_session_reconnect(struct iscsi_session *is); +static void iscsi_session_terminate(struct iscsi_session *is); +static void iscsi_action(struct cam_sim *sim, union ccb *ccb); +static void iscsi_poll(struct cam_sim *sim); +static struct iscsi_outstanding *iscsi_outstanding_find(struct iscsi_session *is, + uint32_t initiator_task_tag); +static int iscsi_outstanding_add(struct iscsi_session *is, + uint32_t initiator_task_tag, union ccb *ccb); +static void iscsi_outstanding_remove(struct iscsi_session *is, + struct iscsi_outstanding *io); + +static bool +iscsi_pdu_prepare(struct icl_pdu *request) +{ + struct iscsi_session *is; + struct iscsi_bhs_scsi_command *bhssc; + + is = PDU_SESSION(request); + + ISCSI_SESSION_LOCK_ASSERT(is); + + /* + * We're only using 
fields common for all the request + * (initiator -> target) PDUs. + */ + bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; + + /* + * Data-Out PDU does not contain CmdSN. + */ + if (bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_OUT) { + if (is->is_cmdsn > is->is_maxcmdsn && + (bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) { + /* + * Current MaxCmdSN prevents us from sending any more + * SCSI Command PDUs to the target; postpone the PDU. + * It will get resent by either iscsi_pdu_queue(), + * or by maintenance thread. + */ +#if 0 + ISCSI_SESSION_DEBUG(is, "postponing send, CmdSN %d, ExpCmdSN %d, MaxCmdSN %d, opcode 0x%x", + is->is_cmdsn, is->is_expcmdsn, is->is_maxcmdsn, bhssc->bhssc_opcode); +#endif + return (true); + } + bhssc->bhssc_cmdsn = htonl(is->is_cmdsn); + if ((bhssc->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) == 0) + is->is_cmdsn++; + } + bhssc->bhssc_expstatsn = htonl(is->is_statsn + 1); + + return (false); +} + +static void +iscsi_session_send_postponed(struct iscsi_session *is) +{ + struct icl_pdu *request; + bool postpone; + + ISCSI_SESSION_LOCK_ASSERT(is); + + while (!TAILQ_EMPTY(&is->is_postponed)) { + request = TAILQ_FIRST(&is->is_postponed); + postpone = iscsi_pdu_prepare(request); + if (postpone) + break; + TAILQ_REMOVE(&is->is_postponed, request, ip_next); + icl_pdu_queue(request); + } +} + +static void +iscsi_pdu_queue_locked(struct icl_pdu *request) +{ + struct iscsi_session *is; + bool postpone; + + is = PDU_SESSION(request); + ISCSI_SESSION_LOCK_ASSERT(is); + iscsi_session_send_postponed(is); + postpone = iscsi_pdu_prepare(request); + if (postpone) { + TAILQ_INSERT_TAIL(&is->is_postponed, request, ip_next); + return; + } + icl_pdu_queue(request); +} + +static void +iscsi_pdu_queue(struct icl_pdu *request) +{ + struct iscsi_session *is; + + is = PDU_SESSION(request); + ISCSI_SESSION_LOCK(is); + iscsi_pdu_queue_locked(request); + ISCSI_SESSION_UNLOCK(is); +} + +static void +iscsi_session_logout(struct iscsi_session *is) 
+{ + struct icl_pdu *request; + struct iscsi_bhs_logout_request *bhslr; + + request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT); + if (request == NULL) + return; + + bhslr = (struct iscsi_bhs_logout_request *)request->ip_bhs; + bhslr->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_REQUEST; + bhslr->bhslr_reason = BHSLR_REASON_CLOSE_SESSION; + iscsi_pdu_queue_locked(request); +} + +static void +iscsi_session_terminate_tasks(struct iscsi_session *is, bool requeue) +{ + struct iscsi_outstanding *io, *tmp; + + ISCSI_SESSION_LOCK_ASSERT(is); + + TAILQ_FOREACH_SAFE(io, &is->is_outstanding, io_next, tmp) { + if (requeue) { + io->io_ccb->ccb_h.status &= ~CAM_SIM_QUEUED; + io->io_ccb->ccb_h.status |= CAM_REQUEUE_REQ; + } else { + io->io_ccb->ccb_h.status = CAM_REQ_ABORTED; + } + + if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + io->io_ccb->ccb_h.status |= CAM_DEV_QFRZN; + xpt_done(io->io_ccb); + iscsi_outstanding_remove(is, io); + } +} + +static void +iscsi_maintenance_thread_reconnect(struct iscsi_session *is) +{ + struct icl_pdu *pdu; + + icl_conn_shutdown(is->is_conn); + icl_conn_close(is->is_conn); + + ISCSI_SESSION_LOCK(is); + +#ifdef ICL_KERNEL_PROXY + if (is->is_login_pdu != NULL) { + icl_pdu_free(is->is_login_pdu); + is->is_login_pdu = NULL; + } + cv_signal(&is->is_login_cv); +#endif + + /* + * Don't queue any new PDUs. + */ + if (is->is_sim != NULL && is->is_simq_frozen == false) { + ISCSI_SESSION_DEBUG(is, "freezing"); + xpt_freeze_simq(is->is_sim, 1); + is->is_simq_frozen = true; + } + + /* + * Remove postponed PDUs. + */ + while (!TAILQ_EMPTY(&is->is_postponed)) { + pdu = TAILQ_FIRST(&is->is_postponed); + TAILQ_REMOVE(&is->is_postponed, pdu, ip_next); + icl_pdu_free(pdu); + } + + /* + * Terminate SCSI tasks, asking CAM to requeue them. 
+ */ + //ISCSI_SESSION_DEBUG(is, "terminating tasks"); + iscsi_session_terminate_tasks(is, true); + + KASSERT(TAILQ_EMPTY(&is->is_outstanding), + ("destroying session with active tasks")); + KASSERT(TAILQ_EMPTY(&is->is_postponed), + ("destroying session with postponed PDUs")); + + /* + * Request immediate reconnection from iscsid(8). + */ + //ISCSI_SESSION_DEBUG(is, "waking up iscsid(8)"); + is->is_connected = false; + is->is_reconnecting = false; + is->is_login_phase = false; + is->is_waiting_for_iscsid = true; + strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); + is->is_timeout = 0; + ISCSI_SESSION_UNLOCK(is); + cv_signal(&is->is_softc->sc_cv); +} + +static void +iscsi_maintenance_thread_terminate(struct iscsi_session *is) +{ + struct iscsi_softc *sc; + struct icl_pdu *pdu; + + sc = is->is_softc; + sx_xlock(&sc->sc_lock); + TAILQ_REMOVE(&sc->sc_sessions, is, is_next); + sx_xunlock(&sc->sc_lock); + + icl_conn_close(is->is_conn); + + ISCSI_SESSION_LOCK(is); + + KASSERT(is->is_terminating, ("is_terminating == false")); + +#ifdef ICL_KERNEL_PROXY + if (is->is_login_pdu != NULL) { + icl_pdu_free(is->is_login_pdu); + is->is_login_pdu = NULL; + } + cv_signal(&is->is_login_cv); +#endif + + /* + * Don't queue any new PDUs. + */ + callout_drain(&is->is_callout); + if (is->is_sim != NULL && is->is_simq_frozen == false) { + ISCSI_SESSION_DEBUG(is, "freezing"); + xpt_freeze_simq(is->is_sim, 1); + is->is_simq_frozen = true; + } + + /* + * Remove postponed PDUs. + */ + while (!TAILQ_EMPTY(&is->is_postponed)) { + pdu = TAILQ_FIRST(&is->is_postponed); + TAILQ_REMOVE(&is->is_postponed, pdu, ip_next); + icl_pdu_free(pdu); + } + + /* + * Forcibly terminate SCSI tasks. + */ + ISCSI_SESSION_DEBUG(is, "terminating tasks"); + iscsi_session_terminate_tasks(is, false); + + /* + * Deregister CAM. 
+ */ + if (is->is_sim != NULL) { + ISCSI_SESSION_DEBUG(is, "deregistering SIM"); + xpt_async(AC_LOST_DEVICE, is->is_path, NULL); + + if (is->is_simq_frozen) { + xpt_release_simq(is->is_sim, 1); + is->is_simq_frozen = false; + } + + xpt_free_path(is->is_path); + xpt_bus_deregister(cam_sim_path(is->is_sim)); + cam_sim_free(is->is_sim, TRUE /*free_devq*/); + is->is_sim = NULL; + } + + KASSERT(TAILQ_EMPTY(&is->is_outstanding), + ("destroying session with active tasks")); + KASSERT(TAILQ_EMPTY(&is->is_postponed), + ("destroying session with postponed PDUs")); + + ISCSI_SESSION_UNLOCK(is); + + icl_conn_free(is->is_conn); + mtx_destroy(&is->is_lock); + cv_destroy(&is->is_maintenance_cv); +#ifdef ICL_KERNEL_PROXY + cv_destroy(&is->is_login_cv); +#endif + ISCSI_SESSION_DEBUG(is, "terminated"); + free(is, M_ISCSI); + + /* + * The iscsi_unload() routine might be waiting. + */ + cv_signal(&sc->sc_cv); +} + +static void +iscsi_maintenance_thread(void *arg) +{ + struct iscsi_session *is; + + is = arg; + + for (;;) { + ISCSI_SESSION_LOCK(is); + if (is->is_reconnecting == false && + is->is_terminating == false && + TAILQ_EMPTY(&is->is_postponed)) + cv_wait(&is->is_maintenance_cv, &is->is_lock); + + if (is->is_reconnecting) { + ISCSI_SESSION_UNLOCK(is); + iscsi_maintenance_thread_reconnect(is); + continue; + } + + if (is->is_terminating) { + ISCSI_SESSION_UNLOCK(is); + iscsi_maintenance_thread_terminate(is); + kthread_exit(); + return; + } + + iscsi_session_send_postponed(is); + ISCSI_SESSION_UNLOCK(is); + } +} + +static void +iscsi_session_reconnect(struct iscsi_session *is) +{ + + /* + * XXX: We can't use locking here, because + * it's being called from various contexts. + * Hope it doesn't break anything. 
+ */ + if (is->is_reconnecting) + return; + + is->is_reconnecting = true; + cv_signal(&is->is_maintenance_cv); +} + +static void +iscsi_session_terminate(struct iscsi_session *is) +{ + if (is->is_terminating) + return; + + is->is_terminating = true; + +#if 0 + iscsi_session_logout(is); +#endif + cv_signal(&is->is_maintenance_cv); +} + +static void +iscsi_callout(void *context) +{ + struct icl_pdu *request; + struct iscsi_bhs_nop_out *bhsno; + struct iscsi_session *is; + bool reconnect_needed = false; + + is = context; + + if (is->is_terminating) + return; + + callout_schedule(&is->is_callout, 1 * hz); + + ISCSI_SESSION_LOCK(is); + is->is_timeout++; + + if (is->is_waiting_for_iscsid) { + if (is->is_timeout > iscsid_timeout) { + ISCSI_SESSION_WARN(is, "timed out waiting for iscsid(8) " + "for %d seconds; reconnecting", + is->is_timeout); + reconnect_needed = true; + } + goto out; + } + + if (is->is_login_phase) { + if (is->is_timeout > login_timeout) { + ISCSI_SESSION_WARN(is, "login timed out after %d seconds; " + "reconnecting", is->is_timeout); + reconnect_needed = true; + } + goto out; + } + + if (is->is_timeout >= ping_timeout) { + ISCSI_SESSION_WARN(is, "no ping reply (NOP-In) after %d seconds; " + "reconnecting", ping_timeout); + reconnect_needed = true; + goto out; + } + + ISCSI_SESSION_UNLOCK(is); + + /* + * If the ping was reset less than one second ago - which means + * that we've received some PDU during the last second - assume + * the traffic flows correctly and don't bother sending a NOP-Out. + * + * (It's 2 - one for one second, and one for incrementing is_timeout + * earlier in this routine.) 
+ */ + if (is->is_timeout < 2) + return; + + /* + * This routine runs as a callout handler, which must not sleep, + * so the allocation cannot use M_WAITOK. The callout has already + * been rescheduled above; on failure just skip this ping. + */ + request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT); + if (request == NULL) { + ISCSI_SESSION_WARN(is, "failed to allocate PDU"); + return; + } + bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; + bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | + ISCSI_BHS_OPCODE_IMMEDIATE; + bhsno->bhsno_flags = 0x80; + bhsno->bhsno_target_transfer_tag = 0xffffffff; + iscsi_pdu_queue(request); + return; + +out: + ISCSI_SESSION_UNLOCK(is); + + if (reconnect_needed) + iscsi_session_reconnect(is); +} + +/* + * Update the session's StatSN, ExpCmdSN and MaxCmdSN from a received PDU + * and reset the ping timer. Must be called with the session lock held. + */ +static void +iscsi_pdu_update_statsn(const struct icl_pdu *response) +{ + const struct iscsi_bhs_data_in *bhsdi; + struct iscsi_session *is; + uint32_t expcmdsn, maxcmdsn; + + is = PDU_SESSION(response); + + ISCSI_SESSION_LOCK_ASSERT(is); + + /* + * We're only using fields common for all the response + * (target -> initiator) PDUs. + */ + bhsdi = (const struct iscsi_bhs_data_in *)response->ip_bhs; + /* + * Ok, I lied. In case of Data-In, "The fields StatSN, Status, + * and Residual Count only have meaningful content if the S bit + * is set to 1", so we also need to check the bit specific for + * Data-In PDU. + */ + if (bhsdi->bhsdi_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN || + (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) { + if (ntohl(bhsdi->bhsdi_statsn) < is->is_statsn) { + /* StatSN went backwards; report it, but still accept it. */ + ISCSI_SESSION_WARN(is, + "PDU StatSN %u < session StatSN %u, opcode 0x%x", + ntohl(bhsdi->bhsdi_statsn), is->is_statsn, + bhsdi->bhsdi_opcode); + } + is->is_statsn = ntohl(bhsdi->bhsdi_statsn); + } + + expcmdsn = ntohl(bhsdi->bhsdi_expcmdsn); + maxcmdsn = ntohl(bhsdi->bhsdi_maxcmdsn); + + /* + * XXX: Compare using Serial Arithmetic Sense. + */ + if (maxcmdsn + 1 < expcmdsn) { + ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %d + 1 < PDU ExpCmdSN %d; ignoring", + maxcmdsn, expcmdsn); + } else { + if (maxcmdsn > is->is_maxcmdsn) { + is->is_maxcmdsn = maxcmdsn; + + /* + * Command window increased; kick the maintenance thread + * to send out postponed commands. 
+ */ + if (!TAILQ_EMPTY(&is->is_postponed)) + cv_signal(&is->is_maintenance_cv); + } else if (maxcmdsn < is->is_maxcmdsn) { + ISCSI_SESSION_DEBUG(is, "PDU MaxCmdSN %d < session MaxCmdSN %d; ignoring", + maxcmdsn, is->is_maxcmdsn); + } + + if (expcmdsn > is->is_expcmdsn) { + is->is_expcmdsn = expcmdsn; + } else if (expcmdsn < is->is_expcmdsn) { + ISCSI_SESSION_DEBUG(is, "PDU ExpCmdSN %d < session ExpCmdSN %d; ignoring", + expcmdsn, is->is_expcmdsn); + } + } + + /* + * Every incoming PDU - not just NOP-In - resets the ping timer. + * The purpose of the timeout is to reset the connection when it stalls; + * we don't want this to happen when NOP-In or NOP-Out ends up delayed + * in some queue. + */ + is->is_timeout = 0; +} + +static void +iscsi_receive_callback(struct icl_pdu *response) +{ + struct iscsi_session *is; + + is = PDU_SESSION(response); + + ISCSI_SESSION_LOCK(is); + +#ifdef ICL_KERNEL_PROXY + if (is->is_login_phase) { + if (is->is_login_pdu == NULL) + is->is_login_pdu = response; + else + icl_pdu_free(response); + ISCSI_SESSION_UNLOCK(is); + cv_signal(&is->is_login_cv); + return; + } +#endif + + iscsi_pdu_update_statsn(response); + + /* + * The handling routine is responsible for freeing the PDU + * when it's no longer needed. 
+ */ + switch (response->ip_bhs->bhs_opcode) { + case ISCSI_BHS_OPCODE_NOP_IN: + iscsi_pdu_handle_nop_in(response); + break; + case ISCSI_BHS_OPCODE_SCSI_RESPONSE: + iscsi_pdu_handle_scsi_response(response); + break; + case ISCSI_BHS_OPCODE_SCSI_DATA_IN: + iscsi_pdu_handle_data_in(response); + break; + case ISCSI_BHS_OPCODE_LOGOUT_RESPONSE: + iscsi_pdu_handle_logout_response(response); + break; + case ISCSI_BHS_OPCODE_R2T: + iscsi_pdu_handle_r2t(response); + break; + case ISCSI_BHS_OPCODE_ASYNC_MESSAGE: + iscsi_pdu_handle_async_message(response); + break; + case ISCSI_BHS_OPCODE_REJECT: + iscsi_pdu_handle_reject(response); + break; + default: + ISCSI_SESSION_WARN(is, "received PDU with unsupported " + "opcode 0x%x; reconnecting", + response->ip_bhs->bhs_opcode); + iscsi_session_reconnect(is); + icl_pdu_free(response); + } + + ISCSI_SESSION_UNLOCK(is); +} + +static void +iscsi_error_callback(struct icl_conn *ic) +{ + struct iscsi_session *is; + + is = CONN_SESSION(ic); + + ISCSI_SESSION_WARN(is, "connection error; reconnecting"); + iscsi_session_reconnect(is); +} + +static void +iscsi_pdu_handle_nop_in(struct icl_pdu *response) +{ + struct iscsi_bhs_nop_out *bhsno; + struct iscsi_bhs_nop_in *bhsni; + struct icl_pdu *request; + + bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs; + + if (bhsni->bhsni_target_transfer_tag == 0xffffffff) { + /* + * Nothing to do; iscsi_pdu_update_statsn() already + * zeroed the timeout. 
+ */ + icl_pdu_free(response); + return; + } + + request = icl_pdu_new_bhs(response->ip_conn, M_NOWAIT); + if (request == NULL) { + icl_pdu_free(response); + return; + } + bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs; + bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT | + ISCSI_BHS_OPCODE_IMMEDIATE; + bhsno->bhsno_flags = 0x80; + bhsno->bhsno_initiator_task_tag = 0xffffffff; /* XXX */ + bhsno->bhsno_target_transfer_tag = bhsni->bhsni_target_transfer_tag; + + request->ip_data_len = response->ip_data_len; + request->ip_data_mbuf = response->ip_data_mbuf; + response->ip_data_len = 0; + response->ip_data_mbuf = NULL; + + icl_pdu_free(response); + iscsi_pdu_queue_locked(request); +} + +static void +iscsi_pdu_handle_scsi_response(struct icl_pdu *response) +{ + struct iscsi_bhs_scsi_response *bhssr; + struct iscsi_outstanding *io; + struct iscsi_session *is; + struct ccb_scsiio *csio; + size_t data_segment_len; + uint16_t sense_len; + + is = PDU_SESSION(response); + + bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs; + io = iscsi_outstanding_find(is, bhssr->bhssr_initiator_task_tag); + if (io == NULL) { + ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhssr->bhssr_initiator_task_tag); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + if (bhssr->bhssr_response != BHSSR_RESPONSE_COMMAND_COMPLETED) { + ISCSI_SESSION_WARN(is, "service response 0x%x", bhssr->bhssr_response); + if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; + } else if (bhssr->bhssr_status == 0) { + io->io_ccb->ccb_h.status = CAM_REQ_CMP; + } else { + if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + io->io_ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; + io->io_ccb->csio.scsi_status = bhssr->bhssr_status; + 
} + + if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_OVERFLOW) { + ISCSI_SESSION_WARN(is, "target indicated residual overflow"); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + csio = &io->io_ccb->csio; + + data_segment_len = icl_pdu_data_segment_length(response); + if (data_segment_len > 0) { + if (data_segment_len < sizeof(sense_len)) { + ISCSI_SESSION_WARN(is, "truncated data segment (%zd bytes)", + data_segment_len); + if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; + goto out; + } + icl_pdu_get_data(response, 0, &sense_len, sizeof(sense_len)); + sense_len = ntohs(sense_len); +#if 0 + ISCSI_SESSION_DEBUG(is, "sense_len %d, data len %zd", + sense_len, data_segment_len); +#endif + if (sizeof(sense_len) + sense_len > data_segment_len) { + ISCSI_SESSION_WARN(is, "truncated data segment " + "(%zd bytes, should be %zd)", + data_segment_len, sizeof(sense_len) + sense_len); + if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + io->io_ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN; + goto out; + } else if (sizeof(sense_len) + sense_len < data_segment_len) + ISCSI_SESSION_WARN(is, "oversize data segment " + "(%zd bytes, should be %zd)", + data_segment_len, sizeof(sense_len) + sense_len); + if (sense_len > csio->sense_len) { + ISCSI_SESSION_DEBUG(is, "truncating sense from %d to %d", + sense_len, csio->sense_len); + sense_len = csio->sense_len; + } + icl_pdu_get_data(response, sizeof(sense_len), &csio->sense_data, sense_len); + csio->sense_resid = csio->sense_len - sense_len; + io->io_ccb->ccb_h.status |= CAM_AUTOSNS_VALID; + } + +out: + if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW) + csio->resid = ntohl(bhssr->bhssr_residual_count); + + if ((csio->ccb_h.flags & 
CAM_DIR_MASK) == CAM_DIR_IN) { + KASSERT(io->io_received <= csio->dxfer_len, + ("io->io_received > csio->dxfer_len")); + if (io->io_received < csio->dxfer_len) { + if (csio->resid != csio->dxfer_len - io->io_received) { + ISCSI_SESSION_WARN(is, "underflow mismatch: " + "target indicates %d, we calculated %zd", + csio->resid, + csio->dxfer_len - io->io_received); + } + csio->resid = csio->dxfer_len - io->io_received; + } + } + + xpt_done(io->io_ccb); + iscsi_outstanding_remove(is, io); + icl_pdu_free(response); +} + +static void +iscsi_pdu_handle_data_in(struct icl_pdu *response) +{ + struct iscsi_bhs_data_in *bhsdi; + struct iscsi_outstanding *io; + struct iscsi_session *is; + struct ccb_scsiio *csio; + size_t data_segment_len; + + is = PDU_SESSION(response); + bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs; + io = iscsi_outstanding_find(is, bhsdi->bhsdi_initiator_task_tag); + if (io == NULL) { + ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhsdi->bhsdi_initiator_task_tag); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + data_segment_len = icl_pdu_data_segment_length(response); + if (data_segment_len == 0) { + /* + * "The sending of 0 length data segments should be avoided, + * but initiators and targets MUST be able to properly receive + * 0 length data segments." + */ + icl_pdu_free(response); + return; + } + + /* + * We need to track this for security reasons - without it, malicious target + * could respond to SCSI READ without sending Data-In PDUs, which would result + * in read operation on the initiator side returning random kernel data. 
+ */ + if (ntohl(bhsdi->bhsdi_buffer_offset) != io->io_received) { + ISCSI_SESSION_WARN(is, "data out of order; expected offset %zd, got %zd", + io->io_received, (size_t)ntohl(bhsdi->bhsdi_buffer_offset)); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + csio = &io->io_ccb->csio; + + if (ntohl(bhsdi->bhsdi_buffer_offset) + data_segment_len > + csio->dxfer_len) { + ISCSI_SESSION_WARN(is, "oversize data segment (%zd bytes " + "at offset %d, buffer is %d)", + data_segment_len, ntohl(bhsdi->bhsdi_buffer_offset), + csio->dxfer_len); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + icl_pdu_get_data(response, 0, csio->data_ptr + ntohl(bhsdi->bhsdi_buffer_offset), data_segment_len); + io->io_received += data_segment_len; + + /* + * XXX: Check DataSN. + * XXX: Check F. + */ + if (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) { + //ISCSI_SESSION_DEBUG(is, "got S flag; status 0x%x", bhsdi->bhsdi_status); + if (bhsdi->bhsdi_status == 0) { + io->io_ccb->ccb_h.status = CAM_REQ_CMP; + } else { + if ((io->io_ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(io->io_ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + io->io_ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN; + csio->scsi_status = bhsdi->bhsdi_status; + } + xpt_done(io->io_ccb); + iscsi_outstanding_remove(is, io); + } + + icl_pdu_free(response); +} + +static void +iscsi_pdu_handle_logout_response(struct icl_pdu *response) +{ + + ISCSI_SESSION_DEBUG(PDU_SESSION(response), "logout response"); + icl_pdu_free(response); +} + +static void +iscsi_pdu_handle_r2t(struct icl_pdu *response) +{ + struct icl_pdu *request; + struct iscsi_session *is; + struct iscsi_bhs_r2t *bhsr2t; + struct iscsi_bhs_data_out *bhsdo; + struct iscsi_outstanding *io; + struct ccb_scsiio *csio; + size_t off, len, total_len; + int error; + + is = PDU_SESSION(response); + + bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs; + io = iscsi_outstanding_find(is, 
bhsr2t->bhsr2t_initiator_task_tag); + if (io == NULL) { + ISCSI_SESSION_WARN(is, "bad itt 0x%x; reconnecting", + bhsr2t->bhsr2t_initiator_task_tag); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + csio = &io->io_ccb->csio; + + if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_OUT) { + ISCSI_SESSION_WARN(is, "received R2T for read command; reconnecting"); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + /* + * XXX: Verify R2TSN. + */ + + io->io_datasn = 0; + off = ntohl(bhsr2t->bhsr2t_buffer_offset); + total_len = ntohl(bhsr2t->bhsr2t_desired_data_transfer_length); + + //ISCSI_SESSION_DEBUG(is, "r2t; off %zd, len %zd", off, total_len); + + for (;;) { + len = total_len; + + if (len > is->is_max_data_segment_length) + len = is->is_max_data_segment_length; + + if (off + len > csio->dxfer_len) { + ISCSI_SESSION_WARN(is, "bad off %zd, len %d", + off + len, csio->dxfer_len); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + request = icl_pdu_new_bhs(response->ip_conn, M_NOWAIT); + if (request == NULL) { + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs; + bhsdo->bhsdo_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_OUT; + bhsdo->bhsdo_lun = bhsr2t->bhsr2t_lun; + bhsdo->bhsdo_initiator_task_tag = + bhsr2t->bhsr2t_initiator_task_tag; + bhsdo->bhsdo_target_transfer_tag = + bhsr2t->bhsr2t_target_transfer_tag; + bhsdo->bhsdo_datasn = htonl(io->io_datasn++); + bhsdo->bhsdo_buffer_offset = htonl(off); + error = icl_pdu_append_data(request, csio->data_ptr + off, len, M_NOWAIT); + if (error != 0) { + icl_pdu_free(request); + icl_pdu_free(response); + iscsi_session_reconnect(is); + return; + } + + off += len; + total_len -= len; + + if (total_len == 0) { + bhsdo->bhsdo_flags |= BHSDO_FLAGS_F; + //ISCSI_SESSION_DEBUG(is, "setting F, off %zd", off); + } else { + //ISCSI_SESSION_DEBUG(is, "not finished, off %zd", off); + } + + 
iscsi_pdu_queue_locked(request); + + if (total_len == 0) + break; + } + + icl_pdu_free(response); +} + +static void +iscsi_pdu_handle_async_message(struct icl_pdu *response) +{ + struct iscsi_bhs_asynchronous_message *bhsam; + struct iscsi_session *is; + + is = PDU_SESSION(response); + bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs; + switch (bhsam->bhsam_async_event) { + case BHSAM_EVENT_TARGET_REQUESTS_LOGOUT: + ISCSI_SESSION_WARN(is, "target requests logout; removing session"); + iscsi_session_logout(is); + iscsi_session_terminate(is); + break; + case BHSAM_EVENT_TARGET_TERMINATES_CONNECTION: + ISCSI_SESSION_WARN(is, "target indicates it will drop drop the connection"); + break; + case BHSAM_EVENT_TARGET_TERMINATES_SESSION: + ISCSI_SESSION_WARN(is, "target indicates it will drop drop the session"); + break; + default: + /* + * XXX: Technically, we're obligated to also handle + * parameter renegotiation. + */ + ISCSI_SESSION_WARN(is, "ignoring AsyncEvent %d", bhsam->bhsam_async_event); + break; + } + + icl_pdu_free(response); +} + +static void +iscsi_pdu_handle_reject(struct icl_pdu *response) +{ + struct iscsi_bhs_reject *bhsr; + struct iscsi_session *is; + + is = PDU_SESSION(response); + bhsr = (struct iscsi_bhs_reject *)response->ip_bhs; + ISCSI_SESSION_WARN(is, "received Reject PDU, reason 0x%x; protocol error?", + bhsr->bhsr_reason); + + icl_pdu_free(response); +} + +static int +iscsi_ioctl_daemon_wait(struct iscsi_softc *sc, + struct iscsi_daemon_request *request) +{ + struct iscsi_session *is; + int error; + + sx_slock(&sc->sc_lock); + for (;;) { + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (is->is_waiting_for_iscsid) + break; + } + + if (is == NULL) { + /* + * No session requires attention from iscsid(8); wait. 
+ */ + error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); + if (error != 0) { + sx_sunlock(&sc->sc_lock); + return (error); + } + continue; + } + + ISCSI_SESSION_LOCK(is); + is->is_waiting_for_iscsid = false; + is->is_login_phase = true; + is->is_reason[0] = '\0'; + ISCSI_SESSION_UNLOCK(is); + + request->idr_session_id = is->is_id; + memcpy(&request->idr_conf, &is->is_conf, + sizeof(request->idr_conf)); + + sx_sunlock(&sc->sc_lock); + return (0); + } +} + +static int +iscsi_ioctl_daemon_handoff(struct iscsi_softc *sc, + struct iscsi_daemon_handoff *handoff) +{ + struct iscsi_session *is; + int error; + + sx_slock(&sc->sc_lock); + + /* + * Find the session to hand off socket to. + */ + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (is->is_id == handoff->idh_session_id) + break; + } + if (is == NULL) { + sx_sunlock(&sc->sc_lock); + return (ESRCH); + } + ISCSI_SESSION_LOCK(is); + if (is->is_conf.isc_discovery || is->is_terminating) { + ISCSI_SESSION_UNLOCK(is); + sx_sunlock(&sc->sc_lock); + return (EINVAL); + } + + strlcpy(is->is_target_alias, handoff->idh_target_alias, + sizeof(is->is_target_alias)); + memcpy(is->is_isid, handoff->idh_isid, sizeof(is->is_isid)); + is->is_statsn = handoff->idh_statsn; + is->is_initial_r2t = handoff->idh_initial_r2t; + is->is_immediate_data = handoff->idh_immediate_data; + is->is_max_data_segment_length = handoff->idh_max_data_segment_length; + is->is_max_burst_length = handoff->idh_max_burst_length; + is->is_first_burst_length = handoff->idh_first_burst_length; + + if (handoff->idh_header_digest == ISCSI_DIGEST_CRC32C) + is->is_conn->ic_header_crc32c = true; + else + is->is_conn->ic_header_crc32c = false; + if (handoff->idh_data_digest == ISCSI_DIGEST_CRC32C) + is->is_conn->ic_data_crc32c = true; + else + is->is_conn->ic_data_crc32c = false; + + is->is_cmdsn = 0; + is->is_expcmdsn = 1; + is->is_maxcmdsn = 1; + is->is_waiting_for_iscsid = false; + is->is_login_phase = false; + is->is_timeout = 0; + is->is_connected = true; + 
is->is_reason[0] = '\0'; + + ISCSI_SESSION_UNLOCK(is); + +#ifndef ICL_KERNEL_PROXY + error = icl_conn_handoff(is->is_conn, handoff->idh_socket); + if (error != 0) { + sx_sunlock(&sc->sc_lock); + iscsi_session_terminate(is); + return (error); + } +#endif + + sx_sunlock(&sc->sc_lock); + + if (is->is_sim != NULL) { + /* + * When reconnecting, there already is SIM allocated for the session. + */ + KASSERT(is->is_simq_frozen, ("reconnect without frozen simq")); + ISCSI_SESSION_LOCK(is); + ISCSI_SESSION_DEBUG(is, "releasing"); + xpt_release_simq(is->is_sim, 1); + is->is_simq_frozen = false; + ISCSI_SESSION_UNLOCK(is); + + } else { + ISCSI_SESSION_LOCK(is); + is->is_devq = cam_simq_alloc(maxtags); + if (is->is_devq == NULL) { + ISCSI_SESSION_WARN(is, "failed to allocate simq"); + iscsi_session_terminate(is); + return (ENOMEM); + } + + is->is_sim = cam_sim_alloc(iscsi_action, iscsi_poll, "iscsi", + is, is->is_id /* unit */, &is->is_lock, + maxtags, maxtags, is->is_devq); + if (is->is_sim == NULL) { + ISCSI_SESSION_UNLOCK(is); + ISCSI_SESSION_WARN(is, "failed to allocate SIM"); + cam_simq_free(is->is_devq); + iscsi_session_terminate(is); + return (ENOMEM); + } + + error = xpt_bus_register(is->is_sim, NULL, 0); + if (error != 0) { + ISCSI_SESSION_UNLOCK(is); + ISCSI_SESSION_WARN(is, "failed to register bus"); + iscsi_session_terminate(is); + return (ENOMEM); + } + + error = xpt_create_path(&is->is_path, /*periph*/NULL, + cam_sim_path(is->is_sim), CAM_TARGET_WILDCARD, + CAM_LUN_WILDCARD); + if (error != CAM_REQ_CMP) { + ISCSI_SESSION_UNLOCK(is); + ISCSI_SESSION_WARN(is, "failed to create path"); + iscsi_session_terminate(is); + return (ENOMEM); + } + ISCSI_SESSION_UNLOCK(is); + } + + return (0); +} + +static int +iscsi_ioctl_daemon_fail(struct iscsi_softc *sc, + struct iscsi_daemon_fail *fail) +{ + struct iscsi_session *is; + + sx_slock(&sc->sc_lock); + + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (is->is_id == fail->idf_session_id) + break; + } + if (is == NULL) { + 
sx_sunlock(&sc->sc_lock); + return (ESRCH); + } + ISCSI_SESSION_LOCK(is); + ISCSI_SESSION_DEBUG(is, "iscsid(8) failed: %s", + fail->idf_reason); + strlcpy(is->is_reason, fail->idf_reason, sizeof(is->is_reason)); + //is->is_waiting_for_iscsid = false; + //is->is_login_phase = true; + //iscsi_session_reconnect(is); + ISCSI_SESSION_UNLOCK(is); + sx_sunlock(&sc->sc_lock); + + return (0); +} + +#ifdef ICL_KERNEL_PROXY +static int +iscsi_ioctl_daemon_connect(struct iscsi_softc *sc, + struct iscsi_daemon_connect *idc) +{ + struct iscsi_session *is; + struct sockaddr *from_sa, *to_sa; + int error; + + sx_slock(&sc->sc_lock); + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (is->is_id == idc->idc_session_id) + break; + } + if (is == NULL) { + sx_sunlock(&sc->sc_lock); + return (ESRCH); + } + sx_sunlock(&sc->sc_lock); + + if (idc->idc_from_addrlen > 0) { + error = getsockaddr(&from_sa, (void *)idc->idc_from_addr, idc->idc_from_addrlen); + if (error != 0) + return (error); + } else { + from_sa = NULL; + } + error = getsockaddr(&to_sa, (void *)idc->idc_to_addr, idc->idc_to_addrlen); + if (error != 0) { + free(from_sa, M_SONAME); + return (error); + } + + ISCSI_SESSION_LOCK(is); + is->is_waiting_for_iscsid = false; + is->is_login_phase = true; + is->is_timeout = 0; + ISCSI_SESSION_UNLOCK(is); + + error = icl_conn_connect(is->is_conn, idc->idc_iser, idc->idc_domain, + idc->idc_socktype, idc->idc_protocol, from_sa, to_sa); + free(from_sa, M_SONAME); + free(to_sa, M_SONAME); + + /* + * Digests are always disabled during login phase. 
+ */ + is->is_conn->ic_header_crc32c = false; + is->is_conn->ic_data_crc32c = false; + + return (error); +} + +static int +iscsi_ioctl_daemon_send(struct iscsi_softc *sc, + struct iscsi_daemon_send *ids) +{ + struct iscsi_session *is; + struct icl_pdu *ip; + size_t datalen; + void *data; + int error; + + sx_slock(&sc->sc_lock); + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (is->is_id == ids->ids_session_id) + break; + } + if (is == NULL) { + sx_sunlock(&sc->sc_lock); + return (ESRCH); + } + sx_sunlock(&sc->sc_lock); + + if (is->is_login_phase == false) + return (EBUSY); + + if (is->is_terminating || is->is_reconnecting) + return (EIO); + + datalen = ids->ids_data_segment_len; + if (datalen > ISCSI_MAX_DATA_SEGMENT_LENGTH) + return (EINVAL); + if (datalen > 0) { + data = malloc(datalen, M_ISCSI, M_WAITOK); + error = copyin(ids->ids_data_segment, data, datalen); + if (error != 0) { + free(data, M_ISCSI); + return (error); + } + } + + ip = icl_pdu_new_bhs(is->is_conn, M_WAITOK); + memcpy(ip->ip_bhs, ids->ids_bhs, sizeof(*ip->ip_bhs)); + if (datalen > 0) { + error = icl_pdu_append_data(ip, data, datalen, M_WAITOK); + KASSERT(error == 0, ("icl_pdu_append_data(..., M_WAITOK) failed")); + free(data, M_ISCSI); + } + icl_pdu_queue(ip); + + return (0); +} + +static int +iscsi_ioctl_daemon_receive(struct iscsi_softc *sc, + struct iscsi_daemon_receive *idr) +{ + struct iscsi_session *is; + struct icl_pdu *ip; + void *data; + + sx_slock(&sc->sc_lock); + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (is->is_id == idr->idr_session_id) + break; + } + if (is == NULL) { + sx_sunlock(&sc->sc_lock); + return (ESRCH); + } + sx_sunlock(&sc->sc_lock); + + if (is->is_login_phase == false) + return (EBUSY); + + ISCSI_SESSION_LOCK(is); + while (is->is_login_pdu == NULL && + is->is_terminating == false && + is->is_reconnecting == false) + cv_wait(&is->is_login_cv, &is->is_lock); + if (is->is_terminating || is->is_reconnecting) { + ISCSI_SESSION_UNLOCK(is); + return (EIO); + } + 
ip = is->is_login_pdu; + is->is_login_pdu = NULL; + ISCSI_SESSION_UNLOCK(is); + + if (ip->ip_data_len > idr->idr_data_segment_len) { + icl_pdu_free(ip); + return (EMSGSIZE); + } + + /* + * Report a failed copy to userspace instead of claiming the + * PDU was delivered. + */ + if (copyout(ip->ip_bhs, idr->idr_bhs, sizeof(*ip->ip_bhs)) != 0) { + icl_pdu_free(ip); + return (EFAULT); + } + if (ip->ip_data_len > 0) { + data = malloc(ip->ip_data_len, M_ISCSI, M_WAITOK); + icl_pdu_get_data(ip, 0, data, ip->ip_data_len); + if (copyout(data, idr->idr_data_segment, ip->ip_data_len) != 0) { + free(data, M_ISCSI); + icl_pdu_free(ip); + return (EFAULT); + } + free(data, M_ISCSI); + } + + icl_pdu_free(ip); + + return (0); +} + +static int +iscsi_ioctl_daemon_close(struct iscsi_softc *sc, + struct iscsi_daemon_close *idc) +{ + struct iscsi_session *is; + + sx_slock(&sc->sc_lock); + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (is->is_id == idc->idc_session_id) + break; + } + if (is == NULL) { + sx_sunlock(&sc->sc_lock); + return (ESRCH); + } + sx_sunlock(&sc->sc_lock); + + iscsi_session_reconnect(is); + + return (0); +} +#endif /* ICL_KERNEL_PROXY */ + +static void +iscsi_sanitize_session_conf(struct iscsi_session_conf *isc) +{ + /* + * Just make sure all the fields are null-terminated. + * + * XXX: This is not particularly secure. We should + * create our own conf and then copy in relevant + * fields.
+ */ + isc->isc_initiator[ISCSI_NAME_LEN - 1] = '\0'; + isc->isc_initiator_addr[ISCSI_ADDR_LEN - 1] = '\0'; + isc->isc_initiator_alias[ISCSI_ALIAS_LEN - 1] = '\0'; + isc->isc_target[ISCSI_NAME_LEN - 1] = '\0'; + isc->isc_target_addr[ISCSI_ADDR_LEN - 1] = '\0'; + isc->isc_user[ISCSI_NAME_LEN - 1] = '\0'; + isc->isc_secret[ISCSI_SECRET_LEN - 1] = '\0'; + isc->isc_mutual_user[ISCSI_NAME_LEN - 1] = '\0'; + isc->isc_mutual_secret[ISCSI_SECRET_LEN - 1] = '\0'; +} + +static int +iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa) +{ + struct iscsi_session *is; + const struct iscsi_session *is2; + int error; + + iscsi_sanitize_session_conf(&isa->isa_conf); + + is = malloc(sizeof(*is), M_ISCSI, M_ZERO | M_WAITOK); + memcpy(&is->is_conf, &isa->isa_conf, sizeof(is->is_conf)); + + if (is->is_conf.isc_initiator[0] == '\0' || + is->is_conf.isc_target == '\0' || + is->is_conf.isc_target_addr == '\0') { + free(is, M_ISCSI); + return (EINVAL); + } + + sx_xlock(&sc->sc_lock); + + /* + * Prevent duplicates. 
+ */ + TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) { + if (strcmp(is2->is_conf.isc_target, + is->is_conf.isc_target) == 0) { + sx_xunlock(&sc->sc_lock); + return (EBUSY); + } + } + + is->is_conn = icl_conn_new(); + is->is_conn->ic_receive = iscsi_receive_callback; + is->is_conn->ic_error = iscsi_error_callback; + is->is_conn->ic_prv0 = is; + TAILQ_INIT(&is->is_outstanding); + TAILQ_INIT(&is->is_postponed); + mtx_init(&is->is_lock, "iscsi_lock", NULL, MTX_DEF); + cv_init(&is->is_maintenance_cv, "iscsi_mt"); +#ifdef ICL_KERNEL_PROXY + cv_init(&is->is_login_cv, "iscsi_login"); +#endif + + is->is_softc = sc; + sc->sc_last_session_id++; + is->is_id = sc->sc_last_session_id; + callout_init(&is->is_callout, 1); + callout_reset(&is->is_callout, 1 * hz, iscsi_callout, is); + TAILQ_INSERT_TAIL(&sc->sc_sessions, is, is_next); + + error = kthread_add(iscsi_maintenance_thread, is, NULL, NULL, 0, 0, "iscsimt"); + if (error != 0) { + ISCSI_SESSION_WARN(is, "kthread_add(9) failed with error %d", error); + return (error); + } + + /* + * Trigger immediate reconnection. 
+ */ + is->is_waiting_for_iscsid = true; + strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason)); + cv_signal(&sc->sc_cv); + + sx_xunlock(&sc->sc_lock); + + return (0); +} + +static bool +iscsi_session_conf_matches(unsigned int id1, const struct iscsi_session_conf *c1, + unsigned int id2, const struct iscsi_session_conf *c2) +{ + if (id2 == 0 && c2->isc_target[0] == '\0' && + c2->isc_target_addr[0] == '\0') + return (true); + if (id2 != 0 && id2 == id1) + return (true); + if (c2->isc_target[0] != '\0' && + strcmp(c1->isc_target, c2->isc_target) == 0) + return (true); + if (c2->isc_target_addr[0] != '\0' && + strcmp(c1->isc_target_addr, c2->isc_target_addr) == 0) + return (true); + return (false); +} + +static int +iscsi_ioctl_session_remove(struct iscsi_softc *sc, + struct iscsi_session_remove *isr) +{ + struct iscsi_session *is, *tmp; + bool found = false; + + iscsi_sanitize_session_conf(&isr->isr_conf); + + sx_xlock(&sc->sc_lock); + TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) { + ISCSI_SESSION_LOCK(is); + if (iscsi_session_conf_matches(is->is_id, &is->is_conf, + isr->isr_session_id, &isr->isr_conf)) { + found = true; + iscsi_session_logout(is); + iscsi_session_terminate(is); + } + ISCSI_SESSION_UNLOCK(is); + } + sx_xunlock(&sc->sc_lock); + + if (!found) + return (ESRCH); + + return (0); +} + +static int +iscsi_ioctl_session_list(struct iscsi_softc *sc, struct iscsi_session_list *isl) +{ + int error; + unsigned int i = 0; + struct iscsi_session *is; + struct iscsi_session_state iss; + + sx_slock(&sc->sc_lock); + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) { + if (i >= isl->isl_nentries) { + sx_sunlock(&sc->sc_lock); + return (EMSGSIZE); + } + memset(&iss, 0, sizeof(iss)); + memcpy(&iss.iss_conf, &is->is_conf, sizeof(iss.iss_conf)); + iss.iss_id = is->is_id; + strlcpy(iss.iss_target_alias, is->is_target_alias, sizeof(iss.iss_target_alias)); + strlcpy(iss.iss_reason, is->is_reason, sizeof(iss.iss_reason)); + + if 
(is->is_conn->ic_header_crc32c) + iss.iss_header_digest = ISCSI_DIGEST_CRC32C; + else + iss.iss_header_digest = ISCSI_DIGEST_NONE; + + if (is->is_conn->ic_data_crc32c) + iss.iss_data_digest = ISCSI_DIGEST_CRC32C; + else + iss.iss_data_digest = ISCSI_DIGEST_NONE; + + iss.iss_max_data_segment_length = is->is_max_data_segment_length; + iss.iss_immediate_data = is->is_immediate_data; + iss.iss_connected = is->is_connected; + + error = copyout(&iss, isl->isl_pstates + i, sizeof(iss)); + if (error != 0) { + sx_sunlock(&sc->sc_lock); + return (error); + } + i++; + } + sx_sunlock(&sc->sc_lock); + + isl->isl_nentries = i; + + return (0); +} + +static int +iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode, + struct thread *td) +{ + struct iscsi_softc *sc; + + sc = dev->si_drv1; + + switch (cmd) { + case ISCSIDWAIT: + return (iscsi_ioctl_daemon_wait(sc, + (struct iscsi_daemon_request *)arg)); + case ISCSIDHANDOFF: + return (iscsi_ioctl_daemon_handoff(sc, + (struct iscsi_daemon_handoff *)arg)); + case ISCSIDFAIL: + return (iscsi_ioctl_daemon_fail(sc, + (struct iscsi_daemon_fail *)arg)); +#ifdef ICL_KERNEL_PROXY + case ISCSIDCONNECT: + return (iscsi_ioctl_daemon_connect(sc, + (struct iscsi_daemon_connect *)arg)); + case ISCSIDSEND: + return (iscsi_ioctl_daemon_send(sc, + (struct iscsi_daemon_send *)arg)); + case ISCSIDRECEIVE: + return (iscsi_ioctl_daemon_receive(sc, + (struct iscsi_daemon_receive *)arg)); + case ISCSIDCLOSE: + return (iscsi_ioctl_daemon_close(sc, + (struct iscsi_daemon_close *)arg)); +#endif /* ICL_KERNEL_PROXY */ + case ISCSISADD: + return (iscsi_ioctl_session_add(sc, + (struct iscsi_session_add *)arg)); + case ISCSISREMOVE: + return (iscsi_ioctl_session_remove(sc, + (struct iscsi_session_remove *)arg)); + case ISCSISLIST: + return (iscsi_ioctl_session_list(sc, + (struct iscsi_session_list *)arg)); + default: + return (EINVAL); + } +} + +static uint64_t +iscsi_encode_lun(uint32_t lun) +{ + uint8_t encoded[8]; + uint64_t result; + + 
memset(encoded, 0, sizeof(encoded)); + + if (lun < 256) { + /* + * Peripheral device addressing. + */ + encoded[1] = lun; + } else if (lun < 16384) { + /* + * Flat space addressing. + */ + encoded[0] = 0x40; + encoded[0] |= (lun >> 8) & 0x3f; + encoded[1] = lun & 0xff; + } else { + /* + * Extended flat space addressing. + */ + encoded[0] = 0xd2; + encoded[1] = lun >> 16; + encoded[2] = lun >> 8; + encoded[3] = lun; + } + + memcpy(&result, encoded, sizeof(result)); + return (result); +} + +static struct iscsi_outstanding * +iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag) +{ + struct iscsi_outstanding *io; + + ISCSI_SESSION_LOCK_ASSERT(is); + + TAILQ_FOREACH(io, &is->is_outstanding, io_next) { + if (io->io_initiator_task_tag == initiator_task_tag) + return (io); + } + return (NULL); +} + +static int +iscsi_outstanding_add(struct iscsi_session *is, + uint32_t initiator_task_tag, union ccb *ccb) +{ + struct iscsi_outstanding *io; + + ISCSI_SESSION_LOCK_ASSERT(is); + + KASSERT(iscsi_outstanding_find(is, initiator_task_tag) == NULL, + ("initiator_task_tag 0x%x already added", initiator_task_tag)); + + io = uma_zalloc(iscsi_outstanding_zone, M_NOWAIT | M_ZERO); + if (io == NULL) { + ISCSI_SESSION_WARN(is, "failed to allocate %zd bytes", sizeof(*io)); + return (ENOMEM); + } + io->io_initiator_task_tag = initiator_task_tag; + io->io_ccb = ccb; + TAILQ_INSERT_TAIL(&is->is_outstanding, io, io_next); + return (0); +} + +static void +iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io) +{ + + ISCSI_SESSION_LOCK_ASSERT(is); + + TAILQ_REMOVE(&is->is_outstanding, io, io_next); + uma_zfree(iscsi_outstanding_zone, io); +} + +static void +iscsi_action_scsiio(struct iscsi_session *is, union ccb *ccb) +{ + struct icl_pdu *request; + struct iscsi_bhs_scsi_command *bhssc; + struct ccb_scsiio *csio; + size_t len; + int error; + + ISCSI_SESSION_LOCK_ASSERT(is); + +#if 0 + KASSERT(is->is_login_phase == false, ("%s called during Login 
Phase", __func__)); +#else + if (is->is_login_phase) { + ISCSI_SESSION_DEBUG(is, "called during login phase"); + if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + ccb->ccb_h.status = CAM_REQ_ABORTED | CAM_DEV_QFRZN; + xpt_done(ccb); + return; + } +#endif + + request = icl_pdu_new_bhs(is->is_conn, M_NOWAIT); + if (request == NULL) { + if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; + xpt_done(ccb); + return; + } + + csio = &ccb->csio; + bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs; + bhssc->bhssc_opcode = ISCSI_BHS_OPCODE_SCSI_COMMAND; + bhssc->bhssc_flags |= BHSSC_FLAGS_F; + switch (csio->ccb_h.flags & CAM_DIR_MASK) { + case CAM_DIR_IN: + bhssc->bhssc_flags |= BHSSC_FLAGS_R; + break; + case CAM_DIR_OUT: + bhssc->bhssc_flags |= BHSSC_FLAGS_W; + break; + } + + switch (csio->tag_action) { + case MSG_HEAD_OF_Q_TAG: + bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_HOQ; + break; + break; + case MSG_ORDERED_Q_TAG: + bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ORDERED; + break; + case MSG_ACA_TASK: + bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ACA; + break; + case CAM_TAG_ACTION_NONE: + case MSG_SIMPLE_Q_TAG: + default: + bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_SIMPLE; + break; + } + + bhssc->bhssc_lun = iscsi_encode_lun(csio->ccb_h.target_lun); + bhssc->bhssc_initiator_task_tag = is->is_initiator_task_tag; + is->is_initiator_task_tag++; + bhssc->bhssc_expected_data_transfer_length = htonl(csio->dxfer_len); + KASSERT(csio->cdb_len <= sizeof(bhssc->bhssc_cdb), + ("unsupported CDB size %zd", (size_t)csio->cdb_len)); + + if (csio->ccb_h.flags & CAM_CDB_POINTER) + memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); + else + memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); + + error = iscsi_outstanding_add(is, 
bhssc->bhssc_initiator_task_tag, ccb); + if (error != 0) { + icl_pdu_free(request); + if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; + xpt_done(ccb); + return; + } + + if (is->is_immediate_data && + (csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) { + len = csio->dxfer_len; + //ISCSI_SESSION_DEBUG(is, "adding %zd of immediate data", len); + if (len > is->is_first_burst_length) { + ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_first_burst_length); + len = is->is_first_burst_length; + } + + error = icl_pdu_append_data(request, csio->data_ptr, len, M_NOWAIT); + if (error != 0) { + icl_pdu_free(request); + if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) { + xpt_freeze_devq(ccb->ccb_h.path, 1); + ISCSI_SESSION_DEBUG(is, "freezing devq"); + } + ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN; + xpt_done(ccb); + return; + } + } + iscsi_pdu_queue_locked(request); +} + +static void +iscsi_action(struct cam_sim *sim, union ccb *ccb) +{ + struct iscsi_session *is; + + is = cam_sim_softc(sim); + + ISCSI_SESSION_LOCK_ASSERT(is); + + if (is->is_terminating) { + ISCSI_SESSION_DEBUG(is, "called during termination"); + ccb->ccb_h.status = CAM_DEV_NOT_THERE; + xpt_done(ccb); + return; + } + + switch (ccb->ccb_h.func_code) { + case XPT_PATH_INQ: + { + struct ccb_pathinq *cpi = &ccb->cpi; + + cpi->version_num = 1; + cpi->hba_inquiry = PI_TAG_ABLE; + cpi->target_sprt = 0; + //cpi->hba_misc = PIM_NOBUSRESET; + cpi->hba_misc = 0; + cpi->hba_eng_cnt = 0; + cpi->max_target = 0; + cpi->max_lun = 255; + //cpi->initiator_id = 0; /* XXX */ + cpi->initiator_id = 64; /* XXX */ + strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); + strncpy(cpi->hba_vid, "iSCSI", HBA_IDLEN); + strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); + cpi->unit_number = cam_sim_unit(sim); + cpi->bus_id = cam_sim_bus(sim); + cpi->base_transfer_speed = 150000; /* XXX 
*/ + cpi->transport = XPORT_ISCSI; + cpi->transport_version = 0; + cpi->protocol = PROTO_SCSI; + cpi->protocol_version = SCSI_REV_SPC3; + cpi->maxio = MAXPHYS; + cpi->ccb_h.status = CAM_REQ_CMP; + break; + } + case XPT_CALC_GEOMETRY: + cam_calc_geometry(&ccb->ccg, /*extended*/1); + ccb->ccb_h.status = CAM_REQ_CMP; + break; +#if 0 + /* + * XXX: What's the point? + */ + case XPT_RESET_BUS: + case XPT_ABORT: + case XPT_TERM_IO: + ISCSI_SESSION_DEBUG(is, "faking success for reset, abort, or term_io"); + ccb->ccb_h.status = CAM_REQ_CMP; + break; +#endif + case XPT_SCSI_IO: + iscsi_action_scsiio(is, ccb); + return; + default: +#if 0 + ISCSI_SESSION_DEBUG(is, "got unsupported code 0x%x", ccb->ccb_h.func_code); +#endif + ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; + break; + } + xpt_done(ccb); +} + +static void +iscsi_poll(struct cam_sim *sim) +{ + + KASSERT(0, ("%s: you're not supposed to be here", __func__)); +} + +static void +iscsi_shutdown(struct iscsi_softc *sc) +{ + struct iscsi_session *is; + + ISCSI_DEBUG("removing all sessions due to shutdown"); + + sx_slock(&sc->sc_lock); + TAILQ_FOREACH(is, &sc->sc_sessions, is_next) + iscsi_session_terminate(is); + sx_sunlock(&sc->sc_lock); +} + +static int +iscsi_load(void) +{ + int error; + + sc = malloc(sizeof(*sc), M_ISCSI, M_ZERO | M_WAITOK); + sx_init(&sc->sc_lock, "iscsi"); + TAILQ_INIT(&sc->sc_sessions); + cv_init(&sc->sc_cv, "iscsi_cv"); + + iscsi_outstanding_zone = uma_zcreate("iscsi_outstanding", + sizeof(struct iscsi_outstanding), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + + error = make_dev_p(MAKEDEV_CHECKNAME, &sc->sc_cdev, &iscsi_cdevsw, + NULL, UID_ROOT, GID_WHEEL, 0600, "iscsi"); + if (error != 0) { + ISCSI_WARN("failed to create device node, error %d", error); + sx_destroy(&sc->sc_lock); + cv_destroy(&sc->sc_cv); + uma_zdestroy(iscsi_outstanding_zone); + free(sc, M_ISCSI); + return (error); + } + sc->sc_cdev->si_drv1 = sc; + + /* + * XXX: For some reason this doesn't do its job; active sessions 
still hang out there + * after final sync, making the reboot effectively hang. + */ + sc->sc_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_post_sync, iscsi_shutdown, sc, SHUTDOWN_PRI_DEFAULT); + + return (0); +} + +static int +iscsi_unload(void) +{ + /* + * XXX: kldunload hangs on "devdrn". + */ + struct iscsi_session *is, *tmp; + + ISCSI_DEBUG("removing device node"); + destroy_dev(sc->sc_cdev); + ISCSI_DEBUG("device node removed"); + + EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_eh); + + sx_slock(&sc->sc_lock); + TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) + iscsi_session_terminate(is); + while(!TAILQ_EMPTY(&sc->sc_sessions)) { + ISCSI_DEBUG("waiting for sessions to terminate"); + cv_wait(&sc->sc_cv, &sc->sc_lock); + } + ISCSI_DEBUG("all sessions terminated"); + sx_sunlock(&sc->sc_lock); + + uma_zdestroy(iscsi_outstanding_zone); + sx_destroy(&sc->sc_lock); + cv_destroy(&sc->sc_cv); + free(sc, M_ISCSI); + return (0); +} + +static int +iscsi_quiesce(void) +{ + sx_slock(&sc->sc_lock); + if (!TAILQ_EMPTY(&sc->sc_sessions)) { + sx_sunlock(&sc->sc_lock); + return (EBUSY); + } + sx_sunlock(&sc->sc_lock); + return (0); +} + +static int +iscsi_modevent(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + error = iscsi_load(); + break; + case MOD_UNLOAD: + error = iscsi_unload(); + break; + case MOD_QUIESCE: + error = iscsi_quiesce(); + break; + default: + error = EINVAL; + break; + } + return (error); +} + +moduledata_t iscsi_data = { + "iscsi", + iscsi_modevent, + 0 +}; + +DECLARE_MODULE(iscsi, iscsi_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); +MODULE_DEPEND(iscsi, cam, 1, 1, 1); +MODULE_DEPEND(iscsi, icl, 1, 1, 1); diff --git a/sys/dev/iscsi/iscsi.h b/sys/dev/iscsi/iscsi.h new file mode 100644 index 000000000000..dd78852b4716 --- /dev/null +++ b/sys/dev/iscsi/iscsi.h @@ -0,0 +1,135 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. 
+ * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef ISCSI_H +#define ISCSI_H + +struct iscsi_softc; +struct icl_conn; + +#define ISCSI_NAME_LEN 224 /* 223 bytes, by RFC 3720, + '\0' */ +#define ISCSI_ADDR_LEN 47 /* INET6_ADDRSTRLEN + '\0' */ +#define ISCSI_SECRET_LEN 17 /* 16 + '\0' */ + +struct iscsi_outstanding { + TAILQ_ENTRY(iscsi_outstanding) io_next; + union ccb *io_ccb; + size_t io_received; + uint32_t io_initiator_task_tag; + uint32_t io_datasn; +}; + +struct iscsi_session { + TAILQ_ENTRY(iscsi_session) is_next; + + struct icl_conn *is_conn; + struct mtx is_lock; + + uint32_t is_statsn; + uint32_t is_cmdsn; + uint32_t is_expcmdsn; + uint32_t is_maxcmdsn; + uint32_t is_initiator_task_tag; + int is_header_digest; + int is_data_digest; + int is_initial_r2t; + size_t is_max_burst_length; + size_t is_first_burst_length; + uint8_t is_isid[6]; + bool is_immediate_data; + size_t is_max_data_segment_length; + char is_target_alias[ISCSI_ALIAS_LEN]; + + TAILQ_HEAD(, iscsi_outstanding) is_outstanding; + TAILQ_HEAD(, icl_pdu) is_postponed; + + struct callout is_callout; + unsigned int is_timeout; + + /* + * XXX: This could be rewritten using a single variable, + * but somehow it results in uglier code. + */ + /* + * We're waiting for iscsid(8); after iscsid_timeout + * expires, kernel will wake up an iscsid(8) to handle + * the session. + */ + bool is_waiting_for_iscsid; + + /* + * Some iscsid(8) instance is handling the session; + * after login_timeout expires, kernel will wake up + * another iscsid(8) to handle the session. + */ + bool is_login_phase; + + /* + * We're in the process of removing the iSCSI session. + */ + bool is_terminating; + + /* + * We're waiting for the maintenance thread to do some + * reconnection tasks. 
+ */ + bool is_reconnecting; + + bool is_connected; + + struct cam_devq *is_devq; + struct cam_sim *is_sim; + struct cam_path *is_path; + struct cv is_maintenance_cv; + struct iscsi_softc *is_softc; + unsigned int is_id; + struct iscsi_session_conf is_conf; + bool is_simq_frozen; + + char is_reason[ISCSI_REASON_LEN]; + +#ifdef ICL_KERNEL_PROXY + struct cv is_login_cv;; + struct icl_pdu *is_login_pdu; +#endif +}; + +struct iscsi_softc { + device_t sc_dev; + struct sx sc_lock; + struct cdev *sc_cdev; + TAILQ_HEAD(, iscsi_session) sc_sessions; + struct cv sc_cv; + unsigned int sc_last_session_id; + eventhandler_tag sc_shutdown_eh; +}; + +#endif /* !ISCSI_H */ diff --git a/sys/dev/iscsi/iscsi_ioctl.h b/sys/dev/iscsi/iscsi_ioctl.h new file mode 100644 index 000000000000..9e278445d815 --- /dev/null +++ b/sys/dev/iscsi/iscsi_ioctl.h @@ -0,0 +1,201 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef ISCSI_IOCTL_H +#define ISCSI_IOCTL_H + +#ifdef ICL_KERNEL_PROXY +#include <sys/socket.h> +#endif + +#define ISCSI_PATH "/dev/iscsi" +#define ISCSI_MAX_DATA_SEGMENT_LENGTH (128 * 1024) + +#define ISCSI_NAME_LEN 224 /* 223 bytes, by RFC 3720, + '\0' */ +#define ISCSI_ADDR_LEN 47 /* INET6_ADDRSTRLEN + '\0' */ +#define ISCSI_ALIAS_LEN 256 /* XXX: Where did it come from? */ +#define ISCSI_SECRET_LEN 17 /* 16 + '\0' */ +#define ISCSI_REASON_LEN 32 + +#define ISCSI_DIGEST_NONE 0 +#define ISCSI_DIGEST_CRC32C 1 + +/* + * Session configuration, set when adding the session. + */ +struct iscsi_session_conf { + char isc_initiator[ISCSI_NAME_LEN]; + char isc_initiator_addr[ISCSI_ADDR_LEN]; + char isc_initiator_alias[ISCSI_ALIAS_LEN]; + char isc_target[ISCSI_NAME_LEN]; + char isc_target_addr[ISCSI_ADDR_LEN]; + char isc_user[ISCSI_NAME_LEN]; + char isc_secret[ISCSI_SECRET_LEN]; + char isc_mutual_user[ISCSI_NAME_LEN]; + char isc_mutual_secret[ISCSI_SECRET_LEN]; + int isc_discovery; + int isc_header_digest; + int isc_data_digest; + int isc_iser; + int isc_spare[4]; +}; + +/* + * Session state, negotiated by iscsid(8) and queried by iscsictl(8). 
+ */ +struct iscsi_session_state { + struct iscsi_session_conf iss_conf; + unsigned int iss_id; + char iss_target_alias[ISCSI_ALIAS_LEN]; + int iss_header_digest; + int iss_data_digest; + int iss_max_data_segment_length; + int iss_immediate_data; + int iss_connected; + char iss_reason[ISCSI_REASON_LEN]; + int iss_spare[4]; +}; + +/* + * For use with iscsid(8). + */ + +struct iscsi_daemon_request { + unsigned int idr_session_id; + struct iscsi_session_conf idr_conf; + int idr_spare[4]; +}; + +struct iscsi_daemon_handoff { + unsigned int idh_session_id; + int idh_socket; + char idh_target_alias[ISCSI_ALIAS_LEN]; + uint8_t idh_isid[6]; + uint32_t idh_statsn; + int idh_header_digest; + int idh_data_digest; + int idh_initial_r2t; + int idh_immediate_data; + size_t idh_max_data_segment_length; + size_t idh_max_burst_length; + size_t idh_first_burst_length; +}; + +struct iscsi_daemon_fail { + unsigned int idf_session_id; + char idf_reason[ISCSI_REASON_LEN]; +}; + +#define ISCSIDWAIT _IOR('I', 0x01, struct iscsi_daemon_request) +#define ISCSIDHANDOFF _IOW('I', 0x02, struct iscsi_daemon_handoff) +#define ISCSIDFAIL _IOW('I', 0x03, struct iscsi_daemon_fail) + +#ifdef ICL_KERNEL_PROXY + +/* + * When ICL_KERNEL_PROXY is not defined, the iscsid(8) is responsible + * for creating the socket, connecting, performing Login Phase using + * socket in the usual userspace way, and then passing the socket file + * descriptor to the kernel part using ISCSIDHANDOFF. + * + * When ICL_KERNEL_PROXY is defined, the iscsid(8) creates the session + * using ISCSIDCONNECT, performs Login Phase using ISCSIDSEND/ISCSIDRECEIVE + * instead of read(2)/write(2), and then calls ISCSIDHANDOFF with + * idh_socket set to 0. + * + * The purpose of ICL_KERNEL_PROXY is to work around the fact that, + * at this time, it's not possible to do iWARP (RDMA) in userspace.
+ */ + +struct iscsi_daemon_connect { + int idc_session_id; + int idc_iser; + int idc_domain; + int idc_socktype; + int idc_protocol; + struct sockaddr *idc_from_addr; + socklen_t idc_from_addrlen; + struct sockaddr *idc_to_addr; + socklen_t idc_to_addrlen; +}; + +struct iscsi_daemon_send { + int ids_session_id; + void *ids_bhs; + size_t ids_spare; + void *ids_spare2; + size_t ids_data_segment_len; + void *ids_data_segment; +}; + +struct iscsi_daemon_receive { + int idr_session_id; + void *idr_bhs; + size_t idr_spare; + void *idr_spare2; + size_t idr_data_segment_len; + void *idr_data_segment; +}; + +struct iscsi_daemon_close { + int idc_session_id; +}; + +#define ISCSIDCONNECT _IOWR('I', 0x04, struct iscsi_daemon_connect) +#define ISCSIDSEND _IOWR('I', 0x05, struct iscsi_daemon_send) +#define ISCSIDRECEIVE _IOWR('I', 0x06, struct iscsi_daemon_receive) +#define ISCSIDCLOSE _IOWR('I', 0x07, struct iscsi_daemon_close) + +#endif /* ICL_KERNEL_PROXY */ + +/* + * For use with iscsictl(8). + */ + +struct iscsi_session_add { + struct iscsi_session_conf isa_conf; +}; + +struct iscsi_session_remove { + unsigned int isr_session_id; + struct iscsi_session_conf isr_conf; +}; + +struct iscsi_session_list { + unsigned int isl_nentries; + struct iscsi_session_state *isl_pstates; +}; + +#define ISCSISADD _IOW('I', 0x11, struct iscsi_session_add) +#define ISCSISREMOVE _IOW('I', 0x12, struct iscsi_session_remove) +#define ISCSISLIST _IOWR('I', 0x13, struct iscsi_session_list) + +#endif /* !ISCSI_IOCTL_H */ diff --git a/sys/dev/iscsi/iscsi_proto.h b/sys/dev/iscsi/iscsi_proto.h new file mode 100644 index 000000000000..97d73a7a074e --- /dev/null +++ b/sys/dev/iscsi/iscsi_proto.h @@ -0,0 +1,439 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef ISCSI_PROTO_H +#define ISCSI_PROTO_H + +#ifndef CTASSERT +#define CTASSERT(x) _CTASSERT(x, __LINE__) +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) typedef char __assert_ ## y [(x) ? 
1 : -1] +#endif + +#define ISCSI_BHS_SIZE 48 +#define ISCSI_HEADER_DIGEST_SIZE 4 +#define ISCSI_DATA_DIGEST_SIZE 4 + +#define ISCSI_BHS_OPCODE_IMMEDIATE 0x40 + +#define ISCSI_BHS_OPCODE_NOP_OUT 0x00 +#define ISCSI_BHS_OPCODE_SCSI_COMMAND 0x01 +#define ISCSI_BHS_OPCODE_TASK_REQUEST 0x02 +#define ISCSI_BHS_OPCODE_LOGIN_REQUEST 0x03 +#define ISCSI_BHS_OPCODE_TEXT_REQUEST 0x04 +#define ISCSI_BHS_OPCODE_SCSI_DATA_OUT 0x05 +#define ISCSI_BHS_OPCODE_LOGOUT_REQUEST 0x06 + +#define ISCSI_BHS_OPCODE_NOP_IN 0x20 +#define ISCSI_BHS_OPCODE_SCSI_RESPONSE 0x21 +#define ISCSI_BHS_OPCODE_TASK_RESPONSE 0x22 +#define ISCSI_BHS_OPCODE_LOGIN_RESPONSE 0x23 +#define ISCSI_BHS_OPCODE_TEXT_RESPONSE 0x24 +#define ISCSI_BHS_OPCODE_SCSI_DATA_IN 0x25 +#define ISCSI_BHS_OPCODE_LOGOUT_RESPONSE 0x26 +#define ISCSI_BHS_OPCODE_R2T 0x31 +#define ISCSI_BHS_OPCODE_ASYNC_MESSAGE 0x32 +#define ISCSI_BHS_OPCODE_REJECT 0x3f + +struct iscsi_bhs { + uint8_t bhs_opcode; + uint8_t bhs_opcode_specific1[3]; + uint8_t bhs_total_ahs_len; + uint8_t bhs_data_segment_len[3]; + uint64_t bhs_lun; + uint8_t bhs_inititator_task_tag[4]; + uint8_t bhs_opcode_specific4[28]; +}; +CTASSERT(sizeof(struct iscsi_bhs) == ISCSI_BHS_SIZE); + +#define BHSSC_FLAGS_F 0x80 +#define BHSSC_FLAGS_R 0x40 +#define BHSSC_FLAGS_W 0x20 +#define BHSSC_FLAGS_ATTR 0x07 + +#define BHSSC_FLAGS_ATTR_UNTAGGED 0 +#define BHSSC_FLAGS_ATTR_SIMPLE 1 +#define BHSSC_FLAGS_ATTR_ORDERED 2 +#define BHSSC_FLAGS_ATTR_HOQ 3 +#define BHSSC_FLAGS_ATTR_ACA 4 + +struct iscsi_bhs_scsi_command { + uint8_t bhssc_opcode; + uint8_t bhssc_flags; + uint8_t bhssc_reserved[2]; + uint8_t bhssc_total_ahs_len; + uint8_t bhssc_data_segment_len[3]; + uint64_t bhssc_lun; + uint32_t bhssc_initiator_task_tag; + uint32_t bhssc_expected_data_transfer_length; + uint32_t bhssc_cmdsn; + uint32_t bhssc_expstatsn; + uint8_t bhssc_cdb[16]; +}; +CTASSERT(sizeof(struct iscsi_bhs_scsi_command) == ISCSI_BHS_SIZE); + +#define BHSSR_FLAGS_RESIDUAL_UNDERFLOW 0x02 +#define 
BHSSR_FLAGS_RESIDUAL_OVERFLOW 0x04 + +#define BHSSR_RESPONSE_COMMAND_COMPLETED 0x00 + +struct iscsi_bhs_scsi_response { + uint8_t bhssr_opcode; + uint8_t bhssr_flags; + uint8_t bhssr_response; + uint8_t bhssr_status; + uint8_t bhssr_total_ahs_len; + uint8_t bhssr_data_segment_len[3]; + uint64_t bhssr_reserved; + uint32_t bhssr_initiator_task_tag; + uint32_t bhssr_snack_tag; + uint32_t bhssr_statsn; + uint32_t bhssr_expcmdsn; + uint32_t bhssr_maxcmdsn; + uint32_t bhssr_expdatasn; + uint32_t bhssr_bidirectional_read_residual_count; + uint32_t bhssr_residual_count; +}; +CTASSERT(sizeof(struct iscsi_bhs_scsi_response) == ISCSI_BHS_SIZE); + +#define BHSTMR_FUNCTION_ABORT_TASK 1 +#define BHSTMR_FUNCTION_ABORT_TASK_SET 2 +#define BHSTMR_FUNCTION_CLEAR_ACA 3 +#define BHSTMR_FUNCTION_CLEAR_TASK_SET 4 +#define BHSTMR_FUNCTION_LOGICAL_UNIT_RESET 5 +#define BHSTMR_FUNCTION_TARGET_WARM_RESET 6 +#define BHSTMR_FUNCTION_TARGET_COLD_RESET 7 +#define BHSTMR_FUNCTION_TASK_REASSIGN 8 + +struct iscsi_bhs_task_management_request { + uint8_t bhstmr_opcode; + uint8_t bhstmr_function; + uint8_t bhstmr_reserved[2]; + uint8_t bhstmr_total_ahs_len; + uint8_t bhstmr_data_segment_len[3]; + uint64_t bhstmr_lun; + uint32_t bhstmr_initiator_task_tag; + uint32_t bhstmr_referenced_task_tag; + uint32_t bhstmr_cmdsn; + uint32_t bhstmr_expstatsn; + uint32_t bhstmr_refcmdsn; + uint32_t bhstmr_expdatasn; + uint64_t bhstmr_reserved2; +}; +CTASSERT(sizeof(struct iscsi_bhs_task_management_request) == ISCSI_BHS_SIZE); + +#define BHSTMR_RESPONSE_FUNCTION_COMPLETE 0 +#define BHSTMR_RESPONSE_FUNCTION_NOT_SUPPORTED 5 + +struct iscsi_bhs_task_management_response { + uint8_t bhstmr_opcode; + uint8_t bhstmr_flags; + uint8_t bhstmr_response; + uint8_t bhstmr_reserved; + uint8_t bhstmr_total_ahs_len; + uint8_t bhstmr_data_segment_len[3]; + uint64_t bhstmr_reserved2; + uint32_t bhstmr_initiator_task_tag; + uint32_t bhstmr_reserved3; + uint32_t bhstmr_statsn; + uint32_t bhstmr_expcmdsn; + uint32_t bhstmr_maxcmdsn; + 
uint8_t bhstmr_reserved4[12]; +}; +CTASSERT(sizeof(struct iscsi_bhs_task_management_response) == ISCSI_BHS_SIZE); + +#define BHSLR_FLAGS_TRANSIT 0x80 +#define BHSLR_FLAGS_CONTINUE 0x40 + +#define BHSLR_STAGE_SECURITY_NEGOTIATION 0 +#define BHSLR_STAGE_OPERATIONAL_NEGOTIATION 1 +#define BHSLR_STAGE_FULL_FEATURE_PHASE 3 /* Yes, 3. */ + +struct iscsi_bhs_login_request { + uint8_t bhslr_opcode; + uint8_t bhslr_flags; + uint8_t bhslr_version_max; + uint8_t bhslr_version_min; + uint8_t bhslr_total_ahs_len; + uint8_t bhslr_data_segment_len[3]; + uint8_t bhslr_isid[6]; + uint16_t bhslr_tsih; + uint32_t bhslr_initiator_task_tag; + uint16_t bhslr_cid; + uint16_t bhslr_reserved; + uint32_t bhslr_cmdsn; + uint32_t bhslr_expstatsn; + uint8_t bhslr_reserved2[16]; +}; +CTASSERT(sizeof(struct iscsi_bhs_login_request) == ISCSI_BHS_SIZE); + +struct iscsi_bhs_login_response { + uint8_t bhslr_opcode; + uint8_t bhslr_flags; + uint8_t bhslr_version_max; + uint8_t bhslr_version_active; + uint8_t bhslr_total_ahs_len; + uint8_t bhslr_data_segment_len[3]; + uint8_t bhslr_isid[6]; + uint16_t bhslr_tsih; + uint32_t bhslr_initiator_task_tag; + uint32_t bhslr_reserved; + uint32_t bhslr_statsn; + uint32_t bhslr_expcmdsn; + uint32_t bhslr_maxcmdsn; + uint8_t bhslr_status_class; + uint8_t bhslr_status_detail; + uint16_t bhslr_reserved2; + uint8_t bhslr_reserved3[8]; +}; +CTASSERT(sizeof(struct iscsi_bhs_login_response) == ISCSI_BHS_SIZE); + +#define BHSTR_FLAGS_FINAL 0x80 +#define BHSTR_FLAGS_CONTINUE 0x40 + +struct iscsi_bhs_text_request { + uint8_t bhstr_opcode; + uint8_t bhstr_flags; + uint16_t bhstr_reserved; + uint8_t bhstr_total_ahs_len; + uint8_t bhstr_data_segment_len[3]; + uint64_t bhstr_lun; + uint32_t bhstr_initiator_task_tag; + uint32_t bhstr_target_transfer_tag; + uint32_t bhstr_cmdsn; + uint32_t bhstr_expstatsn; + uint8_t bhstr_reserved2[16]; +}; +CTASSERT(sizeof(struct iscsi_bhs_text_request) == ISCSI_BHS_SIZE); + +struct iscsi_bhs_text_response { + uint8_t bhstr_opcode; + uint8_t 
bhstr_flags; + uint16_t bhstr_reserved; + uint8_t bhstr_total_ahs_len; + uint8_t bhstr_data_segment_len[3]; + uint64_t bhstr_lun; + uint32_t bhstr_initiator_task_tag; + uint32_t bhstr_target_transfer_tag; + uint32_t bhstr_statsn; + uint32_t bhstr_expcmdsn; + uint32_t bhstr_maxcmdsn; + uint8_t bhstr_reserved2[12]; +}; +CTASSERT(sizeof(struct iscsi_bhs_text_response) == ISCSI_BHS_SIZE); + +#define BHSDO_FLAGS_F 0x80 + +struct iscsi_bhs_data_out { + uint8_t bhsdo_opcode; + uint8_t bhsdo_flags; + uint8_t bhsdo_reserved[2]; + uint8_t bhsdo_total_ahs_len; + uint8_t bhsdo_data_segment_len[3]; + uint64_t bhsdo_lun; + uint32_t bhsdo_initiator_task_tag; + uint32_t bhsdo_target_transfer_tag; + uint32_t bhsdo_reserved2; + uint32_t bhsdo_expstatsn; + uint32_t bhsdo_reserved3; + uint32_t bhsdo_datasn; + uint32_t bhsdo_buffer_offset; + uint32_t bhsdo_reserved4; +}; +CTASSERT(sizeof(struct iscsi_bhs_data_out) == ISCSI_BHS_SIZE); + +#define BHSDI_FLAGS_F 0x80 +#define BHSDI_FLAGS_A 0x40 +#define BHSDI_FLAGS_O 0x04 +#define BHSDI_FLAGS_U 0x02 +#define BHSDI_FLAGS_S 0x01 + +struct iscsi_bhs_data_in { + uint8_t bhsdi_opcode; + uint8_t bhsdi_flags; + uint8_t bhsdi_reserved; + uint8_t bhsdi_status; + uint8_t bhsdi_total_ahs_len; + uint8_t bhsdi_data_segment_len[3]; + uint64_t bhsdi_lun; + uint32_t bhsdi_initiator_task_tag; + uint32_t bhsdi_target_transfer_tag; + uint32_t bhsdi_statsn; + uint32_t bhsdi_expcmdsn; + uint32_t bhsdi_maxcmdsn; + uint32_t bhsdi_datasn; + uint32_t bhsdi_buffer_offset; + uint32_t bhsdi_residual_count; +}; +CTASSERT(sizeof(struct iscsi_bhs_data_in) == ISCSI_BHS_SIZE); + +struct iscsi_bhs_r2t { + uint8_t bhsr2t_opcode; + uint8_t bhsr2t_flags; + uint16_t bhsr2t_reserved; + uint8_t bhsr2t_total_ahs_len; + uint8_t bhsr2t_data_segment_len[3]; + uint64_t bhsr2t_lun; + uint32_t bhsr2t_initiator_task_tag; + uint32_t bhsr2t_target_transfer_tag; + uint32_t bhsr2t_statsn; + uint32_t bhsr2t_expcmdsn; + uint32_t bhsr2t_maxcmdsn; + uint32_t bhsr2t_r2tsn; + uint32_t 
bhsr2t_buffer_offset; + uint32_t bhsr2t_desired_data_transfer_length; +}; +CTASSERT(sizeof(struct iscsi_bhs_r2t) == ISCSI_BHS_SIZE); + +struct iscsi_bhs_nop_out { + uint8_t bhsno_opcode; + uint8_t bhsno_flags; + uint16_t bhsno_reserved; + uint8_t bhsno_total_ahs_len; + uint8_t bhsno_data_segment_len[3]; + uint64_t bhsno_lun; + uint32_t bhsno_initiator_task_tag; + uint32_t bhsno_target_transfer_tag; + uint32_t bhsno_cmdsn; + uint32_t bhsno_expstatsn; + uint8_t bhsno_reserved2[16]; +}; +CTASSERT(sizeof(struct iscsi_bhs_nop_out) == ISCSI_BHS_SIZE); + +struct iscsi_bhs_nop_in { + uint8_t bhsni_opcode; + uint8_t bhsni_flags; + uint16_t bhsni_reserved; + uint8_t bhsni_total_ahs_len; + uint8_t bhsni_data_segment_len[3]; + uint64_t bhsni_lun; + uint32_t bhsni_initiator_task_tag; + uint32_t bhsni_target_transfer_tag; + uint32_t bhsni_statsn; + uint32_t bhsni_expcmdsn; + uint32_t bhsni_maxcmdsn; + uint8_t bhsno_reserved2[12]; +}; +CTASSERT(sizeof(struct iscsi_bhs_nop_in) == ISCSI_BHS_SIZE); + +#define BHSLR_REASON_CLOSE_SESSION 0 +#define BHSLR_REASON_CLOSE_CONNECTION 1 +#define BHSLR_REASON_REMOVE_FOR_RECOVERY 2 + +struct iscsi_bhs_logout_request { + uint8_t bhslr_opcode; + uint8_t bhslr_reason; + uint16_t bhslr_reserved; + uint8_t bhslr_total_ahs_len; + uint8_t bhslr_data_segment_len[3]; + uint64_t bhslr_reserved2; + uint32_t bhslr_initiator_task_tag; + uint16_t bhslr_cid; + uint16_t bhslr_reserved3; + uint32_t bhslr_cmdsn; + uint32_t bhslr_expstatsn; + uint8_t bhslr_reserved4[16]; +}; +CTASSERT(sizeof(struct iscsi_bhs_logout_request) == ISCSI_BHS_SIZE); + +#define BHSLR_RESPONSE_CLOSED_SUCCESSFULLY 0 +#define BHSLR_RESPONSE_RECOVERY_NOT_SUPPORTED 2 + +struct iscsi_bhs_logout_response { + uint8_t bhslr_opcode; + uint8_t bhslr_flags; + uint8_t bhslr_response; + uint8_t bhslr_reserved; + uint8_t bhslr_total_ahs_len; + uint8_t bhslr_data_segment_len[3]; + uint64_t bhslr_reserved2; + uint32_t bhslr_initiator_task_tag; + uint32_t bhslr_reserved3; + uint32_t bhslr_statsn; + 
uint32_t bhslr_expcmdsn; + uint32_t bhslr_maxcmdsn; + uint32_t bhslr_reserved4; + uint16_t bhslr_time2wait; + uint16_t bhslr_time2retain; + uint32_t bhslr_reserved5; +}; +CTASSERT(sizeof(struct iscsi_bhs_logout_response) == ISCSI_BHS_SIZE); + +#define BHSAM_EVENT_TARGET_REQUESTS_LOGOUT 1 +#define BHSAM_EVENT_TARGET_TERMINATES_CONNECTION 2 +#define BHSAM_EVENT_TARGET_TERMINATES_SESSION 3 + +struct iscsi_bhs_asynchronous_message { + uint8_t bhsam_opcode; + uint8_t bhsam_flags; + uint16_t bhsam_reserved; + uint8_t bhsam_total_ahs_len; + uint8_t bhsam_data_segment_len[3]; + uint64_t bhsam_lun; + uint32_t bhsam_0xffffffff; + uint32_t bhsam_reserved2; + uint32_t bhsam_statsn; + uint32_t bhsam_expcmdsn; + uint32_t bhsam_maxcmdsn; + uint8_t bhsam_async_event; + uint8_t bhsam_async_vcode; + uint16_t bhsam_parameter1; + uint16_t bhsam_parameter2; + uint16_t bhsam_parameter3; + uint32_t bhsam_reserved3; +}; +CTASSERT(sizeof(struct iscsi_bhs_asynchronous_message) == ISCSI_BHS_SIZE); + +#define BHSSR_REASON_DATA_DIGEST_ERROR 0x02 +#define BHSSR_PROTOCOL_ERROR 0x04 +#define BHSSR_COMMAND_NOT_SUPPORTED 0x05 +#define BHSSR_INVALID_PDU_FIELD 0x09 + +struct iscsi_bhs_reject { + uint8_t bhsr_opcode; + uint8_t bhsr_flags; + uint8_t bhsr_reason; + uint8_t bhsr_reserved; + uint8_t bhsr_total_ahs_len; + uint8_t bhsr_data_segment_len[3]; + uint64_t bhsr_reserved2; + uint32_t bhsr_0xffffffff; + uint32_t bhsr_reserved3; + uint32_t bhsr_statsn; + uint32_t bhsr_expcmdsn; + uint32_t bhsr_maxcmdsn; + uint32_t bhsr_datasn_r2tsn; + uint32_t bhsr_reserved4; + uint32_t bhsr_reserved5; +}; +CTASSERT(sizeof(struct iscsi_bhs_reject) == ISCSI_BHS_SIZE); + +#endif /* !ISCSI_PROTO_H */ diff --git a/sys/dev/iscsi_initiator/iscsi.c b/sys/dev/iscsi_initiator/iscsi.c index 4a1cb967ffa0..6e06718aabfc 100644 --- a/sys/dev/iscsi_initiator/iscsi.c +++ b/sys/dev/iscsi_initiator/iscsi.c @@ -858,7 +858,7 @@ iscsi_modevent(module_t mod, int what, void *arg) } moduledata_t iscsi_mod = { - "iscsi", + 
"iscsi_initiator", (modeventhand_t) iscsi_modevent, 0 }; @@ -878,5 +878,5 @@ iscsi_rootconf(void) SYSINIT(cpu_rootconf1, SI_SUB_ROOT_CONF, SI_ORDER_FIRST, iscsi_rootconf, NULL) #endif -DECLARE_MODULE(iscsi, iscsi_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); -MODULE_DEPEND(iscsi, cam, 1, 1, 1); +DECLARE_MODULE(iscsi_initiator, iscsi_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); +MODULE_DEPEND(iscsi_initiator, cam, 1, 1, 1); diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 0f38aaff0a92..2161b8b407af 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -160,6 +160,7 @@ SUBDIR= \ ${_ipw} \ ${_ipwfw} \ ${_isci} \ + iscsi \ iscsi_initiator \ isp \ ${_ispfw} \ diff --git a/sys/modules/ctl/Makefile b/sys/modules/ctl/Makefile index 5744b7d08ccd..39a61f612efc 100644 --- a/sys/modules/ctl/Makefile +++ b/sys/modules/ctl/Makefile @@ -12,6 +12,7 @@ SRCS+= ctl_cmd_table.c SRCS+= ctl_frontend.c SRCS+= ctl_frontend_cam_sim.c SRCS+= ctl_frontend_internal.c +SRCS+= ctl_frontend_iscsi.c SRCS+= ctl_mem_pool.c SRCS+= ctl_scsi_all.c SRCS+= ctl_error.c @@ -23,4 +24,6 @@ SRCS+= vnode_if.h SRCS+= opt_cam.h SRCS+= opt_kdtrace.h +#CFLAGS+=-DICL_KERNEL_PROXY + .include <bsd.kmod.mk> diff --git a/sys/modules/iscsi/Makefile b/sys/modules/iscsi/Makefile index 625e3be97837..d072cfb39269 100644 --- a/sys/modules/iscsi/Makefile +++ b/sys/modules/iscsi/Makefile @@ -1,5 +1,25 @@ # $FreeBSD$ -SUBDIR= initiator +.PATH: ${.CURDIR}/../../dev/iscsi/ +KMOD= iscsi -.include <bsd.subdir.mk> +SRCS= iscsi.c +.if defined(ICL_RDMA) +SRCS+= icl_rdma.c +.else +SRCS+= icl.c +.endif +SRCS+= icl_proxy.c +SRCS+= opt_cam.h +SRCS+= bus_if.h +SRCS+= device_if.h + +# Those below are required for RDMA. +SRCS+= vnode_if.h +SRCS+= opt_inet.h +SRCS+= opt_inet6.h + +CFLAGS+= -I${.CURDIR}/../../ofed/include +#CFLAGS+=-DICL_KERNEL_PROXY + +.include <bsd.kmod.mk> |