diff options
| author | Wojciech Macek <wma@FreeBSD.org> | 2018-02-22 13:32:31 +0000 |
|---|---|---|
| committer | Wojciech Macek <wma@FreeBSD.org> | 2018-02-22 13:32:31 +0000 |
| commit | 0d787e9b35c2fc01e724f06b4eda2807774e7f4e (patch) | |
| tree | 7d726d1f69ac9c9898d000ebe2371389a02be550 /sys/dev/nvme | |
| parent | de2cb430adf18cf00184df694fc62e0eab9dfabe (diff) | |
Notes
Diffstat (limited to 'sys/dev/nvme')
| -rw-r--r-- | sys/dev/nvme/nvme.c | 31 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme.h | 676 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_ctrlr.c | 224 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_ctrlr_cmd.c | 58 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_ns.c | 44 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_ns_cmd.c | 8 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_private.h | 10 | ||||
| -rw-r--r-- | sys/dev/nvme/nvme_qpair.c | 91 |
8 files changed, 688 insertions, 454 deletions
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c index b13cad7577598..4013601ec5f00 100644 --- a/sys/dev/nvme/nvme.c +++ b/sys/dev/nvme/nvme.c @@ -222,23 +222,38 @@ nvme_modevent(module_t mod, int type, void *arg) void nvme_dump_command(struct nvme_command *cmd) { + uint8_t opc, fuse; + + opc = (cmd->opc_fuse >> NVME_CMD_OPC_SHIFT) & NVME_CMD_OPC_MASK; + fuse = (cmd->opc_fuse >> NVME_CMD_FUSE_SHIFT) & NVME_CMD_FUSE_MASK; + printf( -"opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n", - cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid, +"opc:%x f:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n", + opc, fuse, cmd->cid, le32toh(cmd->nsid), cmd->rsvd2, cmd->rsvd3, - (uintmax_t)cmd->mptr, (uintmax_t)cmd->prp1, (uintmax_t)cmd->prp2, - cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14, - cmd->cdw15); + (uintmax_t)le64toh(cmd->mptr), (uintmax_t)le64toh(cmd->prp1), (uintmax_t)le64toh(cmd->prp2), + le32toh(cmd->cdw10), le32toh(cmd->cdw11), le32toh(cmd->cdw12), + le32toh(cmd->cdw13), le32toh(cmd->cdw14), le32toh(cmd->cdw15)); } void nvme_dump_completion(struct nvme_completion *cpl) { + uint8_t p, sc, sct, m, dnr; + uint16_t status; + + status = le16toh(cpl->status); + + p = NVME_STATUS_GET_P(status); + sc = NVME_STATUS_GET_SC(status); + sct = NVME_STATUS_GET_SCT(status); + m = NVME_STATUS_GET_M(status); + dnr = NVME_STATUS_GET_DNR(status); + printf("cdw0:%08x sqhd:%04x sqid:%04x " "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n", - cpl->cdw0, cpl->sqhd, cpl->sqid, - cpl->cid, cpl->status.p, cpl->status.sc, cpl->status.sct, - cpl->status.m, cpl->status.dnr); + le32toh(cpl->cdw0), le16toh(cpl->sqhd), le16toh(cpl->sqid), + cpl->cid, p, sc, sct, m, dnr); } static int diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h index 5d11f79337fec..cf49a443dafab 100644 --- a/sys/dev/nvme/nvme.h +++ b/sys/dev/nvme/nvme.h @@ -36,6 +36,7 @@ #endif #include <sys/param.h> +#include <sys/endian.h> #define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command) #define NVME_RESET_CONTROLLER _IO('n', 1) @@ -59,153 +60,264 @@ /* Cap nvme to 1MB transfers driver explodes with larger sizes */ #define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? MAXPHYS : (1<<20)) -union cap_lo_register { - uint32_t raw; - struct { - /** maximum queue entries supported */ - uint32_t mqes : 16; +/* Register field definitions */ +#define NVME_CAP_LO_REG_MQES_SHIFT (0) +#define NVME_CAP_LO_REG_MQES_MASK (0xFFFF) +#define NVME_CAP_LO_REG_CQR_SHIFT (16) +#define NVME_CAP_LO_REG_CQR_MASK (0x1) +#define NVME_CAP_LO_REG_AMS_SHIFT (17) +#define NVME_CAP_LO_REG_AMS_MASK (0x3) +#define NVME_CAP_LO_REG_TO_SHIFT (24) +#define NVME_CAP_LO_REG_TO_MASK (0xFF) - /** contiguous queues required */ - uint32_t cqr : 1; +#define NVME_CAP_HI_REG_DSTRD_SHIFT (0) +#define NVME_CAP_HI_REG_DSTRD_MASK (0xF) +#define NVME_CAP_HI_REG_CSS_NVM_SHIFT (5) +#define NVME_CAP_HI_REG_CSS_NVM_MASK (0x1) +#define NVME_CAP_HI_REG_MPSMIN_SHIFT (16) +#define NVME_CAP_HI_REG_MPSMIN_MASK (0xF) +#define NVME_CAP_HI_REG_MPSMAX_SHIFT (20) +#define NVME_CAP_HI_REG_MPSMAX_MASK (0xF) - /** arbitration mechanism supported */ - uint32_t ams : 2; +#define NVME_CC_REG_EN_SHIFT (0) +#define NVME_CC_REG_EN_MASK (0x1) +#define NVME_CC_REG_CSS_SHIFT (4) +#define NVME_CC_REG_CSS_MASK (0x7) +#define NVME_CC_REG_MPS_SHIFT (7) +#define NVME_CC_REG_MPS_MASK (0xF) +#define NVME_CC_REG_AMS_SHIFT (11) +#define NVME_CC_REG_AMS_MASK (0x7) +#define NVME_CC_REG_SHN_SHIFT (14) +#define NVME_CC_REG_SHN_MASK (0x3) +#define NVME_CC_REG_IOSQES_SHIFT (16) +#define NVME_CC_REG_IOSQES_MASK (0xF) +#define NVME_CC_REG_IOCQES_SHIFT (20) +#define NVME_CC_REG_IOCQES_MASK (0xF) - uint32_t reserved1 : 5; +#define NVME_CSTS_REG_RDY_SHIFT (0) +#define NVME_CSTS_REG_RDY_MASK (0x1) +#define NVME_CSTS_REG_CFS_SHIFT (1) +#define NVME_CSTS_REG_CFS_MASK (0x1) +#define NVME_CSTS_REG_SHST_SHIFT (2) +#define NVME_CSTS_REG_SHST_MASK (0x3) - /** timeout */ - uint32_t to : 8; - } bits __packed; -} __packed; +#define NVME_CSTS_GET_SHST(csts) (((csts) >> NVME_CSTS_REG_SHST_SHIFT) & NVME_CSTS_REG_SHST_MASK) -_Static_assert(sizeof(union cap_lo_register) == 4, "bad size for cap_lo_register"); +#define NVME_AQA_REG_ASQS_SHIFT (0) +#define NVME_AQA_REG_ASQS_MASK (0xFFF) +#define NVME_AQA_REG_ACQS_SHIFT (16) +#define NVME_AQA_REG_ACQS_MASK (0xFFF) -union cap_hi_register { - uint32_t raw; - struct { - /** doorbell stride */ - uint32_t dstrd : 4; +/* Command field definitions */ - uint32_t reserved3 : 1; +#define NVME_CMD_OPC_SHIFT (0) +#define NVME_CMD_OPC_MASK (0xFF) +#define NVME_CMD_FUSE_SHIFT (8) +#define NVME_CMD_FUSE_MASK (0x3) - /** command sets supported */ - uint32_t css_nvm : 1; +#define NVME_CMD_SET_OPC(opc) (htole16(((opc) & NVME_CMD_OPC_MASK) << NVME_CMD_OPC_SHIFT)) - uint32_t css_reserved : 3; - uint32_t reserved2 : 7; +#define NVME_STATUS_P_SHIFT (0) +#define NVME_STATUS_P_MASK (0x1) +#define NVME_STATUS_SC_SHIFT (1) +#define NVME_STATUS_SC_MASK (0xFF) +#define NVME_STATUS_SCT_SHIFT (9) +#define NVME_STATUS_SCT_MASK (0x7) +#define NVME_STATUS_M_SHIFT (14) +#define NVME_STATUS_M_MASK (0x1) +#define NVME_STATUS_DNR_SHIFT (15) +#define NVME_STATUS_DNR_MASK (0x1) - /** memory page size minimum */ - uint32_t mpsmin : 4; +#define NVME_STATUS_GET_P(st) (((st) >> NVME_STATUS_P_SHIFT) & NVME_STATUS_P_MASK) +#define NVME_STATUS_GET_SC(st) (((st) >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK) +#define NVME_STATUS_GET_SCT(st) (((st) >> NVME_STATUS_SCT_SHIFT) & NVME_STATUS_SCT_MASK) +#define NVME_STATUS_GET_M(st) (((st) >> NVME_STATUS_M_SHIFT) & NVME_STATUS_M_MASK) +#define NVME_STATUS_GET_DNR(st) (((st) >> NVME_STATUS_DNR_SHIFT) & NVME_STATUS_DNR_MASK) - /** memory page size maximum */ - uint32_t mpsmax : 4; +#define NVME_PWR_ST_MPS_SHIFT (0) +#define NVME_PWR_ST_MPS_MASK (0x1) +#define NVME_PWR_ST_NOPS_SHIFT (1) +#define NVME_PWR_ST_NOPS_MASK (0x1) +#define NVME_PWR_ST_RRT_SHIFT (0) +#define NVME_PWR_ST_RRT_MASK (0x1F) +#define NVME_PWR_ST_RRL_SHIFT (0) +#define NVME_PWR_ST_RRL_MASK (0x1F) +#define NVME_PWR_ST_RWT_SHIFT (0) +#define NVME_PWR_ST_RWT_MASK (0x1F) +#define NVME_PWR_ST_RWL_SHIFT (0) +#define NVME_PWR_ST_RWL_MASK (0x1F) +#define NVME_PWR_ST_IPS_SHIFT (6) +#define NVME_PWR_ST_IPS_MASK (0x3) +#define NVME_PWR_ST_APW_SHIFT (0) +#define NVME_PWR_ST_APW_MASK (0x7) +#define NVME_PWR_ST_APS_SHIFT (6) +#define NVME_PWR_ST_APS_MASK (0x3) - uint32_t reserved1 : 8; - } bits __packed; -} __packed; +/** OACS - optional admin command support */ +/* supports security send/receive commands */ +#define NVME_CTRLR_DATA_OACS_SECURITY_SHIFT (0) +#define NVME_CTRLR_DATA_OACS_SECURITY_MASK (0x1) +/* supports format nvm command */ +#define NVME_CTRLR_DATA_OACS_FORMAT_SHIFT (1) +#define NVME_CTRLR_DATA_OACS_FORMAT_MASK (0x1) +/* supports firmware activate/download commands */ +#define NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT (2) +#define NVME_CTRLR_DATA_OACS_FIRMWARE_MASK (0x1) +/* supports namespace management commands */ +#define NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT (3) +#define NVME_CTRLR_DATA_OACS_NSMGMT_MASK (0x1) -_Static_assert(sizeof(union cap_hi_register) == 4, "bad size of cap_hi_register"); +/** firmware updates */ +/* first slot is read-only */ +#define NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT (0) +#define NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK (0x1) +/* number of firmware slots */ +#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT (1) +#define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK (0x7) -union cc_register { - uint32_t raw; - struct { - /** enable */ - uint32_t en : 1; +/** log page attributes */ +/* per namespace smart/health log page */ +#define NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT (0) +#define NVME_CTRLR_DATA_LPA_NS_SMART_MASK (0x1) - uint32_t reserved1 : 3; +/** AVSCC - admin vendor specific command configuration */ +/* admin vendor specific commands use spec format */ +#define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_SHIFT (0) +#define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_MASK (0x1) - /** i/o command set selected */ - uint32_t css : 3; +/** Autonomous Power State Transition Attributes */ +/* Autonomous Power State Transitions supported */ +#define NVME_CTRLR_DATA_APSTA_APST_SUPP_SHIFT (0) +#define NVME_CTRLR_DATA_APSTA_APST_SUPP_MASK (0x1) - /** memory page size */ - uint32_t mps : 4; +/** submission queue entry size */ +#define NVME_CTRLR_DATA_SQES_MIN_SHIFT (0) +#define NVME_CTRLR_DATA_SQES_MIN_MASK (0xF) +#define NVME_CTRLR_DATA_SQES_MAX_SHIFT (4) +#define NVME_CTRLR_DATA_SQES_MAX_MASK (0xF) - /** arbitration mechanism selected */ - uint32_t ams : 3; +/** completion queue entry size */ +#define NVME_CTRLR_DATA_CQES_MIN_SHIFT (0) +#define NVME_CTRLR_DATA_CQES_MIN_MASK (0xF) +#define NVME_CTRLR_DATA_CQES_MAX_SHIFT (4) +#define NVME_CTRLR_DATA_CQES_MAX_MASK (0xF) - /** shutdown notification */ - uint32_t shn : 2; +/** optional nvm command support */ +#define NVME_CTRLR_DATA_ONCS_COMPARE_SHIFT (0) +#define NVME_CTRLR_DATA_ONCS_COMPARE_MASK (0x1) +#define NVME_CTRLR_DATA_ONCS_WRITE_UNC_SHIFT (1) +#define NVME_CTRLR_DATA_ONCS_WRITE_UNC_MASK (0x1) +#define NVME_CTRLR_DATA_ONCS_DSM_SHIFT (2) +#define NVME_CTRLR_DATA_ONCS_DSM_MASK (0x1) - /** i/o submission queue entry size */ - uint32_t iosqes : 4; +/** volatile write cache */ +#define NVME_CTRLR_DATA_VWC_PRESENT_SHIFT (0) +#define NVME_CTRLR_DATA_VWC_PRESENT_MASK (0x1) - /** i/o completion queue entry size */ - uint32_t iocqes : 4; +/** namespace features */ +/* thin provisioning */ +#define NVME_NS_DATA_NSFEAT_THIN_PROV_SHIFT (0) +#define NVME_NS_DATA_NSFEAT_THIN_PROV_MASK (0x1) - uint32_t reserved2 : 8; - } bits __packed; -} __packed; +/** formatted lba size */ +#define NVME_NS_DATA_FLBAS_FORMAT_SHIFT (0) +#define NVME_NS_DATA_FLBAS_FORMAT_MASK (0xF) +#define NVME_NS_DATA_FLBAS_EXTENDED_SHIFT (4) +#define NVME_NS_DATA_FLBAS_EXTENDED_MASK (0x1) -_Static_assert(sizeof(union cc_register) == 4, "bad size for cc_register"); +/** metadata capabilities */ +/* metadata can be transferred as part of data prp list */ +#define NVME_NS_DATA_MC_EXTENDED_SHIFT (0) +#define NVME_NS_DATA_MC_EXTENDED_MASK (0x1) +/* metadata can be transferred with separate metadata pointer */ +#define NVME_NS_DATA_MC_POINTER_SHIFT (1) +#define NVME_NS_DATA_MC_POINTER_MASK (0x1) -enum shn_value { - NVME_SHN_NORMAL = 0x1, - NVME_SHN_ABRUPT = 0x2, -}; +/** end-to-end data protection capabilities */ +/* protection information type 1 */ +#define NVME_NS_DATA_DPC_PIT1_SHIFT (0) +#define NVME_NS_DATA_DPC_PIT1_MASK (0x1) +/* protection information type 2 */ +#define NVME_NS_DATA_DPC_PIT2_SHIFT (1) +#define NVME_NS_DATA_DPC_PIT2_MASK (0x1) +/* protection information type 3 */ +#define NVME_NS_DATA_DPC_PIT3_SHIFT (2) +#define NVME_NS_DATA_DPC_PIT3_MASK (0x1) +/* first eight bytes of metadata */ +#define NVME_NS_DATA_DPC_MD_START_SHIFT (3) +#define NVME_NS_DATA_DPC_MD_START_MASK (0x1) +/* last eight bytes of metadata */ +#define NVME_NS_DATA_DPC_MD_END_SHIFT (4) +#define NVME_NS_DATA_DPC_MD_END_MASK (0x1) -union csts_register { - uint32_t raw; - struct { - /** ready */ - uint32_t rdy : 1; +/** end-to-end data protection type settings */ +/* protection information type */ +#define NVME_NS_DATA_DPS_PIT_SHIFT (0) +#define NVME_NS_DATA_DPS_PIT_MASK (0x7) +/* 1 == protection info transferred at start of metadata */ +/* 0 == protection info transferred at end of metadata */ +#define NVME_NS_DATA_DPS_MD_START_SHIFT (3) +#define NVME_NS_DATA_DPS_MD_START_MASK (0x1) - /** controller fatal status */ - uint32_t cfs : 1; +/** lba format support */ +/* metadata size */ +#define NVME_NS_DATA_LBAF_MS_SHIFT (0) +#define NVME_NS_DATA_LBAF_MS_MASK (0xFFFF) +/* lba data size */ +#define NVME_NS_DATA_LBAF_LBADS_SHIFT (16) +#define NVME_NS_DATA_LBAF_LBADS_MASK (0xFF) +/* relative performance */ +#define NVME_NS_DATA_LBAF_RP_SHIFT (24) +#define NVME_NS_DATA_LBAF_RP_MASK (0x3) - /** shutdown status */ - uint32_t shst : 2; +enum nvme_critical_warning_state { + NVME_CRIT_WARN_ST_AVAILABLE_SPARE = 0x1, + NVME_CRIT_WARN_ST_TEMPERATURE = 0x2, + NVME_CRIT_WARN_ST_DEVICE_RELIABILITY = 0x4, + NVME_CRIT_WARN_ST_READ_ONLY = 0x8, + NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP = 0x10, +}; +#define NVME_CRIT_WARN_ST_RESERVED_MASK (0xE0) - uint32_t reserved1 : 28; - } bits __packed; -} __packed; +/* slot for current FW */ +#define NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT (0) +#define NVME_FIRMWARE_PAGE_AFI_SLOT_MASK (0x7) -_Static_assert(sizeof(union csts_register) == 4, "bad size for csts_register"); +/* CC register SHN field values */ +enum shn_value { + NVME_SHN_NORMAL = 0x1, + NVME_SHN_ABRUPT = 0x2, +}; +/* CSTS register SHST field values */ enum shst_value { NVME_SHST_NORMAL = 0x0, NVME_SHST_OCCURRING = 0x1, NVME_SHST_COMPLETE = 0x2, }; -union aqa_register { - uint32_t raw; - struct { - /** admin submission queue size */ - uint32_t asqs : 12; - - uint32_t reserved1 : 4; - - /** admin completion queue size */ - uint32_t acqs : 12; - - uint32_t reserved2 : 4; - } bits __packed; -} __packed; - -_Static_assert(sizeof(union aqa_register) == 4, "bad size for aqa_resgister"); - struct nvme_registers { /** controller capabilities */ - union cap_lo_register cap_lo; - union cap_hi_register cap_hi; + uint32_t cap_lo; + uint32_t cap_hi; uint32_t vs; /* version */ uint32_t intms; /* interrupt mask set */ uint32_t intmc; /* interrupt mask clear */ /** controller configuration */ - union cc_register cc; + uint32_t cc; uint32_t reserved1; /** controller status */ - union csts_register csts; + uint32_t csts; uint32_t reserved2; /** admin queue attributes */ - union aqa_register aqa; + uint32_t aqa; uint64_t asq; /* admin submission queue base addr */ uint64_t acq; /* admin completion queue base addr */ @@ -222,9 +334,7 @@ _Static_assert(sizeof(struct nvme_registers) == 0x1008, "bad size for nvme_regis struct nvme_command { /* dword 0 */ - uint16_t opc : 8; /* opcode */ - uint16_t fuse : 2; /* fused operation */ - uint16_t rsvd1 : 6; + uint16_t opc_fuse; /* opcode, fused operation */ uint16_t cid; /* command identifier */ /* dword 1 */ @@ -254,18 +364,6 @@ struct nvme_command _Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command"); -struct nvme_status { - - uint16_t p : 1; /* phase tag */ - uint16_t sc : 8; /* status code */ - uint16_t sct : 3; /* status code type */ - uint16_t rsvd2 : 2; - uint16_t m : 1; /* more */ - uint16_t dnr : 1; /* do not retry */ -} __packed; - -_Static_assert(sizeof(struct nvme_status) == 2, "bad size for nvme_status"); - struct nvme_completion { /* dword 0 */ @@ -280,7 +378,7 @@ struct nvme_completion { /* dword 3 */ uint16_t cid; /* command identifier */ - struct nvme_status status; + uint16_t status; } __packed; _Static_assert(sizeof(struct nvme_completion) == 4 * 4, "bad size for nvme_completion"); @@ -435,27 +533,22 @@ struct nvme_power_state { /** Maximum Power */ uint16_t mp; /* Maximum Power */ uint8_t ps_rsvd1; - uint8_t mps : 1; /* Max Power Scale */ - uint8_t nops : 1; /* Non-Operational State */ - uint8_t ps_rsvd2 : 6; + uint8_t mps_nops; /* Max Power Scale, Non-Operational State */ + uint32_t enlat; /* Entry Latency */ uint32_t exlat; /* Exit Latency */ - uint8_t rrt : 5; /* Relative Read Throughput */ - uint8_t ps_rsvd3 : 3; - uint8_t rrl : 5; /* Relative Read Latency */ - uint8_t ps_rsvd4 : 3; - uint8_t rwt : 5; /* Relative Write Throughput */ - uint8_t ps_rsvd5 : 3; - uint8_t rwl : 5; /* Relative Write Latency */ - uint8_t ps_rsvd6 : 3; + + uint8_t rrt; /* Relative Read Throughput */ + uint8_t rrl; /* Relative Read Latency */ + uint8_t rwt; /* Relative Write Throughput */ + uint8_t rwl; /* Relative Write Latency */ + uint16_t idlp; /* Idle Power */ - uint8_t ps_rsvd7 : 6; - uint8_t ips : 2; /* Idle Power Scale */ + uint8_t ips; /* Idle Power Scale */ uint8_t ps_rsvd8; + uint16_t actp; /* Active Power */ - uint8_t apw : 3; /* Active Power Workload */ - uint8_t ps_rsvd9 : 3; - uint8_t aps : 2; /* Active Power Scale */ + uint8_t apw_aps; /* Active Power Workload, Active Power Scale */ uint8_t ps_rsvd10[9]; } __packed; @@ -524,21 +617,7 @@ struct nvme_controller_data { /* bytes 256-511: admin command set attributes */ /** optional admin command support */ - struct { - /* supports security send/receive commands */ - uint16_t security : 1; - - /* supports format nvm command */ - uint16_t format : 1; - - /* supports firmware activate/download commands */ - uint16_t firmware : 1; - - /* supports namespace management commands */ - uint16_t nsmgmt : 1; - - uint16_t oacs_rsvd : 12; - } __packed oacs; + uint16_t oacs; /** abort command limit */ uint8_t acl; @@ -547,23 +626,10 @@ struct nvme_controller_data { uint8_t aerl; /** firmware updates */ - struct { - /* first slot is read-only */ - uint8_t slot1_ro : 1; - - /* number of firmware slots */ - uint8_t num_slots : 3; - - uint8_t frmw_rsvd : 4; - } __packed frmw; + uint8_t frmw; /** log page attributes */ - struct { - /* per namespace smart/health log page */ - uint8_t ns_smart : 1; - - uint8_t lpa_rsvd : 7; - } __packed lpa; + uint8_t lpa; /** error log page entries */ uint8_t elpe; @@ -572,20 +638,10 @@ struct nvme_controller_data { uint8_t npss; /** admin vendor specific command configuration */ - struct { - /* admin vendor specific commands use spec format */ - uint8_t spec_format : 1; - - uint8_t avscc_rsvd : 7; - } __packed avscc; + uint8_t avscc; /** Autonomous Power State Transition Attributes */ - struct { - /* Autonmous Power State Transitions supported */ - uint8_t apst_supp : 1; - - uint8_t apsta_rsvd : 7; - } __packed apsta; + uint8_t apsta; /** Warning Composite Temperature Threshold */ uint16_t wctemp; @@ -636,20 +692,14 @@ struct nvme_controller_data { /** Sanitize Capabilities */ uint32_t sanicap; /* Really a bitfield */ - uint8_t reserved3[180]; + uint8_t reserved3[180]; /* bytes 512-703: nvm command set attributes */ /** submission queue entry size */ - struct { - uint8_t min : 4; - uint8_t max : 4; - } __packed sqes; + uint8_t sqes; /** completion queue entry size */ - struct { - uint8_t min : 4; - uint8_t max : 4; - } __packed cqes; + uint8_t cqes; /** Maximum Outstanding Commands */ uint16_t maxcmd; @@ -658,12 +708,7 @@ struct nvme_controller_data { uint32_t nn; /** optional nvm command support */ - struct { - uint16_t compare : 1; - uint16_t write_unc : 1; - uint16_t dsm: 1; - uint16_t reserved: 13; - } __packed oncs; + uint16_t oncs; /** fused operation support */ uint16_t fuses; @@ -672,10 +717,7 @@ struct nvme_controller_data { uint8_t fna; /** volatile write cache */ - struct { - uint8_t present : 1; - uint8_t reserved : 7; - } __packed vwc; + uint8_t vwc; /* TODO: flesh out remaining nvm command set attributes */ uint8_t reserved5[178]; @@ -704,78 +746,27 @@ struct nvme_namespace_data { uint64_t nuse; /** namespace features */ - struct { - /** thin provisioning */ - uint8_t thin_prov : 1; - uint8_t reserved1 : 7; - } __packed nsfeat; + uint8_t nsfeat; /** number of lba formats */ uint8_t nlbaf; /** formatted lba size */ - struct { - uint8_t format : 4; - uint8_t extended : 1; - uint8_t reserved2 : 3; - } __packed flbas; + uint8_t flbas; /** metadata capabilities */ - struct { - /* metadata can be transferred as part of data prp list */ - uint8_t extended : 1; - - /* metadata can be transferred with separate metadata pointer */ - uint8_t pointer : 1; - - uint8_t reserved3 : 6; - } __packed mc; + uint8_t mc; /** end-to-end data protection capabilities */ - struct { - /* protection information type 1 */ - uint8_t pit1 : 1; - - /* protection information type 2 */ - uint8_t pit2 : 1; - - /* protection information type 3 */ - uint8_t pit3 : 1; - - /* first eight bytes of metadata */ - uint8_t md_start : 1; - - /* last eight bytes of metadata */ - uint8_t md_end : 1; - } __packed dpc; + uint8_t dpc; /** end-to-end data protection type settings */ - struct { - /* protection information type */ - uint8_t pit : 3; - - /* 1 == protection info transferred at start of metadata */ - /* 0 == protection info transferred at end of metadata */ - uint8_t md_start : 1; - - uint8_t reserved4 : 4; - } __packed dps; + uint8_t dps; uint8_t reserved5[98]; /** lba format support */ - struct { - /** metadata size */ - uint32_t ms : 16; - - /** lba data size */ - uint32_t lbads : 8; - - /** relative performance */ - uint32_t rp : 2; - - uint32_t reserved6 : 6; - } __packed lbaf[16]; + uint32_t lbaf[16]; uint8_t reserved6[192]; @@ -818,7 +809,7 @@ struct nvme_error_information_entry { uint64_t error_count; uint16_t sqid; uint16_t cid; - struct nvme_status status; + uint16_t status; uint16_t error_location; uint64_t lba; uint32_t nsid; @@ -828,26 +819,9 @@ struct nvme_error_information_entry { _Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry"); -union nvme_critical_warning_state { - - uint8_t raw; - - struct { - uint8_t available_spare : 1; - uint8_t temperature : 1; - uint8_t device_reliability : 1; - uint8_t read_only : 1; - uint8_t volatile_memory_backup : 1; - uint8_t reserved : 3; - } __packed bits; -} __packed; - -_Static_assert(sizeof(union nvme_critical_warning_state) == 1, "bad size for nvme_critical_warning_state"); - struct nvme_health_information_page { - union nvme_critical_warning_state critical_warning; - + uint8_t critical_warning; uint16_t temperature; uint8_t available_spare; uint8_t available_spare_threshold; @@ -884,11 +858,7 @@ _Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for struct nvme_firmware_page { - struct { - uint8_t slot : 3; /* slot for current FW */ - uint8_t reserved : 5; - } __packed afi; - + uint8_t afi; uint8_t reserved[7]; uint64_t revision[7]; /* revisions for 7 slots */ uint8_t reserved2[448]; @@ -987,7 +957,7 @@ struct nvme_pt_command { }; #define nvme_completion_is_error(cpl) \ - ((cpl)->status.sc != 0 || (cpl)->status.sct != 0) + (NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0) void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen); @@ -1087,19 +1057,19 @@ static inline void nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid) { - cmd->opc = NVME_OPC_FLUSH; - cmd->nsid = nsid; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_FLUSH); + cmd->nsid = htole32(nsid); } static inline void nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid, uint64_t lba, uint32_t count) { - cmd->opc = rwcmd; - cmd->nsid = nsid; - cmd->cdw10 = lba & 0xffffffffu; - cmd->cdw11 = lba >> 32; - cmd->cdw12 = count-1; + cmd->opc_fuse = NVME_CMD_SET_OPC(rwcmd); + cmd->nsid = htole32(nsid); + cmd->cdw10 = htole32(lba & 0xffffffffu); + cmd->cdw11 = htole32(lba >> 32); + cmd->cdw12 = htole32(count-1); } static inline @@ -1120,14 +1090,160 @@ static inline void nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid, uint32_t num_ranges) { - cmd->opc = NVME_OPC_DATASET_MANAGEMENT; - cmd->nsid = nsid; - cmd->cdw10 = num_ranges - 1; - cmd->cdw11 = NVME_DSM_ATTR_DEALLOCATE; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DATASET_MANAGEMENT); + cmd->nsid = htole32(nsid); + cmd->cdw10 = htole32(num_ranges - 1); + cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE); } extern int nvme_use_nvd; #endif /* _KERNEL */ +/* Endianess conversion functions for NVMe structs */ +static inline +void nvme_completion_swapbytes(struct nvme_completion *s) +{ + + s->cdw0 = le32toh(s->cdw0); + /* omit rsvd1 */ + s->sqhd = le16toh(s->sqhd); + s->sqid = le16toh(s->sqid); + /* omit cid */ + s->status = le16toh(s->status); +} + +static inline +void nvme_power_state_swapbytes(struct nvme_power_state *s) +{ + + s->mp = le16toh(s->mp); + s->enlat = le32toh(s->enlat); + s->exlat = le32toh(s->exlat); + s->idlp = le16toh(s->idlp); + s->actp = le16toh(s->actp); +} + +static inline +void nvme_controller_data_swapbytes(struct nvme_controller_data *s) +{ + int i; + + s->vid = le16toh(s->vid); + s->ssvid = le16toh(s->ssvid); + s->ctrlr_id = le16toh(s->ctrlr_id); + s->ver = le32toh(s->ver); + s->rtd3r = le32toh(s->rtd3r); + s->rtd3e = le32toh(s->rtd3e); + s->oaes = le32toh(s->oaes); + s->ctratt = le32toh(s->ctratt); + s->oacs = le16toh(s->oacs); + s->wctemp = le16toh(s->wctemp); + s->cctemp = le16toh(s->cctemp); + s->mtfa = le16toh(s->mtfa); + s->hmpre = le32toh(s->hmpre); + s->hmmin = le32toh(s->hmmin); + s->rpmbs = le32toh(s->rpmbs); + s->edstt = le16toh(s->edstt); + s->kas = le16toh(s->kas); + s->hctma = le16toh(s->hctma); + s->mntmt = le16toh(s->mntmt); + s->mxtmt = le16toh(s->mxtmt); + s->sanicap = le32toh(s->sanicap); + s->maxcmd = le16toh(s->maxcmd); + s->nn = le32toh(s->nn); + s->oncs = le16toh(s->oncs); + s->fuses = le16toh(s->fuses); + for (i = 0; i < 32; i++) + nvme_power_state_swapbytes(&s->power_state[i]); +} + +static inline +void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s) +{ + int i; + + s->nsze = le64toh(s->nsze); + s->ncap = le64toh(s->ncap); + s->nuse = le64toh(s->nuse); + for (i = 0; i < 16; i++) + s->lbaf[i] = le32toh(s->lbaf[i]); +} + +static inline +void nvme_error_information_entry_swapbytes(struct nvme_error_information_entry *s) +{ + + s->error_count = le64toh(s->error_count); + s->sqid = le16toh(s->sqid); + s->cid = le16toh(s->cid); + s->status = le16toh(s->status); + s->error_location = le16toh(s->error_location); + s->lba = le64toh(s->lba); + s->nsid = le32toh(s->nsid); +} + +static inline +void nvme_le128toh(void *p) +{ +#if _BYTE_ORDER != _LITTLE_ENDIAN + /* Swap 16 bytes in place */ + char *tmp = (char*)p; + char b; + int i; + for (i = 0; i < 8; i++) { + b = tmp[i]; + tmp[i] = tmp[15-i]; + tmp[15-i] = b; + } +#endif +} + +static inline +void nvme_health_information_page_swapbytes(struct nvme_health_information_page *s) +{ + int i; + + s->temperature = le16toh(s->temperature); + nvme_le128toh((void *)s->data_units_read); + nvme_le128toh((void *)s->data_units_written); + nvme_le128toh((void *)s->host_read_commands); + nvme_le128toh((void *)s->host_write_commands); + nvme_le128toh((void *)s->controller_busy_time); + nvme_le128toh((void *)s->power_cycles); + nvme_le128toh((void *)s->power_on_hours); + nvme_le128toh((void *)s->unsafe_shutdowns); + nvme_le128toh((void *)s->media_errors); + nvme_le128toh((void *)s->num_error_info_log_entries); + s->warning_temp_time = le32toh(s->warning_temp_time); + s->error_temp_time = le32toh(s->error_temp_time); + for (i = 0; i < 8; i++) + s->temp_sensor[i] = le16toh(s->temp_sensor[i]); +} + + +static inline +void nvme_firmware_page_swapbytes(struct nvme_firmware_page *s) +{ + int i; + + for (i = 0; i < 7; i++) + s->revision[i] = le64toh(s->revision[i]); +} + +static inline +void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s) +{ + + s->current = le64toh(s->current); + s->overtemp_flag_last = le64toh(s->overtemp_flag_last); + s->overtemp_flag_life = le64toh(s->overtemp_flag_life); + s->max_temp = le64toh(s->max_temp); + s->min_temp = le64toh(s->min_temp); + /* omit _rsvd[] */ + s->max_oper_temp = le64toh(s->max_oper_temp); + s->min_oper_temp = le64toh(s->min_oper_temp); + s->est_offset = le64toh(s->est_offset); +} + #endif /* __NVME_H__ */ diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 9f32bc9033a43..b204c91ead11a 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -40,13 +40,14 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/smp.h> #include <sys/uio.h> +#include <sys/endian.h> #include <dev/pci/pcireg.h> #include <dev/pci/pcivar.h> #include "nvme_private.h" -#define B4_CHK_RDY_DELAY_MS 2300 /* work arond controller bug */ +#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */ static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, struct nvme_async_event_request *aer); @@ -123,7 +124,8 @@ static int nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr) { struct nvme_qpair *qpair; - union cap_lo_register cap_lo; + uint32_t cap_lo; + uint16_t mqes; int i, error, num_entries, num_trackers; num_entries = NVME_IO_ENTRIES; @@ -134,8 +136,9 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr) * devices may specify a smaller limit, so we need to check * the MQES field in the capabilities register. */ - cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); - num_entries = min(num_entries, cap_lo.bits.mqes+1); + cap_lo = nvme_mmio_read_4(ctrlr, cap_lo); + mqes = (cap_lo >> NVME_CAP_LO_REG_MQES_SHIFT) & NVME_CAP_LO_REG_MQES_MASK; + num_entries = min(num_entries, mqes + 1); num_trackers = NVME_IO_TRACKERS; TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers); @@ -243,19 +246,19 @@ static int nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val) { int ms_waited; - union csts_register csts; + uint32_t csts; - csts.raw = nvme_mmio_read_4(ctrlr, csts); + csts = nvme_mmio_read_4(ctrlr, csts); ms_waited = 0; - while (csts.bits.rdy != desired_val) { + while (((csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK) != desired_val) { if (ms_waited++ > ctrlr->ready_timeout_in_ms) { nvme_printf(ctrlr, "controller ready did not become %d " "within %d ms\n", desired_val, ctrlr->ready_timeout_in_ms); return (ENXIO); } DELAY(1000); - csts.raw = nvme_mmio_read_4(ctrlr, csts); + csts = nvme_mmio_read_4(ctrlr, csts); } return (0); @@ -264,12 +267,16 @@ nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val) static int nvme_ctrlr_disable(struct nvme_controller *ctrlr) { - union cc_register cc; - union csts_register csts; + uint32_t cc; + uint32_t csts; + uint8_t en, rdy; int err; - cc.raw = nvme_mmio_read_4(ctrlr, cc); - csts.raw = nvme_mmio_read_4(ctrlr, csts); + cc = nvme_mmio_read_4(ctrlr, cc); + csts = nvme_mmio_read_4(ctrlr, csts); + + en = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK; + rdy = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK; /* * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1 @@ -277,8 +284,8 @@ nvme_ctrlr_disable(struct nvme_controller *ctrlr) * CSTS.RDY is 0 "has undefined results" So make sure that CSTS.RDY * isn't the desired value. Short circuit if we're already disabled. */ - if (cc.bits.en == 1) { - if (csts.bits.rdy == 0) { + if (en == 1) { + if (rdy == 0) { /* EN == 1, wait for RDY == 1 or fail */ err = nvme_ctrlr_wait_for_ready(ctrlr, 1); if (err != 0) @@ -286,14 +293,14 @@ nvme_ctrlr_disable(struct nvme_controller *ctrlr) } } else { /* EN == 0 already wait for RDY == 0 */ - if (csts.bits.rdy == 0) + if (rdy == 0) return (0); else return (nvme_ctrlr_wait_for_ready(ctrlr, 0)); } - cc.bits.en = 0; - nvme_mmio_write_4(ctrlr, cc, cc.raw); + cc &= ~NVME_CC_REG_EN_MASK; + nvme_mmio_write_4(ctrlr, cc, cc); /* * Some drives have issues with accessing the mmio after we * disable, so delay for a bit after we write the bit to @@ -307,19 +314,24 @@ nvme_ctrlr_disable(struct nvme_controller *ctrlr) static int nvme_ctrlr_enable(struct nvme_controller *ctrlr) { - union cc_register cc; - union csts_register csts; - union aqa_register aqa; - int err; + uint32_t cc; + uint32_t csts; + uint32_t aqa; + uint32_t qsize; + uint8_t en, rdy; + int err; + + cc = nvme_mmio_read_4(ctrlr, cc); + csts = nvme_mmio_read_4(ctrlr, csts); - cc.raw = nvme_mmio_read_4(ctrlr, cc); - csts.raw = nvme_mmio_read_4(ctrlr, csts); + en = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK; + rdy = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK; /* * See note in nvme_ctrlr_disable. Short circuit if we're already enabled. */ - if (cc.bits.en == 1) { - if (csts.bits.rdy == 1) + if (en == 1) { + if (rdy == 1) return (0); else return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); @@ -335,24 +347,28 @@ nvme_ctrlr_enable(struct nvme_controller *ctrlr) nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr); DELAY(5000); - aqa.raw = 0; /* acqs and asqs are 0-based. */ - aqa.bits.acqs = ctrlr->adminq.num_entries-1; - aqa.bits.asqs = ctrlr->adminq.num_entries-1; - nvme_mmio_write_4(ctrlr, aqa, aqa.raw); + qsize = ctrlr->adminq.num_entries - 1; + + aqa = 0; + aqa = (qsize & NVME_AQA_REG_ACQS_MASK) << NVME_AQA_REG_ACQS_SHIFT; + aqa |= (qsize & NVME_AQA_REG_ASQS_MASK) << NVME_AQA_REG_ASQS_SHIFT; + nvme_mmio_write_4(ctrlr, aqa, aqa); DELAY(5000); - cc.bits.en = 1; - cc.bits.css = 0; - cc.bits.ams = 0; - cc.bits.shn = 0; - cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */ - cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */ + /* Initialization values for CC */ + cc = 0; + cc |= 1 << NVME_CC_REG_EN_SHIFT; + cc |= 0 << NVME_CC_REG_CSS_SHIFT; + cc |= 0 << NVME_CC_REG_AMS_SHIFT; + cc |= 0 << NVME_CC_REG_SHN_SHIFT; + cc |= 6 << NVME_CC_REG_IOSQES_SHIFT; /* SQ entry size == 64 == 2^6 */ + cc |= 4 << NVME_CC_REG_IOCQES_SHIFT; /* CQ entry size == 16 == 2^4 */ /* This evaluates to 0, which is according to spec. */ - cc.bits.mps = (PAGE_SIZE >> 13); + cc |= (PAGE_SIZE >> 13) << NVME_CC_REG_MPS_SHIFT; - nvme_mmio_write_4(ctrlr, cc, cc.raw); + nvme_mmio_write_4(ctrlr, cc, cc); return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); } @@ -414,6 +430,9 @@ nvme_ctrlr_identify(struct nvme_controller *ctrlr) return (ENXIO); } + /* Convert data to host endian */ + nvme_controller_data_swapbytes(&ctrlr->cdata); + /* * Use MDTS to ensure our default max_xfer_size doesn't exceed what the * controller supports. @@ -531,8 +550,7 @@ nvme_ctrlr_get_log_page_size(struct nvme_controller *ctrlr, uint8_t page_id) case NVME_LOG_ERROR: log_page_size = min( sizeof(struct nvme_error_information_entry) * - ctrlr->cdata.elpe, - NVME_MAX_AER_LOG_SIZE); + (ctrlr->cdata.elpe + 1), NVME_MAX_AER_LOG_SIZE); break; case NVME_LOG_HEALTH_INFORMATION: log_page_size = sizeof(struct nvme_health_information_page); @@ -550,27 +568,27 @@ nvme_ctrlr_get_log_page_size(struct nvme_controller *ctrlr, uint8_t page_id) static void nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr, - union nvme_critical_warning_state state) + uint8_t state) { - if (state.bits.available_spare == 1) + if (state & NVME_CRIT_WARN_ST_AVAILABLE_SPARE) nvme_printf(ctrlr, "available spare space below threshold\n"); - if (state.bits.temperature == 1) + if (state & NVME_CRIT_WARN_ST_TEMPERATURE) nvme_printf(ctrlr, "temperature above threshold\n"); - if (state.bits.device_reliability == 1) + if (state & NVME_CRIT_WARN_ST_DEVICE_RELIABILITY) nvme_printf(ctrlr, "device reliability degraded\n"); - if (state.bits.read_only == 1) + if (state & NVME_CRIT_WARN_ST_READ_ONLY) nvme_printf(ctrlr, "media placed in read only mode\n"); - if (state.bits.volatile_memory_backup == 1) + if (state & NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP) nvme_printf(ctrlr, "volatile memory backup device failed\n"); - if (state.bits.reserved != 0) + if (state & NVME_CRIT_WARN_ST_RESERVED_MASK) nvme_printf(ctrlr, - "unknown critical warning(s): state = 0x%02x\n", state.raw); + "unknown critical warning(s): state = 0x%02x\n", state); } static void @@ -578,6 +596,8 @@ nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl) { struct nvme_async_event_request *aer = arg; struct nvme_health_information_page *health_info; + struct nvme_error_information_entry *err; + int i; /* * If the log page fetch for some reason completed with an error, @@ -588,6 +608,29 @@ nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl) nvme_notify_async_consumers(aer->ctrlr, &aer->cpl, aer->log_page_id, NULL, 0); else { + /* Convert data to host endian */ + switch (aer->log_page_id) { + case NVME_LOG_ERROR: + err = (struct nvme_error_information_entry *)aer->log_page_buffer; + for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++) + nvme_error_information_entry_swapbytes(err++); + break; + case NVME_LOG_HEALTH_INFORMATION: + nvme_health_information_page_swapbytes( + (struct nvme_health_information_page *)aer->log_page_buffer); + break; + case NVME_LOG_FIRMWARE_SLOT: + nvme_firmware_page_swapbytes( + (struct nvme_firmware_page *)aer->log_page_buffer); + break; + case INTEL_LOG_TEMP_STATS: + intel_log_temp_stats_swapbytes( + (struct intel_log_temp_stats *)aer->log_page_buffer); + break; + default: + break; + } + if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) { health_info = (struct nvme_health_information_page *) aer->log_page_buffer; @@ -600,8 +643,8 @@ nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl) * config so that we do not receive repeated * notifications for the same event. */ - aer->ctrlr->async_event_config.raw &= - ~health_info->critical_warning.raw; + aer->ctrlr->async_event_config &= + ~health_info->critical_warning; nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr, aer->ctrlr->async_event_config, NULL, NULL); } @@ -679,7 +722,7 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr, * nature never be timed out. */ req->timeout = FALSE; - req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST; + req->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_ASYNC_EVENT_REQUEST); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -690,8 +733,8 @@ nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr) struct nvme_async_event_request *aer; uint32_t i; - ctrlr->async_event_config.raw = 0xFF; - ctrlr->async_event_config.bits.reserved = 0; + ctrlr->async_event_config = 0xFF; + ctrlr->async_event_config &= ~NVME_CRIT_WARN_ST_RESERVED_MASK; status.done = 0; nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD, @@ -702,7 +745,7 @@ nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr) (status.cpl.cdw0 & 0xFFFF) == 0xFFFF || (status.cpl.cdw0 & 0xFFFF) == 0x0000) { nvme_printf(ctrlr, "temperature threshold not supported\n"); - ctrlr->async_event_config.bits.temperature = 0; + ctrlr->async_event_config &= ~NVME_CRIT_WARN_ST_TEMPERATURE; } nvme_ctrlr_cmd_set_async_event_config(ctrlr, @@ -907,11 +950,14 @@ static void nvme_pt_done(void *arg, const struct nvme_completion *cpl) { struct nvme_pt_command *pt = arg; + uint16_t status; bzero(&pt->cpl, sizeof(pt->cpl)); pt->cpl.cdw0 = cpl->cdw0; - pt->cpl.status = cpl->status; - pt->cpl.status.p = 0; + + status = cpl->status; + status &= ~NVME_STATUS_P_MASK; + pt->cpl.status = status; mtx_lock(pt->driver_lock); wakeup(pt); @@ -973,20 +1019,24 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, } else req = nvme_allocate_request_null(nvme_pt_done, pt); - req->cmd.opc = pt->cmd.opc; - req->cmd.cdw10 = pt->cmd.cdw10; - req->cmd.cdw11 = pt->cmd.cdw11; - req->cmd.cdw12 = pt->cmd.cdw12; - req->cmd.cdw13 = pt->cmd.cdw13; - req->cmd.cdw14 = pt->cmd.cdw14; - req->cmd.cdw15 = pt->cmd.cdw15; + /* Assume userspace already converted to little-endian */ + req->cmd.opc_fuse = pt->cmd.opc_fuse; + req->cmd.cdw10 = pt->cmd.cdw10; + req->cmd.cdw11 = pt->cmd.cdw11; + req->cmd.cdw12 = pt->cmd.cdw12; + req->cmd.cdw13 = pt->cmd.cdw13; + req->cmd.cdw14 = pt->cmd.cdw14; + req->cmd.cdw15 = pt->cmd.cdw15; - req->cmd.nsid = nsid; + req->cmd.nsid = htole32(nsid); if (is_admin_cmd) mtx = &ctrlr->lock; - else + else { + KASSERT((nsid-1) >= 0 && (nsid-1) < NVME_MAX_NAMESPACES, + ("%s: invalid namespace ID %d\n", __func__, nsid)); mtx = &ctrlr->ns[nsid-1].lock; + } mtx_lock(mtx); pt->driver_lock = mtx; @@ -1025,7 +1075,7 @@ nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, break; case NVME_PASSTHROUGH_CMD: pt = (struct nvme_pt_command *)arg; - return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, pt->cmd.nsid, + return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, le32toh(pt->cmd.nsid), 1 /* is_user_buffer */, 1 /* is_admin_cmd */)); default: return (ENOTTY); @@ -1125,9 +1175,12 @@ nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr) int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) { - union cap_lo_register cap_lo; - union cap_hi_register cap_hi; - int status, timeout_period; + uint32_t cap_lo; + uint32_t cap_hi; + uint8_t to; + uint8_t dstrd; + uint8_t mpsmin; + int status, timeout_period; ctrlr->dev = dev; @@ -1142,15 +1195,18 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev) * Software emulators may set the doorbell stride to something * other than zero, but this driver is not set up to handle that. */ - cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi); - if (cap_hi.bits.dstrd != 0) + cap_hi = nvme_mmio_read_4(ctrlr, cap_hi); + dstrd = (cap_hi >> NVME_CAP_HI_REG_DSTRD_SHIFT) & NVME_CAP_HI_REG_DSTRD_MASK; + if (dstrd != 0) return (ENXIO); - ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin); + mpsmin = (cap_hi >> NVME_CAP_HI_REG_MPSMIN_SHIFT) & NVME_CAP_HI_REG_MPSMIN_MASK; + ctrlr->min_page_size = 1 << (12 + mpsmin); /* Get ready timeout value from controller, in units of 500ms. */ - cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo); - ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500; + cap_lo = nvme_mmio_read_4(ctrlr, cap_lo); + to = (cap_lo >> NVME_CAP_LO_REG_TO_SHIFT) & NVME_CAP_LO_REG_TO_MASK; + ctrlr->ready_timeout_in_ms = to * 500; timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD; TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period); @@ -1249,19 +1305,21 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev) void nvme_ctrlr_shutdown(struct nvme_controller *ctrlr) { - union cc_register cc; - union csts_register csts; - int ticks = 0; + uint32_t cc; + uint32_t csts; + int ticks = 0; + + cc = nvme_mmio_read_4(ctrlr, cc); + cc &= ~(NVME_CC_REG_SHN_MASK << NVME_CC_REG_SHN_SHIFT); + cc |= NVME_SHN_NORMAL << NVME_CC_REG_SHN_SHIFT; + nvme_mmio_write_4(ctrlr, cc, cc); - cc.raw = nvme_mmio_read_4(ctrlr, cc); - cc.bits.shn = NVME_SHN_NORMAL; - nvme_mmio_write_4(ctrlr, cc, cc.raw); - csts.raw = nvme_mmio_read_4(ctrlr, csts); - while ((csts.bits.shst != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) { + csts = nvme_mmio_read_4(ctrlr, csts); + while ((NVME_CSTS_GET_SHST(csts) != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) { pause("nvme shn", 1); - csts.raw = nvme_mmio_read_4(ctrlr, csts); + csts = nvme_mmio_read_4(ctrlr, csts); } - if (csts.bits.shst != NVME_SHST_COMPLETE) + if (NVME_CSTS_GET_SHST(csts) != NVME_SHST_COMPLETE) nvme_printf(ctrlr, "did not complete shutdown within 5 seconds " "of notification\n"); } diff --git a/sys/dev/nvme/nvme_ctrlr_cmd.c b/sys/dev/nvme/nvme_ctrlr_cmd.c index f7d0da7d0917a..de0685a3b8cb9 100644 --- a/sys/dev/nvme/nvme_ctrlr_cmd.c +++ b/sys/dev/nvme/nvme_ctrlr_cmd.c @@ -42,13 +42,13 @@ nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr, void *payload, sizeof(struct nvme_controller_data), cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_IDENTIFY; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_IDENTIFY); /* * TODO: create an identify command data structure, which * includes this CNS bit in cdw10. */ - cmd->cdw10 = 1; + cmd->cdw10 = htole32(1); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -64,12 +64,12 @@ nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr, uint32_t nsid, sizeof(struct nvme_namespace_data), cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_IDENTIFY; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_IDENTIFY); /* * TODO: create an identify command data structure */ - cmd->nsid = nsid; + cmd->nsid = htole32(nsid); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -85,16 +85,16 @@ nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr, req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_CREATE_IO_CQ; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_CREATE_IO_CQ); /* * TODO: create a create io completion queue command data * structure. */ - cmd->cdw10 = ((io_que->num_entries-1) << 16) | io_que->id; + cmd->cdw10 = htole32(((io_que->num_entries-1) << 16) | io_que->id); /* 0x3 = interrupts enabled | physically contiguous */ - cmd->cdw11 = (vector << 16) | 0x3; - cmd->prp1 = io_que->cpl_bus_addr; + cmd->cdw11 = htole32((vector << 16) | 0x3); + cmd->prp1 = htole64(io_que->cpl_bus_addr); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -109,16 +109,16 @@ nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr, req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_CREATE_IO_SQ; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_CREATE_IO_SQ); /* * TODO: create a create io submission queue command data * structure. */ - cmd->cdw10 = ((io_que->num_entries-1) << 16) | io_que->id; + cmd->cdw10 = htole32(((io_que->num_entries-1) << 16) | io_que->id); /* 0x1 = physically contiguous */ - cmd->cdw11 = (io_que->id << 16) | 0x1; - cmd->prp1 = io_que->cmd_bus_addr; + cmd->cdw11 = htole32((io_que->id << 16) | 0x1); + cmd->prp1 = htole64(io_que->cmd_bus_addr); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -133,13 +133,13 @@ nvme_ctrlr_cmd_delete_io_cq(struct nvme_controller *ctrlr, req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_DELETE_IO_CQ; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DELETE_IO_CQ); /* * TODO: create a delete io completion queue command data * structure. */ - cmd->cdw10 = io_que->id; + cmd->cdw10 = htole32(io_que->id); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -154,13 +154,13 @@ nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr, req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_DELETE_IO_SQ; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DELETE_IO_SQ); /* * TODO: create a delete io submission queue command data * structure. */ - cmd->cdw10 = io_que->id; + cmd->cdw10 = htole32(io_que->id); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -176,9 +176,9 @@ nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature, req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_SET_FEATURES; - cmd->cdw10 = feature; - cmd->cdw11 = cdw11; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_SET_FEATURES); + cmd->cdw10 = htole32(feature); + cmd->cdw11 = htole32(cdw11); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -194,9 +194,9 @@ nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr, uint8_t feature, req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_GET_FEATURES; - cmd->cdw10 = feature; - cmd->cdw11 = cdw11; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_GET_FEATURES); + cmd->cdw10 = htole32(feature); + cmd->cdw11 = htole32(cdw11); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -214,12 +214,11 @@ nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr, void nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr, - union nvme_critical_warning_state state, nvme_cb_fn_t cb_fn, - void *cb_arg) + uint8_t state, nvme_cb_fn_t cb_fn, void *cb_arg) { uint32_t cdw11; - cdw11 = state.raw; + cdw11 = state; nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, NULL, 0, cb_fn, cb_arg); @@ -261,10 +260,11 @@ nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr, uint8_t log_page, req = nvme_allocate_request_vaddr(payload, payload_size, cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_GET_LOG_PAGE; - cmd->nsid = nsid; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_GET_LOG_PAGE); + cmd->nsid = htole32(nsid); cmd->cdw10 = ((payload_size/sizeof(uint32_t)) - 1) << 16; cmd->cdw10 |= log_page; + cmd->cdw10 = htole32(cmd->cdw10); nvme_ctrlr_submit_admin_request(ctrlr, req); } @@ -320,8 +320,8 @@ nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid, req = nvme_allocate_request_null(cb_fn, cb_arg); cmd = &req->cmd; - cmd->opc = NVME_OPC_ABORT; - cmd->cdw10 = (cid << 16) | sqid; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_ABORT); + cmd->cdw10 = htole32((cid << 16) | sqid); nvme_ctrlr_submit_admin_request(ctrlr, req); } diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c index 2fe1c8b90896a..465d3c014554d 100644 --- a/sys/dev/nvme/nvme_ns.c +++ b/sys/dev/nvme/nvme_ns.c @@ -172,7 +172,14 @@ nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns) uint32_t nvme_ns_get_sector_size(struct nvme_namespace *ns) { - return (1 << ns->data.lbaf[ns->data.flbas.format].lbads); + uint8_t flbas_fmt, lbads; + + flbas_fmt = (ns->data.flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & + NVME_NS_DATA_FLBAS_FORMAT_MASK; + lbads = (ns->data.lbaf[flbas_fmt] >> NVME_NS_DATA_LBAF_LBADS_SHIFT) & + NVME_NS_DATA_LBAF_LBADS_MASK; + + return (1 << lbads); } uint64_t @@ -265,8 +272,10 @@ nvme_bio_child_inbed(struct bio *parent, int bio_error) inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1; if (inbed == children) { bzero(&parent_cpl, sizeof(parent_cpl)); - if (parent->bio_flags & BIO_ERROR) - parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR; + if (parent->bio_flags & BIO_ERROR) { + parent_cpl.status &= ~(NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT); + parent_cpl.status |= (NVME_SC_DATA_TRANSFER_ERROR) << NVME_STATUS_SC_SHIFT; + } nvme_ns_bio_done(parent, &parent_cpl); } } @@ -459,10 +468,14 @@ nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, dsm_range = malloc(sizeof(struct nvme_dsm_range), M_NVME, M_ZERO | M_WAITOK); + if (!dsm_range) { + err = ENOMEM; + break; + } dsm_range->length = - bp->bio_bcount/nvme_ns_get_sector_size(ns); + htole32(bp->bio_bcount/nvme_ns_get_sector_size(ns)); dsm_range->starting_lba = - bp->bio_offset/nvme_ns_get_sector_size(ns); + htole64(bp->bio_offset/nvme_ns_get_sector_size(ns)); bp->bio_driver2 = dsm_range; err = nvme_ns_cmd_deallocate(ns, dsm_range, 1, nvme_ns_bio_done, bp); @@ -483,6 +496,10 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, { struct nvme_completion_poll_status status; int unit; + uint16_t oncs; + uint8_t dsm; + uint8_t flbas_fmt; + uint8_t vwc_present; ns->ctrlr = ctrlr; ns->id = id; @@ -513,6 +530,9 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, return (ENXIO); } + /* Convert data to host endian */ + nvme_namespace_data_swapbytes(&ns->data); + /* * If the size of is zero, chances are this isn't a valid * namespace (eg one that's not been configured yet). The @@ -522,20 +542,26 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, if (ns->data.nsze == 0) return (ENXIO); + flbas_fmt = (ns->data.flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & + NVME_NS_DATA_FLBAS_FORMAT_MASK; /* * Note: format is a 0-based value, so > is appropriate here, * not >=. */ - if (ns->data.flbas.format > ns->data.nlbaf) { + if (flbas_fmt > ns->data.nlbaf) { printf("lba format %d exceeds number supported (%d)\n", - ns->data.flbas.format, ns->data.nlbaf+1); + flbas_fmt, ns->data.nlbaf + 1); return (ENXIO); } - if (ctrlr->cdata.oncs.dsm) + oncs = ctrlr->cdata.oncs; + dsm = (oncs >> NVME_CTRLR_DATA_ONCS_DSM_SHIFT) & NVME_CTRLR_DATA_ONCS_DSM_MASK; + if (dsm) ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED; - if (ctrlr->cdata.vwc.present) + vwc_present = (ctrlr->cdata.vwc >> NVME_CTRLR_DATA_VWC_PRESENT_SHIFT) & + NVME_CTRLR_DATA_VWC_PRESENT_MASK; + if (vwc_present) ns->flags |= NVME_NS_FLUSH_SUPPORTED; /* diff --git a/sys/dev/nvme/nvme_ns_cmd.c b/sys/dev/nvme/nvme_ns_cmd.c index c9a37c385f9aa..0105e367e952a 100644 --- a/sys/dev/nvme/nvme_ns_cmd.c +++ b/sys/dev/nvme/nvme_ns_cmd.c @@ -126,12 +126,12 @@ nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload, return (ENOMEM); cmd = &req->cmd; - cmd->opc = NVME_OPC_DATASET_MANAGEMENT; - cmd->nsid = ns->id; + cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DATASET_MANAGEMENT); + cmd->nsid = htole32(ns->id); /* TODO: create a delete command data structure */ - cmd->cdw10 = num_ranges - 1; - cmd->cdw11 = NVME_DSM_ATTR_DEALLOCATE; + cmd->cdw10 = htole32(num_ranges - 1); + cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE); nvme_ctrlr_submit_io_request(ns->ctrlr, req); diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 9064ae10a9022..348737231e890 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -312,8 +312,8 @@ struct nvme_controller { struct cdev *cdev; - /** bit mask of warning types currently enabled for async events */ - union nvme_critical_warning_state async_event_config; + /** bit mask of critical warning types currently enabled for async events */ + uint8_t async_event_config; uint32_t num_aers; struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS]; @@ -339,13 +339,13 @@ struct nvme_controller { bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg), val) -#define nvme_mmio_write_8(sc, reg, val) \ +#define nvme_mmio_write_8(sc, reg, val) \ do { \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg), val & 0xFFFFFFFF); \ bus_space_write_4((sc)->bus_tag, (sc)->bus_handle, \ nvme_mmio_offsetof(reg)+4, \ - (val & 0xFFFFFFFF00000000UL) >> 32); \ + (val & 0xFFFFFFFF00000000ULL) >> 32); \ } while (0); #if __FreeBSD_version < 800054 @@ -399,7 +399,7 @@ void nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr, uint32_t num_queues, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr, - union nvme_critical_warning_state state, + uint8_t state, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid, uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg); diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index 8b96f6d5e2644..ad168d31f6df8 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -110,40 +110,42 @@ static void nvme_admin_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd) { + uint16_t opc; + opc = le16toh(cmd->opc_fuse) & NVME_CMD_OPC_MASK; nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%x " "cdw10:%08x cdw11:%08x\n", - get_admin_opcode_string(cmd->opc), cmd->opc, qpair->id, cmd->cid, - cmd->nsid, cmd->cdw10, cmd->cdw11); + get_admin_opcode_string(opc), opc, qpair->id, cmd->cid, + le32toh(cmd->nsid), le32toh(cmd->cdw10), le32toh(cmd->cdw11)); } static void nvme_io_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd) { + uint16_t opc; - switch (cmd->opc) { + opc = le16toh(cmd->opc_fuse) & NVME_CMD_OPC_MASK; + switch (opc) { case NVME_OPC_WRITE: case NVME_OPC_READ: case NVME_OPC_WRITE_UNCORRECTABLE: case NVME_OPC_COMPARE: nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d " "lba:%llu len:%d\n", - get_io_opcode_string(cmd->opc), qpair->id, cmd->cid, - cmd->nsid, - ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10, - (cmd->cdw12 & 0xFFFF) + 1); + get_io_opcode_string(opc), qpair->id, cmd->cid, le32toh(cmd->nsid), + ((unsigned long long)le32toh(cmd->cdw11) << 32) + le32toh(cmd->cdw10), + (le32toh(cmd->cdw12) & 0xFFFF) + 1); break; case NVME_OPC_FLUSH: case NVME_OPC_DATASET_MANAGEMENT: nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d\n", - get_io_opcode_string(cmd->opc), qpair->id, cmd->cid, - cmd->nsid); + get_io_opcode_string(opc), qpair->id, cmd->cid, le32toh(cmd->nsid)); break; default: nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%d\n", - get_io_opcode_string(cmd->opc), cmd->opc, qpair->id, - cmd->cid, cmd->nsid); + get_io_opcode_string(opc), opc, qpair->id, + cmd->cid, le32toh(cmd->nsid)); break; } } @@ -245,26 +247,37 @@ static void nvme_qpair_print_completion(struct nvme_qpair *qpair, struct nvme_completion *cpl) { + uint16_t sct, sc; + + sct = NVME_STATUS_GET_SCT(cpl->status); + sc = NVME_STATUS_GET_SC(cpl->status); + nvme_printf(qpair->ctrlr, "%s (%02x/%02x) sqid:%d cid:%d cdw0:%x\n", - get_status_string(cpl->status.sct, cpl->status.sc), - cpl->status.sct, cpl->status.sc, cpl->sqid, cpl->cid, cpl->cdw0); + get_status_string(sct, sc), sct, sc, cpl->sqid, cpl->cid, + cpl->cdw0); } static boolean_t nvme_completion_is_retry(const struct nvme_completion *cpl) { + uint8_t sct, sc, dnr; + + sct = NVME_STATUS_GET_SCT(cpl->status); + sc = NVME_STATUS_GET_SC(cpl->status); + dnr = NVME_STATUS_GET_DNR(cpl->status); + /* * TODO: spec is not clear how commands that are aborted due * to TLER will be marked. So for now, it seems * NAMESPACE_NOT_READY is the only case where we should * look at the DNR bit. */ - switch (cpl->status.sct) { + switch (sct) { case NVME_SCT_GENERIC: - switch (cpl->status.sc) { + switch (sc) { case NVME_SC_ABORTED_BY_REQUEST: case NVME_SC_NAMESPACE_NOT_READY: - if (cpl->status.dnr) + if (dnr) return (0); else return (1); @@ -359,9 +372,9 @@ nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair, memset(&cpl, 0, sizeof(cpl)); cpl.sqid = qpair->id; cpl.cid = tr->cid; - cpl.status.sct = sct; - cpl.status.sc = sc; - cpl.status.dnr = dnr; + cpl.status |= (sct & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT; + cpl.status |= (sc & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; + cpl.status |= (dnr & NVME_STATUS_DNR_MASK) << NVME_STATUS_DNR_SHIFT; nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error); } @@ -375,8 +388,8 @@ nvme_qpair_manual_complete_request(struct nvme_qpair *qpair, memset(&cpl, 0, sizeof(cpl)); cpl.sqid = qpair->id; - cpl.status.sct = sct; - cpl.status.sc = sc; + cpl.status |= (sct & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT; + cpl.status |= (sc & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT; error = nvme_completion_is_error(&cpl); @@ -395,7 +408,7 @@ void nvme_qpair_process_completions(struct nvme_qpair *qpair) { struct nvme_tracker *tr; - struct nvme_completion *cpl; + struct nvme_completion cpl; qpair->num_intr_handler_calls++; @@ -409,20 +422,24 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair) return; while (1) { - cpl = &qpair->cpl[qpair->cq_head]; + cpl = qpair->cpl[qpair->cq_head]; + + /* Convert to host endian */ + nvme_completion_swapbytes(&cpl); - if (cpl->status.p != qpair->phase) + if (NVME_STATUS_GET_P(cpl.status) != qpair->phase) break; - tr = qpair->act_tr[cpl->cid]; + tr = qpair->act_tr[cpl.cid]; if (tr != NULL) { - nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE); - qpair->sq_head = cpl->sqhd; + nvme_qpair_complete_tracker(qpair, tr, &cpl, TRUE); + qpair->sq_head = cpl.sqhd; } else { nvme_printf(qpair->ctrlr, "cpl does not map to outstanding cmd\n"); - nvme_dump_completion(cpl); + /* nvme_dump_completion expects device endianess */ + nvme_dump_completion(&qpair->cpl[qpair->cq_head]); KASSERT(0, ("received completion for unknown cmd\n")); } @@ -629,7 +646,7 @@ nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair) tr = TAILQ_FIRST(&qpair->outstanding_tr); while (tr != NULL) { - if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) { + if ((le16toh(tr->req->cmd.opc_fuse) & NVME_CMD_OPC_MASK) == NVME_OPC_ASYNC_EVENT_REQUEST) { nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0, FALSE); @@ -685,12 +702,14 @@ nvme_timeout(void *arg) struct nvme_tracker *tr = arg; struct nvme_qpair *qpair = tr->qpair; struct nvme_controller *ctrlr = qpair->ctrlr; - union csts_register csts; + uint32_t csts; + uint8_t cfs; /* Read csts to get value of cfs - controller fatal status. */ - csts.raw = nvme_mmio_read_4(ctrlr, csts); + csts = nvme_mmio_read_4(ctrlr, csts); - if (ctrlr->enable_aborts && csts.bits.cfs == 0) { + cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK; + if (ctrlr->enable_aborts && cfs == 0) { /* * If aborts are enabled, only use them if the controller is * not reporting fatal status. @@ -759,16 +778,16 @@ nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) * we can safely just transfer each segment to its * associated PRP entry. */ - tr->req->cmd.prp1 = seg[0].ds_addr; + tr->req->cmd.prp1 = htole64(seg[0].ds_addr); if (nseg == 2) { - tr->req->cmd.prp2 = seg[1].ds_addr; + tr->req->cmd.prp2 = htole64(seg[1].ds_addr); } else if (nseg > 2) { cur_nseg = 1; - tr->req->cmd.prp2 = (uint64_t)tr->prp_bus_addr; + tr->req->cmd.prp2 = htole64((uint64_t)tr->prp_bus_addr); while (cur_nseg < nseg) { tr->prp[cur_nseg-1] = - (uint64_t)seg[cur_nseg].ds_addr; + htole64((uint64_t)seg[cur_nseg].ds_addr); cur_nseg++; } } else { |
