From 232dd8f2e18b0f41303874390aa50dca898fa703 Mon Sep 17 00:00:00 2001 From: Swapna Samoju Date: Mon, 23 Feb 2026 15:52:37 +0530 Subject: [PATCH] micron: add smart-log and id-ctrl commands Add smart-log command to retrieve SMART/Health log with vendor-specific OLEC and IPM fields. Add id-ctrl command to display Identify Controller with vendor-specific PMS, IPMSR, and MSMT fields. Signed-off-by: Swapna Samoju --- Documentation/cmd-plugins.txt | 6 + Documentation/meson.build | 2 + Documentation/nvme-micron-id-ctrl.txt | 59 +++++ Documentation/nvme-micron-smart-log.txt | 54 ++++ completions/_nvme | 2 + completions/bash-nvme-completion.sh | 3 +- plugins/micron/micron-nvme.c | 326 ++++++++++++++++++++++++ plugins/micron/micron-nvme.h | 3 + 8 files changed, 454 insertions(+), 1 deletion(-) create mode 100644 Documentation/nvme-micron-id-ctrl.txt create mode 100644 Documentation/nvme-micron-smart-log.txt diff --git a/Documentation/cmd-plugins.txt b/Documentation/cmd-plugins.txt index 334203733e..a925f1ce4a 100644 --- a/Documentation/cmd-plugins.txt +++ b/Documentation/cmd-plugins.txt @@ -46,6 +46,12 @@ linknvme:nvme-micron-smart-add-log[1]:: linknvme:nvme-micron-temperature-stats[1]:: Retrieves temperature information of given micron device +linknvme:nvme-micron-smart-log[1]:: + Retrieve SMART/Health log with Micron vendor-specific fields + +linknvme:nvme-micron-id-ctrl[1]:: + Send NVMe Identify Controller with Micron vendor-specific fields + linknvme:nvme-ocp-internal-log[1]:: Retrieves and parses OCP Telemetry DA1 and DA2 logs. 
diff --git a/Documentation/meson.build b/Documentation/meson.build index 671f8caffe..50bda5eff2 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -77,11 +77,13 @@ adoc_sources = [ 'nvme-lockdown', 'nvme-mi-cmd-support-effects-log', 'nvme-micron-clear-pcie-errors', + 'nvme-micron-id-ctrl', 'nvme-micron-internal-log', 'nvme-micron-nand-stats', 'nvme-micron-pcie-stats', 'nvme-micron-selective-download', 'nvme-micron-smart-add-log', + 'nvme-micron-smart-log', 'nvme-micron-temperature-stats', 'nvme-netapp-ontapdevices', 'nvme-netapp-smdevices', diff --git a/Documentation/nvme-micron-id-ctrl.txt b/Documentation/nvme-micron-id-ctrl.txt new file mode 100644 index 0000000000..b5b4edfe1f --- /dev/null +++ b/Documentation/nvme-micron-id-ctrl.txt @@ -0,0 +1,59 @@ +nvme-micron-id-ctrl(1) +====================== + +NAME +---- +nvme-micron-id-ctrl - Send NVMe Identify Controller with Micron vendor-specific fields + +SYNOPSIS +-------- +[verse] +'nvme micron id-ctrl' <device> [--output-format=<fmt> | -o <fmt>] + +DESCRIPTION +----------- +For the NVMe device given, sends an identify controller command and +provides the result with Micron vendor-specific fields decoded. + +The <device> parameter is mandatory and may be either the NVMe character +device (ex: /dev/nvme0), or a namespace block device (ex: /dev/nvme0n1). + +This command extends the standard Identify Controller output with +Micron-specific fields: + +PMS (Power Measurement Support):: + Bit 21 of CTRATT (Controller Attributes). + Indicates whether the controller supports power measurement. + +IPMSR (Interval Power Measurement Sample Rate):: + Located at bytes 392-393 in the Identify Controller reserved area. + Specifies the sample rate for interval power measurement. + +MSMT (Maximum Stop Measurement Time):: + Located at bytes 394-395 in the Identify Controller reserved area. + Specifies the maximum time to stop power measurement.
+ +OPTIONS +------- +-o <fmt>:: +--output-format=<fmt>:: + Set the reporting format to 'normal' or 'json'. + Default is 'normal'. + +EXAMPLES +-------- +* Display Identify Controller in human-readable format: ++ +------------ +# nvme micron id-ctrl /dev/nvme0 +------------ + +* Display Identify Controller in JSON format: ++ +------------ +# nvme micron id-ctrl /dev/nvme0 -o json +------------ + +NVME +---- +Part of the nvme-user suite diff --git a/Documentation/nvme-micron-smart-log.txt b/Documentation/nvme-micron-smart-log.txt new file mode 100644 index 0000000000..34e9351c0c --- /dev/null +++ b/Documentation/nvme-micron-smart-log.txt @@ -0,0 +1,54 @@ +nvme-micron-smart-log(1) +======================== + +NAME +---- +nvme-micron-smart-log - Retrieve SMART/Health log with Micron vendor-specific fields + +SYNOPSIS +-------- +[verse] +'nvme micron smart-log' <device> [--output-format=<fmt> | -o <fmt>] + +DESCRIPTION +----------- +For the NVMe device given, retrieves the SMART/Health Information log page +and displays it with Micron vendor-specific fields decoded. + +The <device> parameter is mandatory and may be either the NVMe character +device (ex: /dev/nvme0), or a namespace block device (ex: /dev/nvme0n1). + +This command extends the standard SMART log with Micron-specific fields: + +OLEC (Operational Lifetime Energy Consumed):: + Located at bytes 232-239 in the SMART log reserved area. + Reports cumulative energy consumed by the device. + +IPM (Interval Power Measurement):: + Located at bytes 240-243 in the SMART log reserved area. + Reports power measurement over sampling interval. + +OPTIONS +------- +-o <fmt>:: +--output-format=<fmt>:: + Set the reporting format to 'normal' or 'json'. + Default is 'normal'.
+ +EXAMPLES +-------- +* Display SMART log in human-readable format: ++ +------------ +# nvme micron smart-log /dev/nvme0 +------------ + +* Display SMART log in JSON format: ++ +------------ +# nvme micron smart-log /dev/nvme0 -o json +------------ + +NVME +---- +Part of the nvme-user suite diff --git a/completions/_nvme b/completions/_nvme index 9ee30d421d..0f0097fb4b 100644 --- a/completions/_nvme +++ b/completions/_nvme @@ -2972,6 +2972,8 @@ _nvme () { clear-fw-activate-history':Clear FW activation history' vs-smbus-option':Enable/Disable SMBUS on the drive' ocp-telemetry-log-parse':Parse OCP Telemetry DA1 and DA2 logs' + smart-log':Retrieve SMART/Health log with Micron vendor-specific fields' + id-ctrl':Identify Controller with Micron vendor-specific fields' help':Display this help' ) _arguments '*:: :->subcmds' diff --git a/completions/bash-nvme-completion.sh b/completions/bash-nvme-completion.sh index b96b4c0e88..ea48565907 100644 --- a/completions/bash-nvme-completion.sh +++ b/completions/bash-nvme-completion.sh @@ -1713,7 +1713,8 @@ _nvme_subcmds () { vs-drive-info plugin-version cloud-SSD-plugin-version \ log-page-directory vs-fw-activate-history \ vs-error-reason-identifier vs-smart-add-log \ - clear-fw-activate-history vs-smbus-option ocp-telemetry-log-parse" + clear-fw-activate-history vs-smbus-option ocp-telemetry-log-parse \ + smart-log id-ctrl" [seagate]="vs-temperature-stats vs-log-page-sup \ vs-smart-add-log vs-pcie-stats clear-pcie-correctable-errors \ get-host-tele get-ctrl-tele vs-internal-log \ diff --git a/plugins/micron/micron-nvme.c b/plugins/micron/micron-nvme.c index 0585331702..f3224385c4 100644 --- a/plugins/micron/micron-nvme.c +++ b/plugins/micron/micron-nvme.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include "nvme-print.h" #include "util/cleanup.h" #include "util/utils.h" +#include "util/types.h" #define CREATE_CMD #include "micron-nvme.h" @@ -4178,3 +4180,403 @@ static int micron_cloud_log(int argc, char **argv, struct command *acmd, nvme_show_status(err); return err; }
+
+/*
+ * Micron-specific fields in the reserved area of the SMART/Health log,
+ * expressed as offsets into nvme_smart_log.rsvd232 (log byte 232 onward):
+ *   OLEC: log bytes 232-239 (rsvd232[0:7])
+ *   IPM:  log bytes 240-243 (rsvd232[8:11])
+ */
+#define SMART_OLEC_OFFSET	0
+#define SMART_IPM_OFFSET	8
+
+/*
+ * Return OLEC converted from little-endian. The bytes are copied out with
+ * memcpy() because rsvd232 is a byte array: reading it through a cast
+ * __le64 pointer breaks strict aliasing and has no alignment guarantee.
+ */
+static inline __u64 get_smart_olec(const struct nvme_smart_log *smart)
+{
+	__le64 raw;
+
+	memcpy(&raw, &smart->rsvd232[SMART_OLEC_OFFSET], sizeof(raw));
+	return le64_to_cpu(raw);
+}
+
+/* Return IPM converted from little-endian; memcpy() as in get_smart_olec(). */
+static inline __u32 get_smart_ipm(const struct nvme_smart_log *smart)
+{
+	__le32 raw;
+
+	memcpy(&raw, &smart->rsvd232[SMART_IPM_OFFSET], sizeof(raw));
+	return le32_to_cpu(raw);
+}
+
+/*
+ * Print the SMART/Health log, including OLEC and IPM, as plain text.
+ *
+ * @smart:   SMART/Health log page to decode
+ * @devname: device name shown in the heading
+ *
+ * Temperatures are printed as the raw value (labelled K) and that value
+ * minus 273 (labelled C). A composite temperature of 0 prints as 0 C and
+ * temperature sensors reading 0 are skipped.
+ */
+static void print_micron_health_log_normal(struct nvme_smart_log *smart,
+					   const char *devname)
+{
+	__u16 temp = smart->temperature[1] << 8 | smart->temperature[0];
+	__u64 olec = get_smart_olec(smart);
+	__u32 ipm = get_smart_ipm(smart);
+	int i;
+
+	printf("SMART/Health Information Log for %s\n", devname);
+	printf("========================================\n");
+
+	printf("Critical Warning : 0x%02x\n",
+	       smart->critical_warning);
+	/* Decode each critical-warning bit that is set. */
+	if (smart->critical_warning & 0x01)
+		printf(" - Available spare below threshold\n");
+	if (smart->critical_warning & 0x02)
+		printf(" - Temperature threshold exceeded\n");
+	if (smart->critical_warning & 0x04)
+		printf(" - NVM subsystem reliability degraded\n");
+	if (smart->critical_warning & 0x08)
+		printf(" - Media placed in read-only mode\n");
+	if (smart->critical_warning & 0x10)
+		printf(" - Volatile memory backup failed\n");
+	if (smart->critical_warning & 0x20)
+		printf(" - PMR read-only or unreliable\n");
+
+	printf("Composite Temperature : %u K (%d C)\n",
+	       temp, temp ? temp - 273 : 0);
+	printf("Available Spare : %u%%\n", smart->avail_spare);
+	printf("Available Spare Threshold : %u%%\n", smart->spare_thresh);
+	printf("Percentage Used : %u%%\n", smart->percent_used);
+	printf("Endurance Grp Critical Warn : 0x%02x\n",
+	       smart->endu_grp_crit_warn_sumry);
+
+	printf("Data Units Read : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->data_units_read)));
+	printf("Data Units Written : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->data_units_written)));
+	printf("Host Read Commands : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->host_reads)));
+	printf("Host Write Commands : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->host_writes)));
+	printf("Controller Busy Time : %s min\n",
+	       uint128_t_to_string(le128_to_cpu(smart->ctrl_busy_time)));
+	printf("Power Cycles : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->power_cycles)));
+	printf("Power On Hours : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->power_on_hours)));
+	printf("Unsafe Shutdowns : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->unsafe_shutdowns)));
+	printf("Media Errors : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->media_errors)));
+	printf("Num Error Log Entries : %s\n",
+	       uint128_t_to_string(le128_to_cpu(smart->num_err_log_entries)));
+
+	printf("Warning Comp Temp Time : %u min\n",
+	       le32_to_cpu(smart->warning_temp_time));
+	printf("Critical Comp Temp Time : %u min\n",
+	       le32_to_cpu(smart->critical_comp_time));
+
+	for (i = 0; i < 8; i++) {
+		__u16 ts = le16_to_cpu(smart->temp_sensor[i]);
+
+		if (ts)
+			printf("Temperature Sensor %d : %u K (%d C)\n",
+			       i + 1, ts, ts - 273);
+	}
+
+	printf("Thm Temp 1 Trans Count : %u\n",
+	       le32_to_cpu(smart->thm_temp1_trans_count));
+	printf("Thm Temp 2 Trans Count : %u\n",
+	       le32_to_cpu(smart->thm_temp2_trans_count));
+	printf("Thm Temp 1 Total Time : %u sec\n",
+	       le32_to_cpu(smart->thm_temp1_total_time));
+	printf("Thm Temp 2 Total Time : %u sec\n",
+	       le32_to_cpu(smart->thm_temp2_total_time));
+
+	/* Micron-specific extended fields */
+	printf("OLEC (Energy) : %llu\n",
+	       (unsigned long long)olec);
+	printf("Interval Power Measurement : %u\n", ipm);
+}
+
+/*
+ * Print the SMART/Health log, including OLEC and IPM, as a JSON object.
+ *
+ * @smart:   SMART/Health log page to decode
+ * @devname: device name stored under the "device" key
+ *
+ * 128-bit counters are emitted as strings. Temperature sensors reading 0
+ * are omitted; the others are reported as the raw value minus 273.
+ */
+static void print_micron_health_log_json(struct nvme_smart_log *smart,
+					 const char *devname)
+{
+	__u16 temp = smart->temperature[1] << 8 | smart->temperature[0];
+	__u64 olec = get_smart_olec(smart);
+	__u32 ipm = get_smart_ipm(smart);
+	struct json_object *root;
+	int i;
+
+	root = json_create_object();
+
+	json_object_add_value_string(root, "device", devname);
+	json_object_add_value_int(root, "critical_warning",
+				  smart->critical_warning);
+	json_object_add_value_int(root, "temperature_kelvin", temp);
+	json_object_add_value_int(root, "temperature_celsius",
+				  temp ? temp - 273 : 0);
+	json_object_add_value_int(root, "avail_spare", smart->avail_spare);
+	json_object_add_value_int(root, "spare_thresh", smart->spare_thresh);
+	json_object_add_value_int(root, "percent_used", smart->percent_used);
+	json_object_add_value_int(root, "endurance_grp_crit_warn",
+				  smart->endu_grp_crit_warn_sumry);
+
+	json_object_add_value_string(root, "data_units_read",
+		uint128_t_to_string(le128_to_cpu(smart->data_units_read)));
+	json_object_add_value_string(root, "data_units_written",
+		uint128_t_to_string(le128_to_cpu(smart->data_units_written)));
+	json_object_add_value_string(root, "host_reads",
+		uint128_t_to_string(le128_to_cpu(smart->host_reads)));
+	json_object_add_value_string(root, "host_writes",
+		uint128_t_to_string(le128_to_cpu(smart->host_writes)));
+	json_object_add_value_string(root, "ctrl_busy_time",
+		uint128_t_to_string(le128_to_cpu(smart->ctrl_busy_time)));
+	json_object_add_value_string(root, "power_cycles",
+		uint128_t_to_string(le128_to_cpu(smart->power_cycles)));
+	json_object_add_value_string(root, "power_on_hours",
+		uint128_t_to_string(le128_to_cpu(smart->power_on_hours)));
+	json_object_add_value_string(root, "unsafe_shutdowns",
+		uint128_t_to_string(le128_to_cpu(smart->unsafe_shutdowns)));
+	json_object_add_value_string(root, "media_errors",
+		uint128_t_to_string(le128_to_cpu(smart->media_errors)));
+	json_object_add_value_string(root, "num_err_log_entries",
+		uint128_t_to_string(le128_to_cpu(smart->num_err_log_entries)));
+
+	json_object_add_value_uint(root, "warning_temp_time",
+				   le32_to_cpu(smart->warning_temp_time));
+	json_object_add_value_uint(root, "critical_comp_time",
+				   le32_to_cpu(smart->critical_comp_time));
+
+	for (i = 0; i < 8; i++) {
+		__u16 ts = le16_to_cpu(smart->temp_sensor[i]);
+		char key[32];
+
+		if (!ts)
+			continue;
+
+		/* Bounded format: the key can never overrun its buffer. */
+		snprintf(key, sizeof(key), "temp_sensor_%d", i + 1);
+		json_object_add_value_int(root, key, ts - 273);
+	}
+
+	json_object_add_value_uint(root, "thm_temp1_trans_count",
+				   le32_to_cpu(smart->thm_temp1_trans_count));
+	json_object_add_value_uint(root, "thm_temp2_trans_count",
+				   le32_to_cpu(smart->thm_temp2_trans_count));
+	json_object_add_value_uint(root, "thm_temp1_total_time",
+				   le32_to_cpu(smart->thm_temp1_total_time));
+	json_object_add_value_uint(root, "thm_temp2_total_time",
+				   le32_to_cpu(smart->thm_temp2_total_time));
+
+	/* Micron-specific extended fields */
+	json_object_add_value_uint64(root, "olec", olec);
+	json_object_add_value_uint(root, "ipm", ipm);
+
+	json_print_object(root, NULL);
+	printf("\n");
+	json_free_object(root);
+}
+
+/*
+ * "smart-log" command: fetch the SMART/Health log page and print it with
+ * the Micron OLEC and IPM fields decoded.
+ *
+ * The output format comes from the common --output-format/-o option, as
+ * the command's documentation describes and as micron_id_ctrl() does.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the step that
+ * failed. NOTE(review): the error paths assume libnvme's convention of
+ * negative errno or positive NVMe status - confirm.
+ */
+static int micron_health_info(int argc, char **argv, struct command *acmd,
+			      struct plugin *plugin)
+{
+	_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
+	_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
+	const char *desc = "Retrieve SMART/Health log for Micron drives";
+	enum eDriveModel eModel = UNKNOWN_MODEL;
+	struct nvme_smart_log smart_log = { 0 };
+	nvme_print_flags_t flags;
+	int err = 0;
+
+	NVME_ARGS(opts);
+
+	err = micron_parse_options(&ctx, &hdl, argc, argv, desc, opts, &eModel);
+	if (err < 0)
+		return err;
+
+	if (eModel == UNKNOWN_MODEL)
+		fprintf(stderr, "WARNING: Unknown drive model\n");
+
+	err = validate_output_format(nvme_args.output_format, &flags);
+	if (err < 0) {
+		fprintf(stderr, "Invalid output format\n");
+		return err;
+	}
+
+	err = nvme_get_log_smart(hdl, NVME_NSID_ALL, &smart_log);
+	if (err > 0) {
+		/* Positive values are NVMe status codes, not errno values. */
+		nvme_show_status(err);
+		return err;
+	}
+	if (err < 0) {
+		fprintf(stderr, "Failed to get SMART log: %s\n",
+			nvme_strerror(-err));
+		return err;
+	}
+
+	if (flags & JSON)
+		print_micron_health_log_json(&smart_log, argv[optind]);
+	else
+		print_micron_health_log_normal(&smart_log, argv[optind]);
+
+	return 0;
+}
+
+/*
+ * Micron-specific Identify Controller fields:
+ *   PMS:   Power Measurement Support, bit 21 of CTRATT
+ *   IPMSR: Interval Power Measurement Sample Rate, bytes 392-393
+ *   MSMT:  Maximum Stop Measurement Time, bytes 394-395
+ * IPMSR and MSMT live in the reserved area that starts at byte 388
+ * (nvme_id_ctrl.rsvd388), so their array index is offset - 388.
+ */
+#define ID_CTRL_RSVD388_OFFSET	388
+#define ID_CTRL_IPMSR_OFFSET	392
+#define ID_CTRL_MSMT_OFFSET	394
+#define CTRATT_PMS_BIT		21
+
+/*
+ * Read the 16-bit little-endian value at absolute Identify Controller byte
+ * @offset, which must lie inside rsvd388. memcpy() is used instead of a
+ * cast pointer so the byte array is never dereferenced as __le16.
+ */
+static inline __u16 get_id_ctrl_rsvd_le16(const struct nvme_id_ctrl *ctrl,
+					  unsigned int offset)
+{
+	__le16 raw;
+
+	memcpy(&raw, &ctrl->rsvd388[offset - ID_CTRL_RSVD388_OFFSET],
+	       sizeof(raw));
+	return le16_to_cpu(raw);
+}
+
+/* Return IPMSR (Interval Power Measurement Sample Rate). */
+static inline __u16 get_id_ctrl_ipmsr(const struct nvme_id_ctrl *ctrl)
+{
+	return get_id_ctrl_rsvd_le16(ctrl, ID_CTRL_IPMSR_OFFSET);
+}
+
+/* Return MSMT (Maximum Stop Measurement Time). */
+static inline __u16 get_id_ctrl_msmt(const struct nvme_id_ctrl *ctrl)
+{
+	return get_id_ctrl_rsvd_le16(ctrl, ID_CTRL_MSMT_OFFSET);
+}
+
+/* Return true when the PMS bit (bit 21) is set in CTRATT. */
+static inline bool get_id_ctrl_pms(const struct nvme_id_ctrl *ctrl)
+{
+	return (le32_to_cpu(ctrl->ctratt) >> CTRATT_PMS_BIT) & 0x1;
+}
+
+/*
+ * Vendor callback for nvme_show_id_ctrl(): emit PMS, IPMSR and MSMT.
+ *
+ * @vs:   pointer handed over by nvme_show_id_ctrl()
+ * @root: JSON object to add the fields to, or NULL for plain-text output
+ *
+ * NOTE(review): the enclosing structure is recovered by subtracting
+ * offsetof(struct nvme_id_ctrl, vs), which assumes @vs is the vs member of
+ * a complete struct nvme_id_ctrl - confirm against nvme_show_id_ctrl().
+ */
+static void micron_id_ctrl_vs(__u8 *vs, struct json_object *root)
+{
+	struct nvme_id_ctrl *ctrl =
+		(struct nvme_id_ctrl *)(vs - offsetof(struct nvme_id_ctrl, vs));
+	__u16 ipmsr = get_id_ctrl_ipmsr(ctrl);
+	__u16 msmt = get_id_ctrl_msmt(ctrl);
+	bool pms = get_id_ctrl_pms(ctrl);
+
+	if (root) {
+		/* JSON output */
+		json_object_add_value_int(root, "pms", pms ? 1 : 0);
+		json_object_add_value_uint(root, "ipmsr", ipmsr);
+		json_object_add_value_uint(root, "msmt", msmt);
+	} else {
+		/* Normal output */
+		printf("pms : %u\n", pms ? 1 : 0);
+		printf("ipmsr : %u\n", ipmsr);
+		printf("msmt : %u\n", msmt);
+	}
+}
+
+/*
+ * "id-ctrl" command: issue Identify Controller and print the result with
+ * the Micron PMS, IPMSR and MSMT fields appended by micron_id_ctrl_vs().
+ *
+ * Returns 0 on success, otherwise the non-zero result of the step that
+ * failed. NOTE(review): the error paths assume libnvme's convention of
+ * negative errno or positive NVMe status - confirm.
+ */
+static int micron_id_ctrl(int argc, char **argv, struct command *acmd,
+			  struct plugin *plugin)
+{
+	_cleanup_nvme_global_ctx_ struct nvme_global_ctx *ctx = NULL;
+	_cleanup_nvme_transport_handle_ struct nvme_transport_handle *hdl = NULL;
+	const char *desc = "Identify Controller with Micron vendor fields";
+	enum eDriveModel eModel = UNKNOWN_MODEL;
+	struct nvme_id_ctrl ctrl = { 0 };
+	nvme_print_flags_t flags;
+	int err = 0;
+
+	NVME_ARGS(opts);
+
+	err = micron_parse_options(&ctx, &hdl, argc, argv, desc, opts, &eModel);
+	if (err < 0)
+		return err;
+
+	if (eModel == UNKNOWN_MODEL) {
+		fprintf(stderr,
+			"WARNING: Drive not recognized as Micron, proceeding anyway\n");
+	}
+
+	err = validate_output_format(nvme_args.output_format, &flags);
+	if (err < 0) {
+		fprintf(stderr, "Invalid output format\n");
+		return err;
+	}
+
+	err = nvme_identify_ctrl(hdl, &ctrl);
+	if (err > 0) {
+		/* Positive values are NVMe status codes, not errno values. */
+		nvme_show_status(err);
+		return err;
+	}
+	if (err < 0) {
+		fprintf(stderr, "identify controller failed: %s\n",
+			nvme_strerror(-err));
+		return err;
+	}
+
+	nvme_show_id_ctrl(&ctrl, flags, micron_id_ctrl_vs);
+
+	return 0;
+}
diff --git a/plugins/micron/micron-nvme.h b/plugins/micron/micron-nvme.h index c6b3b37dda..1fd90bf8c3 100644 --- a/plugins/micron/micron-nvme.h +++ b/plugins/micron/micron-nvme.h @@ -46,6 +46,9 @@ PLUGIN(NAME("micron", "Micron vendor specific extensions", NVME_VERSION), ENTRY("vs-work-load-log", "Retrieve Workload logs", micron_work_load_log) ENTRY("vs-vendor-telemetry-log", "Retrieve Vendor Telemetry logs", micron_vendor_telemetry_log) + ENTRY("smart-log", "Retrieve SMART/Health Log", + micron_health_info) + ENTRY("id-ctrl", "Identify Controller", micron_id_ctrl) ) );