Skip to content

Commit

Permalink
bertprobe: fix failures and logging
Browse files Browse the repository at this point in the history
Fix a reported problem: #170

This patch fixes the case when there is no error reported via BERT data
and yet the probe fails with error such as:
"Failed to read payload from: /sys/firmware/acpi/tables/data/BERT"
Which then results in journalprobe reporting:
"bert-probe.service: Main process exited, code=exited, status=1/FAILURE"

The file BERT data file now has root only read permissions:

 $ ls -al /sys/firmware/acpi/tables/data/BERT
-r-------- 1 root root 32768 Nov 25 07:37 /sys/firmware/acpi/tables/data/BERT

Hence the user "telemetry" cannot read it anymore. In addition, the file size
can be more than max payload size (4k). However, the file can be completely
blank. The probe inspects the block_status for any present errors.
If there aren't any, probe does not report anything.

While in there, for consistency reasons, modified "printf" statements with
error messages to telem_log(LOG_ERR, msg)

Signed-off-by: Juro Bystricky <[email protected]>
  • Loading branch information
jurobystricky authored and alexjch committed Nov 25, 2019
1 parent 49823ea commit 72ea0d9
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/data/bert-probe.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ConditionPathExists=/sys/firmware/acpi/tables/data/BERT

[Service]
ExecStart=@bindir@/bertprobe
User=telemetry
User=root

[Install]
WantedBy=multi-user.target
90 changes: 83 additions & 7 deletions src/probes/bert_probe.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,77 @@ void print_usage(char *prog)
printf(" -V, --version Print the program version\n");
}

struct acpi_generic_error_data_entry {
uint8_t section_type[16];
uint32_t error_severity;
uint16_t revision;
uint8_t validation_bits;
uint8_t flags;
uint32_t error_data_length;
uint8_t fru_ld[16];
uint8_t fru_text[16];
uint64_t timestamp;
uint8_t data[1]; // error_data_length
} __attribute__((packed));

struct acpi_generic_error_status_block {
// Bit [0] - Uncorrectable Error Valid
// Bit [1] - Correctable Error Valid
// Bit [2] - Multiple Uncorrectable Errors
// Bit [3] - Multiple Correctable Errors
// Bit [13:4] - Error Data Entry Count: Number of Error Data Entries found in the Data section.
// Bit [31:14] - Reserved
uint32_t block_status;
uint32_t raw_data_offset;
// Length in bytes of the generic error data
uint32_t data_length;
uint32_t error_severity;
// The information contained in this field is a collection of zero
// or more Generic Error Data Entrie
struct acpi_generic_error_data_entry data[];
} __attribute__((packed));

static int check_bert_file(const char *filename, char *buff, size_t buff_size) {

int ret = EXIT_FAILURE;
size_t len;
uint32_t block_status;
FILE *fh = fopen(filename, "rb");

if (fh == NULL) {
telem_log(LOG_ERR, "Failed to open: %s\n", filename);
goto done;
}

len = fread(&block_status, 1, sizeof(block_status), fh);
if (ferror(fh) || (len != sizeof(block_status))){
telem_log(LOG_ERR, "Error reading block_status: %s\n", filename);
goto done;
}

/* Check the "Error Data Entry Count", if 0 then nothing
* to report */

if ((block_status & 0x3FFF) != 0) {
rewind(fh);

/* nc_b64enc_file returns 1 in success and 0 in failure */
if (nc_b64enc_file(fh, buff, buff_size) == 0) {
telem_log(LOG_ERR, "Failed to read payload from: %s\n", filename);
goto done;
}
}

ret = EXIT_SUCCESS;
done:
if (fh) {
fclose(fh);
}

return ret;
}


int main(int argc, char **argv)
{
struct telem_ref *tm_handle = NULL;
Expand Down Expand Up @@ -79,29 +150,34 @@ int main(int argc, char **argv)

payload = calloc(sizeof(char), MAX_PAYLOAD_LENGTH);
if (!payload) {
printf("Unable to allocate more memory.\n");
telem_log(LOG_ERR, "Unable to allocate more memory.\n");
return -ENOMEM;
}

if (!(ret = nc_b64enc_filename(bert_record_file, payload, MAX_PAYLOAD_LENGTH))) {
printf("Failed to read payload from: %s\n", bert_record_file);
ret = EXIT_FAILURE;
ret = check_bert_file(bert_record_file, payload, MAX_PAYLOAD_LENGTH);
if (ret == EXIT_FAILURE) {
goto done;
} else {
if (payload[0] == 0) {
/* Nothing to report */
goto done;
}
}

/* Valid payload, send the record */
if ((ret = tm_create_record(&tm_handle, severity, classification,
payload_version)) < 0) {
printf("Failed to create record: %s\n", strerror(-ret));
telem_log(LOG_ERR, "Failed to create record: %s\n", strerror(-ret));
goto done;
}

if ((ret = tm_set_payload(tm_handle, payload)) < 0) {
printf("Failed to set record payload: %s\n", strerror(-ret));
telem_log(LOG_ERR, "Failed to set record payload: %s\n", strerror(-ret));
goto done;
}

if ((ret = tm_send_record(tm_handle)) < 0) {
printf("Failed to send record to daemon: %s\n", strerror(-ret));
telem_log(LOG_ERR, "Failed to send record to daemon: %s\n", strerror(-ret));
goto done;
}

Expand Down

0 comments on commit 72ea0d9

Please sign in to comment.