cembeddedbootloaderzephyr-rtos

MCUboot loses pending update image in slot 1 after reset


Update:

I got MCUboot logging working and found the error: mcuboot: Image in the secondary slot is not valid!

It isn't obvious to me why the image isn't valid.

[00:00:10.516,021] ␛[0m<dbg> spi_nrfx_spim: spi_context_update_tx: tx buf/len 0x20005177/1␛[0m
[00:00:10.517,211] ␛[0m<dbg> spi_nrfx_spim: spi_context_update_rx: rx buf/len 0x20005177/1␛[0m
[00:00:10.518,432] ␛[0m<dbg> spi_nrfx_spim: spi_context_update_tx: tx buf/len (nil)/0␛[0m
[00:00:10.519,592] ␛[0m<dbg> spi_nrfx_spim: spi_context_update_rx: rx buf/len (nil)/0␛[0m
[00:00:10.520,721] ␛[0m<dbg> spi_nrfx_spim: finish_transaction: Transaction finished with status 0␛[0m
[00:00:10.522,033] ␛[0m<dbg> spi_nrfx_spim: spi_context_buffers_setup: tx_bufs 0x20005140 - rx_bufs (nil) - 1␛[0m
[00:00:10.523,620] ␛[0m<dbg> spi_nrfx_spim: spi_context_buffers_setup: current_tx 0x20005150 (1), current_rx (nil) (0), tx buf/len 0x20005138/1, rx buf/len (nil)/0␛[0m
[00:00:10.525,878] ␛[0m<dbg> spi_nrfx_spim: spi_context_update_tx: tx buf/len (nil)/0␛[0m
[00:00:10.527,038] ␛[0m<dbg> spi_nrfx_spim: finish_transaction: Transaction finished with status 0␛[0m
[00:00:10.528,320] ␛[1;31m<err> mcuboot: Image in the secondary slot is not valid!␛[0m
[00:00:10.583,068] ␛[0m<inf> mcuboot: Bootloader chainload address offset: 0x20000␛[0m
[00:00:10.584,259] ␛[0m<inf> mcuboot: Jumping to the first image slot␛[0m

############

I'm trying to download and flash an MCUboot image at runtime. All goes well, the image is flashed to the secondary slot, confirmed, and marked for upgrade. The secondary slot is in external spi flash.

On reboot MCUboot has no knowledge of the secondary image and boots back into the primary image. What could be going wrong here? Something to do with metadata or encryption?

Stack trace BEFORE reset:

I: Image index: 0, Swap type: none
D: mcuboot_swap_type: 1
D: writing magic; fa_id=1 off=0xd7ff0 (0xd7ff0)
D: writing swap_info; fa_id=1 off=0xd7fd8 (0xd7fd8), swap_type=0x2 image_num=0x0
D: 
Image info: mcuboot_primary
I: Image index: 0, Swap type: test
I: MCUboot swap type: 2
I: Image Version 0.3.0-0
I: Image is confirmed OK
D: 
Image info: mcuboot_secondary
I: Image index: 0, Swap type: test
I: MCUboot swap type: 2
I: Image Version 0.3.1-0
I: Image is confirmed OK

Stack trace AFTER reset:

*** Booting nRF Connect SDK v2.5.2 ***
I: Reset Reason 2, Flags:
W: RESET_SOFTWARE
D: 
Image info: mcuboot_primary
I: Image index: 0, Swap type: none
I: MCUboot swap type: 1
I: Image Version 0.3.0-0
I: Image is confirmed OK
D: 
Image info: mcuboot_secondary
I: Image index: 0, Swap type: none
I: MCUboot swap type: 1
I: Image Version 76.146.8194-224105
I: Image is confirmed OK

My mcuboot.conf:

# Core should be on a clean state when TF-M starts
CONFIG_MCUBOOT_CLEANUP_ARM_CORE=y

# Enable echo command
CONFIG_BOOT_MGMT_ECHO=y

# Increase MCUboot image sectors and set static partition size
CONFIG_BOOT_MAX_IMG_SECTORS=256
CONFIG_PM_PARTITION_SIZE_MCUBOOT=0x20000

# Enable flash operations
CONFIG_FLASH=y

# Enable serial recovery
CONFIG_MCUBOOT_SERIAL=y
CONFIG_BOOT_SERIAL_UART=y
CONFIG_MCUBOOT_SERIAL_DIRECT_IMAGE_UPLOAD=y
CONFIG_BOOT_SERIAL_WAIT_FOR_DFU=y
CONFIG_BOOT_SERIAL_WAIT_FOR_DFU_TIMEOUT=1000
CONFIG_BOOT_SERIAL_IMG_GRP_IMAGE_STATE=y

# SPI Flash
CONFIG_SPI=y
CONFIG_NVS=y
CONFIG_PM_EXTERNAL_FLASH_BASE=0x0
CONFIG_SPI_NOR=y
CONFIG_SPI_NOR_FLASH_LAYOUT_PAGE_SIZE=4096
CONFIG_PM_EXTERNAL_FLASH_MCUBOOT_SECONDARY=y
CONFIG_PM_OVERRIDE_EXTERNAL_DRIVER_CHECK=y

# Erase Flash Progressively
CONFIG_BOOT_ERASE_PROGRESSIVELY=y

The code:

#include "fota.h"

#include <zephyr/device.h>
#include <zephyr/dfu/flash_img.h>
#include <zephyr/dfu/mcuboot.h>
#include <zephyr/kernel.h>
#include <zephyr/logging/log.h>
#include <zephyr/stats/stats.h>
#include <zephyr/storage/flash_map.h>

#include "custom_http_client.h"
#include "pm_config.h"

LOG_MODULE_REGISTER(fota, LOG_LEVEL_DBG);

#define STRINGIZE(arg) #arg
#define STRINGIZE_VALUE(arg) STRINGIZE(arg)
#define PM_MCUBOOT_PRIMARY_STRING STRINGIZE_VALUE(PM_MCUBOOT_PRIMARY_NAME)
#define PM_MCUBOOT_SECONDARY_STRING STRINGIZE_VALUE(PM_MCUBOOT_SECONDARY_NAME)

BUILD_ASSERT(
    FIXED_PARTITION_EXISTS(PM_MCUBOOT_SECONDARY_NAME),
    "Missing " PM_MCUBOOT_SECONDARY_STRING
    " fixed partition. Secondary slot partition is required!"
);

struct flash_img_context ctx;

void image_info(uint8_t area_id) {
  int rc;
  char buf[BOOT_IMG_VER_STRLEN_MAX];
  struct mcuboot_img_header header;

  boot_read_bank_header(area_id, &header, sizeof(header));
  snprintk(
      buf, sizeof(buf), "%d.%d.%d-%d", header.h.v1.sem_ver.major,
      header.h.v1.sem_ver.minor, header.h.v1.sem_ver.revision,
      header.h.v1.sem_ver.build_num
  );
  LOG_INF("MCUboot swap type: %d", mcuboot_swap_type());
  LOG_INF("Image Version %s", buf);
  rc = boot_is_img_confirmed();
  LOG_INF("Image is%s confirmed OK", rc ? "" : " not");
}

int write_buffer_to_flash(char *data, size_t len, _Bool flush) {
  int rc;
  if (flush) {
    rc = flash_img_buffered_write(&ctx, data, len, true);
  } else {
    rc = flash_img_buffered_write(&ctx, data, len, false);
  }

  LOG_DBG("Flash img bytes written: %d", flash_img_bytes_written(&ctx));

  return rc;
}

void download_update(void) {
  int rc;

  char headers_buf[1024];
  char write_buf[CONFIG_IMG_BLOCK_BUF_SIZE];

  rc = flash_img_init_id(&ctx, PM_MCUBOOT_SECONDARY_ID);
  if (rc < 0) {
    LOG_ERR("Failed to init stream flash");
  }

  (void)http_get_firmware(
      write_buf, sizeof(write_buf), headers_buf, sizeof(headers_buf)
  );

  LOG_DBG("mcuboot_swap_type: %d", mcuboot_swap_type());

  rc = boot_request_upgrade(BOOT_UPGRADE_TEST);
  if (rc < 0) {
    LOG_ERR("Failed to REQUEST FIRMWARE UPGRADE");
  }

  LOG_DBG("\nImage info: " PM_MCUBOOT_PRIMARY_STRING);
  (void)image_info(PM_MCUBOOT_PRIMARY_ID);

  LOG_DBG("\nImage info: " PM_MCUBOOT_SECONDARY_STRING);
  (void)image_info(PM_MCUBOOT_SECONDARY_ID);
}

Solution

  • I have confirmed the issue was due to an improperly signed image generated by my CI build.

    The image wasn't signed correctly because I forgot to ensure the correct NRF python dependencies were installed in the container.

    I'm using the zephyrproject-rtos/ci-base which has the Zephyr python dependencies preinstalled but not the dependencies required for Nordic's version of MCUboot.

    Adding this step to my CI workflow solved my issue:

    - name: Update Python dependencies
        run: |
          pip install -r zephyr/scripts/requirements.txt
          pip install -r nrf/scripts/requirements.txt
          pip install -r bootloader/mcuboot/scripts/requirements.txt