diff --git a/.github/workflows/test-configs.yml b/.github/workflows/test-configs.yml index a84cde0ccc..88784d60a8 100644 --- a/.github/workflows/test-configs.yml +++ b/.github/workflows/test-configs.yml @@ -182,12 +182,6 @@ jobs: arch: ppc config-file: ./config/examples/nxp-t1024.config - nxp_t2080_68ppc2_test: - uses: ./.github/workflows/test-build.yml - with: - arch: ppc - config-file: ./config/examples/nxp-t2080-68ppc2.config - nxp_t2080_test: uses: ./.github/workflows/test-build.yml with: diff --git a/arch.mk b/arch.mk index 10f4c6a0a1..13f9f88984 100644 --- a/arch.mk +++ b/arch.mk @@ -627,12 +627,19 @@ endif ifeq ($(ARCH),PPC) CROSS_COMPILE?=powerpc-linux-gnu- LDFLAGS+=-Wl,--build-id=none - CFLAGS+=-DARCH_PPC -DFAST_MEMCPY + CFLAGS+=-DARCH_PPC -DFAST_MEMCPY -ffreestanding -fno-tree-loop-distribute-patterns ifeq ($(DEBUG_UART),0) CFLAGS+=-fno-builtin-printf endif + # Target-specific CPU flags + ifeq ($(TARGET),nxp_t2080) + CFLAGS+=-mcpu=e6500 -mno-altivec -mbss-plt + else ifeq ($(TARGET),nxp_t1024) + CFLAGS+=-mcpu=e5500 + endif + # Prune unused functions and data CFLAGS+=-ffunction-sections -fdata-sections LDFLAGS+=-Wl,--gc-sections @@ -994,8 +1001,11 @@ ifeq ($(TARGET),nxp_t2080) LDFLAGS+=$(ARCH_FLAGS) LDFLAGS+=-Wl,--hash-style=both # generate both sysv and gnu symbol hash table LDFLAGS+=-Wl,--as-needed # remove weak functions not used - UPDATE_OBJS:=src/update_ram.o + OBJS+=src/boot_ppc_mp.o # support for spin table OBJS+=src/fdt.o + OBJS+=src/pci.o + CFLAGS+=-DWOLFBOOT_USE_PCI + UPDATE_OBJS:=src/update_ram.o endif ifeq ($(TARGET),nxp_p1021) diff --git a/config/examples/nxp-t2080-68ppc2.config b/config/examples/nxp-t2080-68ppc2.config deleted file mode 100644 index c4e2cbb01d..0000000000 --- a/config/examples/nxp-t2080-68ppc2.config +++ /dev/null @@ -1,55 +0,0 @@ -# NAII 68PPC2 NXP T2080 wolfBoot Configuration Template - -ARCH=PPC -TARGET=nxp_t2080 -SIGN?=ECC384 -HASH?=SHA384 -IMAGE_HEADER_SIZE?=512 -DEBUG?=0 -DEBUG_UART?=1 -VTOR?=1 -CORTEX_M0?=0 -NO_ASM?=0 
-EXT_FLASH?=0 -SPI_FLASH?=0 -NO_XIP?=0 -UART_FLASH?=0 -ALLOW_DOWNGRADE?=0 -NVM_FLASH_WRITEONCE?=0 -WOLFBOOT_VERSION?=0 -NO_MPU?=0 -SPMATH?=0 -SPMATHALL?=1 -RAM_CODE?=1 -DUALBANK_SWAP?=0 -WOLFTPM?=0 - -# NOR Base Address -ARCH_FLASH_OFFSET?=0xE8000000 - -# Flash Sector Size -WOLFBOOT_SECTOR_SIZE=0x10000 - -# wolfBoot start address -WOLFBOOT_ORIGIN=0xEFF40000 -# wolfBoot partition size (custom) -BOOTLOADER_PARTITION_SIZE=0x20000 - -# Application Partition Size -WOLFBOOT_PARTITION_SIZE?=0xA00000 -# Location in Flash for Application Partition -WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xE8080000 -# Load Partition to RAM Address -WOLFBOOT_LOAD_ADDRESS?=0x19000 - -# Location in Flash for Update Partition -WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xE8A80000 - -# Location of temporary sector used during updates -WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xE8060000 - -# DTS (Device Tree) -WOLFBOOT_DTS_BOOT_ADDRESS?=0xE8040000 -WOLFBOOT_DTS_UPDATE_ADDRESS?=0xE8050000 -# DTS Load to RAM Address -WOLFBOOT_LOAD_DTS_ADDRESS?=0x40000 diff --git a/config/examples/nxp-t2080.config b/config/examples/nxp-t2080.config index 190b99bd30..76fe7e4634 100644 --- a/config/examples/nxp-t2080.config +++ b/config/examples/nxp-t2080.config @@ -1,8 +1,21 @@ +# NXP T2080 wolfBoot Configuration +# Default board: T2080 RDB (66.66 MHz oscillator, DDR3L SODIMM) +# +# Board selection: uncomment exactly one line to override the default. 
+# Default (no define): T2080 RDB (66.66 MHz oscillator, DDR3L SODIMM) +# BOARD_CW_VPX3152: CW VPX3-152 (66.667 MHz oscillator, DDR3L) +# BOARD_NAII_68PPC2: NAII 68PPC2 (100 MHz oscillator, 8GB DDR3) +# +#CFLAGS_EXTRA+=-DBOARD_CW_VPX3152 +#CFLAGS_EXTRA+=-DBOARD_NAII_68PPC2 + ARCH=PPC TARGET=nxp_t2080 SIGN?=ECC384 HASH?=SHA384 +IMAGE_HEADER_SIZE?=512 DEBUG?=0 +DEBUG_SYMBOLS?=1 DEBUG_UART?=1 VTOR?=1 CORTEX_M0?=0 @@ -15,27 +28,46 @@ ALLOW_DOWNGRADE?=0 NVM_FLASH_WRITEONCE?=0 WOLFBOOT_VERSION?=0 NO_MPU?=0 -SPMATH?=0 -SPMATHALL?=1 -RAM_CODE?=0 +SPMATH?=1 +SPMATHALL?=0 +RAM_CODE?=1 DUALBANK_SWAP?=0 -PKA?=1 WOLFTPM?=0 -WOLFBOOT_ORIGIN?=0xEFFF0000 -WOLFBOOT_PARTITION_SIZE?=0x20000 +OPTIMIZATION_LEVEL?=1 + +# NOR Base Address +ARCH_FLASH_OFFSET?=0xEFFE0000 + +# Flash Sector Size WOLFBOOT_SECTOR_SIZE?=0x10000 -ARCH_FLASH_OFFSET?=0xEFFF0000 -BOOTLOADER_PARTITION_SIZE=0x10000 +# wolfBoot start address +WOLFBOOT_ORIGIN?=0xEFFE0000 +# wolfBoot partition size (custom) +BOOTLOADER_PARTITION_SIZE=0x20000 -WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xEFFD0000 +# Application Partition Size +WOLFBOOT_PARTITION_SIZE?=0x100000 +# Location in Flash for Application Partition +WOLFBOOT_PARTITION_BOOT_ADDRESS?=0xEFEE0000 +# Load Partition to RAM Address WOLFBOOT_LOAD_ADDRESS?=0x19000 -WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xEFFB0000 + +# Location in Flash for Update Partition +WOLFBOOT_PARTITION_UPDATE_ADDRESS?=0xEFDE0000 # Location of temporary sector used during updates -WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xEFFA0000 +WOLFBOOT_PARTITION_SWAP_ADDRESS?=0xEFDD0000 # DTS (Device Tree) WOLFBOOT_DTS_BOOT_ADDRESS?=0xE8040000 WOLFBOOT_DTS_UPDATE_ADDRESS?=0xE8050000 +# DTS Load to RAM Address WOLFBOOT_LOAD_DTS_ADDRESS?=0x200000 + +# Flash erase/write/read test at update partition address +#TEST_FLASH?=1 + +# wolfCrypt Test and Benchmark (requires larger partition size) +#WOLFCRYPT_TEST?=1 +#WOLFCRYPT_BENCHMARK?=1 diff --git a/docs/Targets.md b/docs/Targets.md index 3217652c0d..e0f8e22ed2 100644 --- 
a/docs/Targets.md +++ b/docs/Targets.md @@ -3065,9 +3065,8 @@ Flash factory_custom.bin to NOR base 0xEC00_0000 The NXP QorIQ T2080 is a PPC e6500 based processor (four cores). Support has been tested with the NAII 68PPC2. -Example configurations for this target are provided in: -* NXP T2080: [/config/examples/nxp-t2080.config](/config/examples/nxp-t2080.config). -* NAII 68PPC2: [/config/examples/nxp-t2080-68ppc2.config](/config/examples/nxp-t2080-68ppc2.config). +Example configuration: [/config/examples/nxp-t2080.config](/config/examples/nxp-t2080.config). +The T2080 RDB is the default board; for the NAII 68PPC2, uncomment the `CFLAGS_EXTRA+=-DBOARD_NAII_68PPC2` line in that config file. ### Design NXP T2080 PPC @@ -3075,18 +3074,104 @@ The QorIQ requires a Reset Configuration Word (RCW) to define the boot parameter The flash boot entry point is `0xEFFFFFFC`, which is an offset jump to wolfBoot initialization boot code. Initially the PowerPC core enables only a 4KB region to execute from. The initialization code (`src/boot_ppc_start.S`) sets the required CCSR and TLB for memory addressing and jumps to wolfBoot `main()`. -RM 4.3.3 Boot Space Translation +#### Boot Sequence and Hardware Constraints + +**Memory Hierarchy:** + +``` +CPU Core → L1 (32KB I + 32KB D) → L2 (256KB/cluster, shared by 4 cores) + → CoreNet Fabric → CPC (2MB, SRAM or L3 cache) + → DDR Controller → DDR SDRAM + → IFC Controller → NOR Flash +``` + +Each core begins execution at effective address `0x0_FFFF_FFFC` with a single +4KB MMU page (RM 4.3.3). The assembly startup (`boot_ppc_start.S`) configures +TLBs, caches, and stack before jumping to C code. + +**Cold Boot Stack (L1 Locked D-Cache)** + +CPC SRAM is unreliable for stores on cold power-on — L1 dirty-line evictions +through CoreNet to CPC cause bus errors (silent CPU checkstop with `MSR[ME]=0`). 
+The fix (matching U-Boot) uses L1 locked D-cache as the initial 16KB stack: +`dcbz` allocates cache lines without bus reads, `dcbtls` locks them so they +are never evicted. The locked lines at `L1_CACHE_ADDR` (0xF8E00000) are +entirely core-local. After DDR init in `hal_init()`, the stack relocates to +DDR and the CPC switches from SRAM to L3 cache mode. + +**Flash TLB and XIP** + +The flash TLB uses `MAS2_W | MAS2_G` (Write-Through + Guarded) during XIP +boot, allowing L1 I-cache to cache instruction fetches while preventing +speculative prefetch to the IFC. C code switches to `MAS2_I | MAS2_G` during +flash write/erase (command mode), then `MAS2_M` for full caching afterward. + +**RAMFUNCTION Constraints** + +The NAII 68PPC2 NOR flash (two S29GL01GS x8 in parallel, 16-bit bus) enters +command mode bank-wide — instruction fetches during program/erase return status +data instead of code. All flash write/erase functions are marked `RAMFUNCTION`, +placed in `.ramcode`, copied to DDR, and remapped via TLB9. Key rules: + +- **No calls to flash-resident code.** The linker generates trampolines that + jump back to flash addresses. Any helper called from RAMFUNCTION code must + itself be RAMFUNCTION or fully inlined. Delay/clock helpers (for example, + `udelay` and associated clock accessors) are provided by `nxp_ppc.c` and + are marked `RAMFUNCTION` so they can be safely invoked without executing + from flash `.text`. +- **Inline TLB/cache ops.** `hal_flash_cache_disable/enable` use + `set_tlb()` / `write_tlb()` (inline `mtspr` helpers) and direct + L1CSR0/L1CSR1 manipulation. +- **WBP timing.** The write-buffer-program sequence (unlock → 0x25 → count → + data → 0x29) must execute without bus-stalling delays. UART output between + steps (~87us per character at 115200) triggers DQ1 abort. +- **WBP abort recovery.** Plain `AMD_CMD_RESET` (0xF0) is ignored in + WBP-abort state; the full unlock + 0xF0 sequence is required. 
+ +**Multi-Core (ENABLE_MP)** -"When each core comes out of reset, its MMU has one 4 KB page defined at 0x0_FFFF_Fnnn. Each core begins execution with the instruction at effective address 0x0_FFFF_FFFC. To get this instruction, the core's first instruction fetch is a burst read of boot code from effective address 0x0_FFFF_FFC0." +The e6500 L2 cache is per-cluster (shared by all 4 cores). Secondary cores +must skip L2 flash-invalidate (L2FI) since the primary core already +initialized the shared L2; they only set L1 stash ID via L1CSR2. + +**e6500 64-bit GPR** + +The e6500 has 64-bit GPRs even in 32-bit mode. `lis` sign-extends to 64 bits, +producing incorrect values for addresses >= 0x80000000 (e.g., `lis r3, 0xEFFE` +→ `0xFFFFFFFF_EFFE0000`), causing TLB misses on `blr`. The `LOAD_ADDR32` +macro (`li reg, 0` + `oris` + `ori`) avoids this for all address loads. + +**MSR Configuration** + +After the stack is established: `MSR[CE|ME|DE|RI]` — critical interrupt, +machine check (exceptions instead of checkstop), debug, and recoverable +interrupt enable. Branch prediction (BUCSR) is deferred to `hal_init()` after +DDR stack relocation. + +**UART Debug Checkpoints (`DEBUG_UART=1`)** + +Assembly startup emits characters to UART0 (0xFE11C500, 115200 baud): + +``` +1 - CPC invalidate start A - L2 cluster enable start +2 - CPC invalidate done B - L2 cluster enabled +3 - CPC SRAM configured E - L1 cache setup +4 - SRAM LAW configured F - L1 I-cache enabled +5 - Flash TLB configured G - L1 D-cache enabled +6 - CCSRBAR TLB configured D - Stack ready (L1 locked cache) +7 - SRAM TLB configured Z - About to jump to C code +8 - CPC enabled +``` ### Building wolfBoot for NXP T2080 PPC By default wolfBoot will use `powerpc-linux-gnu-` cross-compiler prefix. These tools can be installed with the Debian package `gcc-powerpc-linux-gnu` (`sudo apt install gcc-powerpc-linux-gnu`). 
The `make` creates a `factory.bin` image that can be programmed at `0xE8080000` +(For the NAII 68PPC2, first edit `nxp-t2080.config` and uncomment the `CFLAGS_EXTRA+=-DBOARD_NAII_68PPC2` line.) ``` -cp ./config/examples/nxp-t2080-68ppc2.config .config +cp ./config/examples/nxp-t2080.config .config make clean make keytools make @@ -3129,26 +3214,32 @@ Flash Layout (with files): Or program the `factory.bin` to `0xE8080000` -Example Boot Debug Output: +Example Boot Debug Output (with `DEBUG_UART=1`): ``` wolfBoot Init -Part: Active 0, Address E8080000 -Image size 1028 +Build: Mar 3 2026 13:22:20 +IFC CSPR0: 0x141 (WP set) +Ramcode: copied 5584 bytes to DDR, TLB9 remapped +CPC: Released SRAM, full 2MB L3 CPC cache enabled +Flash: caching enabled (L1+L2+CPC) +MP: Starting cores (boot page 0x7FFFF000, spin table 0x7FFFE100) +Versions: Boot 1, Update 0 +Trying Boot partition at 0xEFFC0000 +Boot partition: 0xEFFC0000 (sz 3468, ver 0x1, type 0x601) +Checking integrity...done +Verifying signature...done +Successfully selected image in part: 0 Firmware Valid -Loading 1028 bytes to RAM at 19000 -Failed parsing DTB to load. -Booting at 19000 +Copying image from 0xEFFC0200 to RAM at 0x19000 (3468 bytes) +Failed parsing DTB to load +Booting at 0x19000 +FDT: Invalid header! -1 Test App 0x00000001 0x00000002 0x00000003 -0x00000004 -0x00000005 -0x00000006 -0x00000007 -... 
``` #### Flash Programming with Lauterbach diff --git a/hal/nxp_p1021.c b/hal/nxp_p1021.c index 7b173e9575..646f98c1d2 100644 --- a/hal/nxp_p1021.c +++ b/hal/nxp_p1021.c @@ -34,16 +34,11 @@ #define ENABLE_BUS_CLK_CALC #ifndef BUILD_LOADER_STAGE1 - /* Tests */ - #if 0 - #define TEST_DDR - #define TEST_TPM - #endif #define ENABLE_PCIE #define ENABLE_CPLD /* Board Configuration and Status Registers (BCSR) */ #define ENABLE_CONF_IO #define ENABLE_QE /* QUICC Engine */ - #if defined(WOLFBOOT_TPM) || defined(TEST_TPM) + #if defined(WOLFBOOT_TPM) #define ENABLE_ESPI /* SPI for TPM */ #endif #define ENABLE_MP /* multi-core support */ @@ -51,14 +46,6 @@ /* #define ENABLE_QE_CRC32 */ /* CRC32 check on QE disabled by default */ #endif -/* Foward declarations */ -#if defined(ENABLE_DDR) && defined(TEST_DDR) -static int test_ddr(void); -#endif -#if defined(ENABLE_ESPI) && defined(TEST_TPM) -static int test_tpm(void); -#endif - #ifdef ENABLE_ESPI #include "spi_drv.h" /* for transfer flags and chip select */ #endif @@ -488,11 +475,7 @@ static uint32_t flash_idx; int ext_flash_read(uintptr_t address, uint8_t *data, int len); #endif -/* generic share NXP QorIQ driver code */ -#include "nxp_ppc.c" - - -/* local functions */ +/* P1021 bus clock: reads GUTS_PORPLLSR (different from E5500/E6500 CLOCKING regs) */ #ifdef ENABLE_BUS_CLK_CALC static uint32_t hal_get_bus_clk(void) { @@ -508,53 +491,16 @@ static uint32_t hal_get_bus_clk(void) #define hal_get_bus_clk() (uint32_t)(SYS_CLK * 6) #endif +/* E500 uses bus-clock based delay (not TIMEBASE like E5500/E6500) */ #define DELAY_US (hal_get_bus_clk() / 1000000) static void udelay(uint32_t delay_us) { wait_ticks(delay_us * DELAY_US); } -#if 0 /* useful timer code */ - -uint64_t hal_timer_ms(void) -{ - uint64_t val; - /* time base is updated every 8 CCB clocks */ - uint64_t cntfrq = hal_get_bus_clk() / 8; - uint64_t cntpct = get_ticks(); - val = (cntpct * 1000ULL) / cntfrq; - return val; -} - -/* example usage */ -//uint64_t start = 
hal_get_tick_count(); -// do some work -//wolfBoot_printf("done (%lu ms)\n", (uint32_t)hal_elapsed_time_ms(start)); - -/* Calculate elapsed time in milliseconds, handling timer overflow properly */ -uint64_t hal_elapsed_time_ms(uint64_t start_ticks) -{ - uint64_t current_ticks, elapsed_ticks; - uint64_t cntfrq = hal_get_bus_clk() / 8; - - current_ticks = get_ticks(); - - /* Handle timer overflow using unsigned arithmetic - * This works correctly even if the timer has rolled over, - * as long as the elapsed time is less than the full timer range - */ - elapsed_ticks = current_ticks - start_ticks; - - /* Convert elapsed ticks to milliseconds */ - return (elapsed_ticks * 1000ULL) / cntfrq; -} +/* generic share NXP QorIQ driver code (uart_init/uart_write use hal_get_bus_clk above) */ +#include "nxp_ppc.c" -/* Get current tick count for use with hal_elapsed_time_ms() */ -uint64_t hal_get_tick_count(void) -{ - return get_ticks(); -} -#endif /* ---- eSPI Driver ---- */ #ifdef ENABLE_ESPI @@ -673,47 +619,7 @@ void hal_espi_deinit(void) } #endif /* ENABLE_ESPI */ -/* ---- DUART Driver ---- */ -#ifdef DEBUG_UART - -void uart_init(void) -{ - /* calc divisor for UART - * baud rate = CCSRBAR frequency ÷ (16 x [UDMB||UDLB]) - */ - /* compute UART divisor - round up */ - uint32_t div = (hal_get_bus_clk() + (16/2 * BAUD_RATE)) / (16 * BAUD_RATE); - - while (!(get8(UART_LSR(UART_SEL)) & UART_LSR_TEMT)) - ; - - /* set ier, fcr, mcr */ - set8(UART_IER(UART_SEL), 0); - set8(UART_FCR(UART_SEL), (UART_FCR_TFR | UART_FCR_RFR | UART_FCR_FEN)); - - /* enable baud rate access (DLAB=1) - divisor latch access bit*/ - set8(UART_LCR(UART_SEL), (UART_LCR_DLAB | UART_LCR_WLS)); - /* set divisor */ - set8(UART_DLB(UART_SEL), (div & 0xff)); - set8(UART_DMB(UART_SEL), ((div>>8) & 0xff)); - /* disable rate access (DLAB=0) */ - set8(UART_LCR(UART_SEL), (UART_LCR_WLS)); -} - -void uart_write(const char* buf, uint32_t sz) -{ - uint32_t pos = 0; - while (sz-- > 0) { - char c = buf[pos++]; - if (c == 
'\n') { /* handle CRLF */ - while ((get8(UART_LSR(UART_SEL)) & UART_LSR_THRE) == 0); - set8(UART_THR(UART_SEL), '\r'); - } - while ((get8(UART_LSR(UART_SEL)) & UART_LSR_THRE) == 0); - set8(UART_THR(UART_SEL), c); - } -} -#endif /* DEBUG_UART */ +/* uart_init and uart_write are provided by nxp_ppc.c shared code */ /* ---- eLBC Driver ---- */ #ifdef ENABLE_ELBC diff --git a/hal/nxp_ppc.c b/hal/nxp_ppc.c index 203a655ec1..e925f74fa8 100644 --- a/hal/nxp_ppc.c +++ b/hal/nxp_ppc.c @@ -21,3 +21,89 @@ /* This file gets directly included from nxp_ targets. * This file contains shared driver code for all NXP QorIQ platforms */ + +/* RAMFUNCTION is defined by image.h (included by targets that need RAM_CODE). + * Provide an empty fallback for targets that do not use RAM_CODE (e.g. T1024/P1021). */ +#ifndef RAMFUNCTION +#define RAMFUNCTION +#endif + +/* ---- E5500/E6500 clock helpers and udelay ---- + * CLOCKING_PLLCNGSR and CLOCKING_PLLPGSR must be defined by the including + * target before this file is reached (e.g. via nxp_t2080.h / nxp_t1024.c). + * SYS_CLK must be the oscillator input frequency (e.g. 100 MHz). 
*/ +#if defined(CORE_E5500) || defined(CORE_E6500) +#ifdef ENABLE_BUS_CLK_CALC +static uint32_t hal_get_core_clk(void) +{ + /* compute core clock: system_input * (CGA_PLL1_RAT / 2) */ + uint32_t core_clk; + uint32_t core_ratio = get32(CLOCKING_PLLCNGSR(0)); + core_ratio = ((core_ratio >> 1) & 0x3F); + core_clk = SYS_CLK * core_ratio; + return core_clk; +} +static uint32_t RAMFUNCTION hal_get_plat_clk(void) +{ + /* compute platform clock: system_input * (SYS_PLL_RAT / 2) */ + uint32_t plat_clk; + uint32_t plat_ratio = get32(CLOCKING_PLLPGSR); + plat_ratio = ((plat_ratio >> 1) & 0x1F); + plat_clk = SYS_CLK * plat_ratio; + return plat_clk; +} +static uint32_t hal_get_bus_clk(void) +{ + return hal_get_plat_clk() / 2; +} +#endif /* ENABLE_BUS_CLK_CALC */ + +#define TIMEBASE_CLK_DIV 16 +#define TIMEBASE_HZ (hal_get_plat_clk() / TIMEBASE_CLK_DIV) +#define DELAY_US (TIMEBASE_HZ / 1000000) +static void RAMFUNCTION udelay(uint32_t delay_us) +{ + wait_ticks((unsigned long long)delay_us * DELAY_US); +} +#endif /* CORE_E5500 || CORE_E6500 */ + +/* ---- Shared PC16552D-compatible DUART driver ---- + * Each target must define before including this file: + * UART_SEL, BAUD_RATE, UART_THR(n), UART_IER(n), UART_FCR(n), + * UART_LCR(n), UART_DLB(n), UART_DMB(n), UART_LSR(n), + * UART_FCR_TFR, UART_FCR_RFR, UART_FCR_FEN, + * UART_LCR_DLAB, UART_LCR_WLS, UART_LSR_TEMT, UART_LSR_THRE */ +#ifdef DEBUG_UART +void uart_init(void) +{ + /* baud rate = bus_clk / (16 * div); round up */ + uint32_t div = (hal_get_bus_clk() + (8 * BAUD_RATE)) / (16 * BAUD_RATE); + + while (!(get8(UART_LSR(UART_SEL)) & UART_LSR_TEMT)) + ; + + set8(UART_IER(UART_SEL), 0); + set8(UART_FCR(UART_SEL), (UART_FCR_TFR | UART_FCR_RFR | UART_FCR_FEN)); + + /* enable baud rate access (DLAB=1) */ + set8(UART_LCR(UART_SEL), (UART_LCR_DLAB | UART_LCR_WLS)); + set8(UART_DLB(UART_SEL), (div & 0xff)); + set8(UART_DMB(UART_SEL), ((div >> 8) & 0xff)); + /* disable baud rate access (DLAB=0) */ + set8(UART_LCR(UART_SEL), 
(UART_LCR_WLS)); +} + +void uart_write(const char* buf, uint32_t sz) +{ + uint32_t pos = 0; + while (sz-- > 0) { + char c = buf[pos++]; + if (c == '\n') { /* handle CRLF */ + while ((get8(UART_LSR(UART_SEL)) & UART_LSR_THRE) == 0); + set8(UART_THR(UART_SEL), '\r'); + } + while ((get8(UART_LSR(UART_SEL)) & UART_LSR_THRE) == 0); + set8(UART_THR(UART_SEL), c); + } +} +#endif /* DEBUG_UART */ diff --git a/hal/nxp_ppc.h b/hal/nxp_ppc.h index b72bad4042..beb33cf83c 100644 --- a/hal/nxp_ppc.h +++ b/hal/nxp_ppc.h @@ -101,7 +101,7 @@ #define USE_LONG_JUMP #elif defined(TARGET_nxp_t2080) - /* NXP T0280 */ + /* NXP T2080 */ #define CORE_E6500 #define CPU_NUMCORES 4 #define CORES_PER_CLUSTER 4 @@ -111,36 +111,72 @@ #define CCSRBAR_DEF (0xFE000000UL) /* T2080RM 4.3.1 default base */ #define CCSRBAR_SIZE BOOKE_PAGESZ_16M - /* relocate to 64-bit 0xE_ */ - //#define CCSRBAR_PHYS_HIGH 0xEULL - //#define CCSRBAR_PHYS (CCSRBAR_PHYS_HIGH + CCSRBAR_DEF) - #define ENABLE_L1_CACHE #define ENABLE_L2_CACHE - #define L2SRAM_ADDR (0xF8F80000UL) /* L2 as SRAM */ - #define L2SRAM_SIZE (256UL * 1024UL) + /* L1 locked dcache as initial stack (16KB). + * CPC SRAM (via CoreNet) is unreliable on cold power cycle — + * store buffer drains cause bus errors. L1 locked cache is + * core-local and works reliably from first instruction. + * Address chosen below CPC SRAM range, no backing memory needed. */ + #define L1_CACHE_ADDR (0xF8E00000UL) + + /* T2080 CPC SRAM config - 1MB for ECC P384 stack requirements. + * CPC hardware is configured in early ASM but NOT used for stack. + * CPC SRAM becomes usable after cache hierarchy is initialized in C. */ + #define L2SRAM_ADDR (0xF8F00000UL) /* CPC as SRAM (1MB) */ + #define L2SRAM_SIZE (1024UL * 1024UL) #define INITIAL_SRAM_ADDR L2SRAM_ADDR - #define INITIAL_SRAM_LAW_SZ LAW_SIZE_256KB + /* CPC SRAM transactions traverse the CoreNet interconnect, which + * requires a LAW to route them. 
LAW_TRGT_DDR_1 (0x10) is the CPC + * target per T2080RM Table 2-2 (Target ID Assignments). */ + #define INITIAL_SRAM_LAW_SZ LAW_SIZE_1MB #define INITIAL_SRAM_LAW_TRGT LAW_TRGT_DDR_1 - #define INITIAL_SRAM_BOOKE_SZ BOOKE_PAGESZ_256K + #define INITIAL_SRAM_BOOKE_SZ BOOKE_PAGESZ_1M #define ENABLE_INTERRUPTS #define ENABLE_DDR #ifndef DDR_SIZE - #define DDR_SIZE (8192UL * 1024UL * 1024UL) + #ifdef BOARD_CW_VPX3152 + #define DDR_SIZE (8192ULL * 1024ULL * 1024ULL) /* TODO: confirm from CS_BNDS dump (4/8/16 GB) */ + #else + #define DDR_SIZE (8192ULL * 1024ULL * 1024ULL) /* T2080 RDB / NAII 68PPC2: 8 GB */ + #endif #endif + /* DDR stack configuration - relocate from the initial L1 locked-cache stack after DDR init + * Stack is at top of first 32MB of DDR, with 64KB reserved for stack + * Stack grows downward from DDR_STACK_TOP */ + #define DDR_STACK_SIZE (64 * 1024) /* 64KB stack in DDR */ + #define DDR_STACK_TOP 0x02000000UL /* Top of first 32MB */ + #define DDR_STACK_BASE (DDR_STACK_TOP - DDR_STACK_SIZE) + + /* DDR address where .ramcode is copied before CPC SRAM is released. + * TLB9 is remapped: VA 0xF8F00000 -> PA DDR_RAMCODE_ADDR so that + * RAMFUNCTION code continues to work after CPC becomes L3 cache. */ + #define DDR_RAMCODE_ADDR 0x03000000UL /* 48MB into DDR */ + + /* Flash base address and size — may differ between board variants. + * TODO: Confirm VPX3-152 flash mapping from IFC CSPR(0)/AMASK(0) dump. + * If the new board uses a different base address (e.g. 0xF0000000 for + * 256 MB flash), update the BOARD_CW_VPX3152 values and drop the "0 &&" guard. 
*/ +#if 0 && defined(BOARD_CW_VPX3152) + #define FLASH_BASE_ADDR 0xF0000000UL /* TODO: from IFC dump */ + #define FLASH_BASE_PHYS_HIGH 0x0ULL + #define FLASH_LAW_SIZE LAW_SIZE_256MB + #define FLASH_TLB_PAGESZ BOOKE_PAGESZ_256M +#else #define FLASH_BASE_ADDR 0xE8000000UL #define FLASH_BASE_PHYS_HIGH 0x0ULL #define FLASH_LAW_SIZE LAW_SIZE_128MB #define FLASH_TLB_PAGESZ BOOKE_PAGESZ_128M +#endif #define USE_LONG_JUMP #else - #error Please define platform PowerPC core version and CCSRBAR + #error Please define TARGET (nxp_t2080, nxp_t1024, or nxp_p1021) #endif @@ -170,6 +206,7 @@ #define CCSRBAR_PHYS_HIGH 0 #endif + /* DDR */ #ifndef DDR_ADDRESS #define DDR_ADDRESS 0x00000000UL @@ -285,20 +322,24 @@ #define CPC_BASE (CCSRBAR + 0x10000) /* 8.2 CoreNet Platform Cache (CPC) Memory Map */ #define CPCCSR0 (0x000) + #define CPCEWCR0 (0x010) #define CPCSRCR1 (0x100) #define CPCSRCR0 (0x104) + #define CPCERRDIS (0xE44) #define CPCHDBCR0 (0xF00) #define CPCCSR0_CPCE (0x80000000 >> 0) #define CPCCSR0_CPCPE (0x80000000 >> 1) #define CPCCSR0_CPCFI (0x80000000 >> 10) + #define CPCCSR0_CPCFL (0x80000000 >> 20) #define CPCCSR0_CPCLFC (0x80000000 >> 21) - #define CPCCSR0_SRAM_ENABLE (CPCCSR0_CPCE | CPCCSR0_CPCPE) #ifdef CORE_E6500 - #define CPCSRCR0_SRAMSZ_64 (0x1 << 1) /* ways 14-15 */ - #define CPCSRCR0_SRAMSZ_256 (0x3 << 1) /* ways 8-15 */ - #define CPCSRCR0_SRAMSZ_512 (0x4 << 1) /* ways 0-15 */ + /* T2080: 2MB CPC, 16 ways, 128KB per way */ + #define CPCSRCR0_SRAMSZ_256 (0x1 << 1) /* ways 14-15, 256KB */ + #define CPCSRCR0_SRAMSZ_512 (0x2 << 1) /* ways 12-15, 512KB */ + #define CPCSRCR0_SRAMSZ_1024 (0x3 << 1) /* ways 8-15, 1MB */ + #define CPCSRCR0_SRAMSZ_2048 (0x4 << 1) /* ways 0-15, 2MB */ #else /* CORE E5500 */ #define CPCSRCR0_SRAMSZ_64 (0x1 << 1) /* ways 6-7 */ #define CPCSRCR0_SRAMSZ_128 (0x2 << 1) /* ways 4-7 */ @@ -465,31 +506,37 @@ #define L2CSR1 0x3FA /* L2 Data Cache Control and Status Register 1 */ #endif -#define L2CSR0_L2FI 0x00200000 /* L2 Cache Flash Invalidate */ 
-#define L2CSR0_L2FL 0x00000800 /* L2 Cache Flush */ -#define L2CSR0_L2LFC 0x00000400 /* L2 Cache Lock Flash Clear */ -#define L2CSR0_L2PE 0x40000000 /* L2 Cache Parity/ECC Enable */ #define L2CSR0_L2E 0x80000000 /* L2 Cache Enable */ - +#define L2CSR0_L2PE 0x40000000 /* L2 Cache Parity/ECC Enable */ #define L2CSR0_L2WP 0x1c000000 /* L2 I/D Way Partioning */ #define L2CSR0_L2CM 0x03000000 /* L2 Cache Coherency Mode */ #define L2CSR0_L2FI 0x00200000 /* L2 Cache Flash Invalidate */ #define L2CSR0_L2IO 0x00100000 /* L2 Cache Instruction Only */ #define L2CSR0_L2DO 0x00010000 /* L2 Cache Data Only */ #define L2CSR0_L2REP 0x00003000 /* L2 Line Replacement Algo */ +#define L2CSR0_L2FL 0x00000800 /* L2 Cache Flush */ +#define L2CSR0_L2LFC 0x00000400 /* L2 Cache Lock Flash Clear */ #define SCCSRBAR 0x3FE /* Shifted CCSRBAR */ #define SPRN_DBSR 0x130 /* Debug Status Register */ #define SPRN_DEC 0x016 /* Decrement Register */ -#define SPRN_TSR 0x3D8 /* Timer Status Register */ -#define SPRN_TCR 0x3DA /* Timer Control Register */ +#ifdef CORE_E6500 + #define SPRN_TSR 0x150 /* Timer Status Register (SPR 336) */ + #define SPRN_TCR 0x154 /* Timer Control Register (SPR 340) */ + #define SPRN_DEAR 0x03D /* Data Exception Address Register (SPR 61) */ + #define SPRN_ESR 0x03E /* Exception Syndrome Register (SPR 62) */ +#else + #define SPRN_TSR 0x3D8 /* Timer Status Register */ + #define SPRN_TCR 0x3DA /* Timer Control Register */ + #define SPRN_DEAR 0x3D5 /* Data Exception Address Register */ + #define SPRN_ESR 0x3D4 /* Exception Syndrome Register */ +#endif + #define TCR_WIE 0x08000000 /* Watchdog Interrupt Enable */ #define TCR_DIE 0x04000000 /* Decrement Interrupt Enable */ - -#define SPRN_ESR 0x3D4 /* Exception Syndrome Register */ #define SPRN_MCSR 0x23C /* Machine Check Syndrome Register */ #define SPRN_PVR 0x11F /* Processor Version */ #define SPRN_SVR 0x3FF /* System Version */ @@ -524,9 +571,12 @@ #define SRR0 0x01A /* Save/Restore Register 0 */ #define SRR1 0x01B /* 
Save/Restore Register 1 */ +#define SPRN_MCSRR0 0x23A /* Machine Check Save/Restore Register 0 */ +#define SPRN_MCSRR1 0x23B /* Machine Check Save/Restore Register 1 */ #define MSR_DS (1<<4) /* Book E Data address space */ #define MSR_IS (1<<5) /* Book E Instruction address space */ +#define MSR_RI (1<<1) /* Recoverable Interrupt */ #define MSR_DE (1<<9) /* Debug Exception Enable */ #define MSR_ME (1<<12) /* Machine check enable */ #define MSR_EE (1<<15) /* External Interrupt enable */ @@ -575,6 +625,10 @@ #endif #define mtspr(rn, v) __asm__ __volatile__("mtspr " WC_STRINGIFY(rn) ",%0" : : "r" (v)) +#define mfspr(rn) ({ \ + unsigned int rval; \ + __asm__ __volatile__("mfspr %0," WC_STRINGIFY(rn) : "=r" (rval)); rval; \ +}) #define mfmsr() ({ \ unsigned int rval; \ @@ -649,10 +703,19 @@ static inline void set32(volatile unsigned int *addr, unsigned int val) ); } +/* longcall attribute for functions in .ramcode — callers in .text need + * indirect calls since .ramcode VMA is ~143MB from .text VMA, + * exceeding PPC bl +/-32MB range */ +#if defined(__WOLFBOOT) && defined(RAM_CODE) && defined(ARCH_PPC) +#define LONGCALL_ATTR __attribute__((longcall)) +#else +#define LONGCALL_ATTR +#endif + /* C version in boot_ppc.c */ -extern void set_tlb(uint8_t tlb, uint8_t esel, uint32_t epn, uint32_t rpn, - uint32_t urpn, uint8_t perms, uint8_t wimge, uint8_t ts, uint8_t tsize, - uint8_t iprot); +extern void LONGCALL_ATTR set_tlb(uint8_t tlb, uint8_t esel, uint32_t epn, + uint32_t rpn, uint32_t urpn, uint8_t perms, uint8_t wimge, uint8_t ts, + uint8_t tsize, uint8_t iprot); extern void disable_tlb1(uint8_t esel); extern void flush_cache(uint32_t start_addr, uint32_t size); extern void set_law(uint8_t idx, uint32_t addr_h, uint32_t addr_l, @@ -662,18 +725,49 @@ extern void set_law(uint8_t idx, uint32_t addr_h, uint32_t addr_l, extern void uart_init(void); /* from boot_ppc_start.S */ -extern unsigned long long get_ticks(void); -extern void wait_ticks(unsigned long long); +extern 
unsigned long long LONGCALL_ATTR get_ticks(void); +extern void LONGCALL_ATTR wait_ticks(unsigned long long); extern unsigned long get_pc(void); extern void relocate_code(uint32_t *dest, uint32_t *src, uint32_t length); -extern void invalidate_dcache(void); -extern void invalidate_icache(void); +extern void LONGCALL_ATTR invalidate_dcache(void); +extern void LONGCALL_ATTR invalidate_icache(void); extern void icache_enable(void); extern void dcache_enable(void); extern void dcache_disable(void); #else /* Assembly version */ +#ifdef CORE_E6500 +/* e6500 has 64-bit MAS registers. On 64-bit PPC, lis sign-extends to 64 bits. + * Any MAS value with bit 31 set (MAS1=0xC..., MAS2/MAS3 high addresses) gets + * upper 32 bits = 0xFFFFFFFF. Hardware may require reserved upper bits = 0. + * Use "li 0; oris; ori" pattern for MAS1, MAS2, MAS3 to avoid sign-extension. */ +#define set_tlb(tlb, esel, epn, rpn, urpn, perms, winge, ts, tsize, iprot, reg) \ + lis reg, BOOKE_MAS0(tlb, esel, 0)@h; \ + ori reg, reg, BOOKE_MAS0(tlb, esel, 0)@l; \ + mtspr MAS0, reg;\ + li reg, 0; \ + oris reg, reg, BOOKE_MAS1(1, iprot, 0, ts, tsize)@h; \ + ori reg, reg, BOOKE_MAS1(1, iprot, 0, ts, tsize)@l; \ + mtspr MAS1, reg; \ + li reg, 0; \ + oris reg, reg, BOOKE_MAS2(epn, winge)@h; \ + ori reg, reg, BOOKE_MAS2(epn, winge)@l; \ + mtspr MAS2, reg; \ + li reg, 0; \ + oris reg, reg, BOOKE_MAS3(rpn, 0, perms)@h; \ + ori reg, reg, BOOKE_MAS3(rpn, 0, perms)@l; \ + mtspr MAS3, reg; \ + li reg, 0; \ + oris reg, reg, urpn@h; \ + ori reg, reg, urpn@l; \ + mtspr MAS7, reg; \ + isync; \ + msync; \ + tlbwe; \ + isync; +#else +/* e500/e5500 - 32-bit MAS registers */ #define set_tlb(tlb, esel, epn, rpn, urpn, perms, winge, ts, tsize, iprot, reg) \ lis reg, BOOKE_MAS0(tlb, esel, 0)@h; \ ori reg, reg, BOOKE_MAS0(tlb, esel, 0)@l; \ @@ -694,6 +788,7 @@ extern void dcache_disable(void); msync; \ tlbwe; \ isync; +#endif /* CORE_E6500 */ /* readability helpers for assembly to show register versus decimal */ #define r0 0 @@ 
-744,11 +839,6 @@ extern void dcache_disable(void); #define ENTRY_RESV 16 #define ENTRY_PIR 20 -/* not used for ePAPR 1.1 */ -#define ENTRY_R6_UPPER 24 -#define ENTRY_R6_LOWER 28 - - #define ENTRY_SIZE 64 #endif /* !_NXP_PPC_H_ */ diff --git a/hal/nxp_t1024.c b/hal/nxp_t1024.c index ae859d87b5..8132dffaf5 100644 --- a/hal/nxp_t1024.c +++ b/hal/nxp_t1024.c @@ -40,12 +40,6 @@ #define ENABLE_BUS_CLK_CALC #ifndef BUILD_LOADER_STAGE1 - /* Tests */ - #if 0 - #define TEST_DDR - #define TEST_TPM - #endif - #define ENABLE_PCIE #define ENABLE_CPLD #define ENABLE_QE /* QUICC Engine */ @@ -53,7 +47,7 @@ #define ENABLE_PHY #define ENABLE_MRAM - #if defined(WOLFBOOT_TPM) || defined(TEST_TPM) + #if defined(WOLFBOOT_TPM) #define ENABLE_ESPI /* SPI for TPM */ #endif #define ENABLE_MP /* multi-core support */ @@ -67,14 +61,7 @@ #define USE_ERRATA_DDRA009663 #define USE_ERRATA_DDRA009942 -/* Foward declarations */ -#if defined(ENABLE_DDR) && defined(TEST_DDR) -static int test_ddr(void); -#endif -#if defined(ENABLE_ESPI) && defined(TEST_TPM) -static int test_tpm(void); -#endif - +/* Forward declarations */ static void hal_flash_unlock_sector(uint32_t sector); #ifdef ENABLE_ESPI @@ -1011,47 +998,7 @@ enum ifc_amask_sizes { /* generic share NXP QorIQ driver code */ #include "nxp_ppc.c" - -#ifdef ENABLE_BUS_CLK_CALC -static uint32_t hal_get_core_clk(void) -{ - /* compute core clock (system input * ratio) */ - uint32_t core_clk; - uint32_t core_ratio = get32(CLOCKING_PLLCNGSR(0)); /* see CGA_PLL1_RAT in RCW */ - /* shift by 1 and mask */ - core_ratio = ((core_ratio >> 1) & 0x3F); - core_clk = SYS_CLK * core_ratio; - return core_clk; -} -static uint32_t hal_get_plat_clk(void) -{ - /* compute core clock (system input * ratio) */ - uint32_t plat_clk; - uint32_t plat_ratio = get32(CLOCKING_PLLPGSR); /* see SYS_PLL_RAT in RCW */ - /* shift by 1 and mask */ - plat_ratio = ((plat_ratio >> 1) & 0x1F); - plat_clk = SYS_CLK * plat_ratio; - return plat_clk; -} -static uint32_t hal_get_bus_clk(void) 
-{ - /* compute bus clock (platform clock / 2) */ - uint32_t bus_clk = hal_get_plat_clk() / 2; - return bus_clk; -} -#else -#define hal_get_core_clk() (uint32_t)(SYS_CLK * 14) -#define hal_get_plat_clk() (uint32_t)(SYS_CLK * 4) -#define hal_get_bus_clk() (uint32_t)(hal_get_plat_clk() / 2) -#endif - -#define TIMEBASE_CLK_DIV 16 -#define TIMEBASE_HZ (hal_get_plat_clk() / TIMEBASE_CLK_DIV) -#define DELAY_US (TIMEBASE_HZ / 1000000) -static void udelay(uint32_t delay_us) -{ - wait_ticks(delay_us * DELAY_US); -} +/* clock helpers and udelay are provided by nxp_ppc.c (CORE_E5500 path) */ static void law_init(void) { @@ -1200,46 +1147,7 @@ void hal_espi_deinit(void) } #endif /* ENABLE_ESPI */ -/* ---- DUART Driver ---- */ -#ifdef DEBUG_UART -void uart_init(void) -{ - /* calc divisor for UART - * baud rate = CCSRBAR frequency ÷ (16 x [UDMB||UDLB]) - */ - /* compute UART divisor - round up */ - uint32_t div = (hal_get_bus_clk() + (16/2 * BAUD_RATE)) / (16 * BAUD_RATE); - - while (!(get8(UART_LSR(UART_SEL)) & UART_LSR_TEMT)) - ; - - /* set ier, fcr, mcr */ - set8(UART_IER(UART_SEL), 0); - set8(UART_FCR(UART_SEL), (UART_FCR_TFR | UART_FCR_RFR | UART_FCR_FEN)); - - /* enable baud rate access (DLAB=1) - divisor latch access bit*/ - set8(UART_LCR(UART_SEL), (UART_LCR_DLAB | UART_LCR_WLS)); - /* set divisor */ - set8(UART_DLB(UART_SEL), (div & 0xff)); - set8(UART_DMB(UART_SEL), ((div>>8) & 0xff)); - /* disable rate access (DLAB=0) */ - set8(UART_LCR(UART_SEL), (UART_LCR_WLS)); -} - -void uart_write(const char* buf, uint32_t sz) -{ - uint32_t pos = 0; - while (sz-- > 0) { - char c = buf[pos++]; - if (c == '\n') { /* handle CRLF */ - while ((get8(UART_LSR(UART_SEL)) & UART_LSR_THRE) == 0); - set8(UART_THR(UART_SEL), '\r'); - } - while ((get8(UART_LSR(UART_SEL)) & UART_LSR_THRE) == 0); - set8(UART_THR(UART_SEL), c); - } -} -#endif /* DEBUG_UART */ +/* uart_init and uart_write are provided by nxp_ppc.c shared code */ /* ---- IFC Driver ---- */ #if defined(ENABLE_IFC) && 
!defined(BUILD_LOADER_STAGE1) @@ -1682,15 +1590,6 @@ static int hal_pcie_init(void) set32(PCIE_IWBEAR(pcie_bus, 3), 0x0); set32( PCIE_IWAR(pcie_bus, 3), (PIWAR_PF | PIWAR_TRGT_LOCAL | PIWAR_READ | PIWAR_WRITE | LAW_SIZE_1TB)); - - #define PCI_LTSSM 0x404 /* PCIe Link Training, Status State Machine */ - #define PCI_LTSSM_L0 0x16 /* L0 state */ - - /* TODO: Check if link is active. Read config PCI_LTSSM */ - #if 0 - link = pci_config_read16(0, 0, 0, PCI_LTSSM); - enabled = (link >= PCI_LTSSM_L0); - #endif } /* Only enumerate PCIe 3 */ diff --git a/hal/nxp_t2080.c b/hal/nxp_t2080.c index 6cc3159791..45b49b1845 100644 --- a/hal/nxp_t2080.c +++ b/hal/nxp_t2080.c @@ -23,320 +23,65 @@ #include "printf.h" #include "image.h" /* for RAMFUNCTION */ #include "nxp_ppc.h" +#include "nxp_t2080.h" -/* Tested on T2080E Rev 1.1, e6500 core 2.0, PVR 8040_0120 and SVR 8538_0011 */ - -/* T2080 */ -#define SYS_CLK (600000000) /* 100MHz PLL with 6:1 = 600 MHz */ - -/* T2080 PC16552D Dual UART */ -#define BAUD_RATE 115200 -#define UART_SEL 0 /* select UART 0 or 1 */ - -#define UART_BASE(n) (CCSRBAR + 0x11C500 + (n * 0x1000)) - -#define UART_RBR(n) *((volatile uint8_t*)(UART_BASE(n) + 0)) /* receiver buffer register */ -#define UART_THR(n) *((volatile uint8_t*)(UART_BASE(n) + 0)) /* transmitter holding register */ -#define UART_IER(n) *((volatile uint8_t*)(UART_BASE(n) + 1)) /* interrupt enable register */ -#define UART_IIR(n) *((volatile uint8_t*)(UART_BASE(n) + 2)) /* interrupt ID register */ -#define UART_FCR(n) *((volatile uint8_t*)(UART_BASE(n) + 2)) /* FIFO control register */ -#define UART_LCR(n) *((volatile uint8_t*)(UART_BASE(n) + 3)) /* line control register */ -#define UART_MCR(n) *((volatile uint8_t*)(UART_BASE(n) + 4)) /* modem control register */ -#define UART_LSR(n) *((volatile uint8_t*)(UART_BASE(n) + 5)) /* line status register */ - -/* enabled when UART_LCR_DLAB set */ -#define UART_DLB(n) *((volatile uint8_t*)(UART_BASE(n) + 0)) /* divisor least significant byte register 
*/ -#define UART_DMB(n) *((volatile uint8_t*)(UART_BASE(n) + 1)) /* divisor most significant byte register */ - -#define UART_FCR_TFR (0x04) /* Transmitter FIFO reset */ -#define UART_FCR_RFR (0x02) /* Receiver FIFO reset */ -#define UART_FCR_FEN (0x01) /* FIFO enable */ -#define UART_LCR_DLAB (0x80) /* Divisor latch access bit */ -#define UART_LCR_WLS (0x03) /* Word length select: 8-bits */ -#define UART_LSR_TEMT (0x40) /* Transmitter empty */ -#define UART_LSR_THRE (0x20) /* Transmitter holding register empty */ - - -/* T2080 IFC (Integrated Flash Controller) - RM 13.3 */ -#define IFC_BASE (CCSRBAR + 0x00124000) -#define IFC_MAX_BANKS 8 - -#define IFC_CSPR_EXT(n) *((volatile uint32_t*)(IFC_BASE + 0x000C + (n * 0xC))) /* Extended Base Address */ -#define IFC_CSPR(n) *((volatile uint32_t*)(IFC_BASE + 0x0010 + (n * 0xC))) /* Chip-select Property */ -#define IFC_AMASK(n) *((volatile uint32_t*)(IFC_BASE + 0x00A0 + (n * 0xC))) -#define IFC_CSOR(n) *((volatile uint32_t*)(IFC_BASE + 0x0130 + (n * 0xC))) -#define IFC_CSOR_EXT(n) *((volatile uint32_t*)(IFC_BASE + 0x0134 + (n * 0xC))) -#define IFC_FTIM0(n) *((volatile uint32_t*)(IFC_BASE + 0x01C0 + (n * 0x30))) -#define IFC_FTIM1(n) *((volatile uint32_t*)(IFC_BASE + 0x01C4 + (n * 0x30))) -#define IFC_FTIM2(n) *((volatile uint32_t*)(IFC_BASE + 0x01C8 + (n * 0x30))) -#define IFC_FTIM3(n) *((volatile uint32_t*)(IFC_BASE + 0x01CC + (n * 0x30))) - -#define IFC_CSPR_PHYS_ADDR(x) (((uint32_t)x) & 0xFFFF0000) /* Physical base address */ -#define IFC_CSPR_PORT_SIZE_8 0x00000080 /* Port Size 8 */ -#define IFC_CSPR_PORT_SIZE_16 0x00000100 /* Port Size 16 */ -#define IFC_CSPR_WP 0x00000040 /* Write Protect */ -#define IFC_CSPR_MSEL_NOR 0x00000000 /* Mode Select - NOR */ -#define IFC_CSPR_MSEL_NAND 0x00000002 /* Mode Select - NAND */ -#define IFC_CSPR_MSEL_GPCM 0x00000004 /* Mode Select - GPCM (General-purpose chip-select machine) */ -#define IFC_CSPR_V 0x00000001 /* Bank Valid */ - -/* NOR Timings (IFC clocks) */ -#define 
IFC_FTIM0_NOR_TACSE(n) (((n) & 0x0F) << 28) /* After address hold cycle */ -#define IFC_FTIM0_NOR_TEADC(n) (((n) & 0x3F) << 16) /* External latch address delay cycles */ -#define IFC_FTIM0_NOR_TAVDS(n) (((n) & 0x3F) << 8) /* Delay between CS assertion */ -#define IFC_FTIM0_NOR_TEAHC(n) (((n) & 0x3F) << 0) /* External latch address hold cycles */ -#define IFC_FTIM1_NOR_TACO(n) (((n) & 0xFF) << 24) /* CS assertion to output enable */ -#define IFC_FTIM1_NOR_TRAD(n) (((n) & 0x3F) << 8) /* read access delay */ -#define IFC_FTIM1_NOR_TSEQ(n) (((n) & 0x3F) << 0) /* sequential read access delay */ -#define IFC_FTIM2_NOR_TCS(n) (((n) & 0x0F) << 24) /* Chip-select assertion setup time */ -#define IFC_FTIM2_NOR_TCH(n) (((n) & 0x0F) << 18) /* Chip-select hold time */ -#define IFC_FTIM2_NOR_TWPH(n) (((n) & 0x3F) << 10) /* Chip-select hold time */ -#define IFC_FTIM2_NOR_TWP(n) (((n) & 0xFF) << 0) /* Write enable pulse width */ - -/* GPCM Timings (IFC clocks) */ -#define IFC_FTIM0_GPCM_TACSE(n) (((n) & 0x0F) << 28) /* After address hold cycle */ -#define IFC_FTIM0_GPCM_TEADC(n) (((n) & 0x3F) << 16) /* External latch address delay cycles */ -#define IFC_FTIM0_GPCM_TEAHC(n) (((n) & 0x3F) << 0) /* External latch address hold cycles */ -#define IFC_FTIM1_GPCM_TACO(n) (((n) & 0xFF) << 24) /* CS assertion to output enable */ -#define IFC_FTIM1_GPCM_TRAD(n) (((n) & 0x3F) << 8) /* read access delay */ -#define IFC_FTIM2_GPCM_TCS(n) (((n) & 0x0F) << 24) /* Chip-select assertion setup time */ -#define IFC_FTIM2_GPCM_TCH(n) (((n) & 0x0F) << 18) /* Chip-select hold time */ -#define IFC_FTIM2_GPCM_TWP(n) (((n) & 0xFF) << 0) /* Write enable pulse width */ - -/* IFC AMASK - RM Table 13-3 - Count of MSB minus 1 */ -enum ifc_amask_sizes { - IFC_AMASK_64KB = 0xFFFF0000, - IFC_AMASK_128KB = 0xFFFE0000, - IFC_AMASK_256KB = 0xFFFC0000, - IFC_AMASK_512KB = 0xFFF80000, - IFC_AMASK_1MB = 0xFFF00000, - IFC_AMASK_2MB = 0xFFE00000, - IFC_AMASK_4MB = 0xFFC00000, - IFC_AMASK_8MB = 0xFF800000, - 
IFC_AMASK_16MB = 0xFF000000, - IFC_AMASK_32MB = 0xFE000000, - IFC_AMASK_64MB = 0xFC000000, - IFC_AMASK_128MB = 0xF8000000, - IFC_AMASK_256MB = 0xF0000000, - IFC_AMASK_512MB = 0xE0000000, - IFC_AMASK_1GB = 0xC0000000, - IFC_AMASK_2GB = 0x80000000, - IFC_AMASK_4GB = 0x00000000, -}; - - -/* NOR Flash */ -#define FLASH_BASE 0xE8000000 - -#define FLASH_BANK_SIZE (128*1024*1024) -#define FLASH_PAGE_SIZE (1024) /* program buffer */ -#define FLASH_SECTOR_SIZE (128*1024) -#define FLASH_SECTORS (FLASH_BANK_SIZE / FLASH_SECTOR_SIZE) -#define FLASH_CFI_16BIT 0x02 /* word */ -#define FLASH_CFI_WIDTH FLASH_CFI_16BIT - -#define FLASH_ERASE_TOUT 60000 /* Flash Erase Timeout (ms) */ -#define FLASH_WRITE_TOUT 500 /* Flash Write Timeout (ms) */ - - -#if 0 - #define ENABLE_CPLD +#define ENABLE_IFC +#define ENABLE_BUS_CLK_CALC +/* #define DEBUG_FLASH */ + +#ifndef BUILD_LOADER_STAGE1 + #define ENABLE_MP /* multi-core support */ #endif -/* CPLD */ -#define CPLD_BASE 0xFFDF0000 -#define CPLD_BASE_PHYS_HIGH 0xFULL - -#define CPLD_SPARE 0x00 -#define CPLD_SATA_MUX_SEL 0x02 -#define CPLD_BANK_SEL 0x04 -#define CPLD_FW_REV 0x06 -#define CPLD_TTL_RW 0x08 -#define CPLD_TTL_LPBK 0x0A -#define CPLD_TTL_DATA 0x0C -#define CPLD_PROC_STATUS 0x0E /* write 1 to enable proc reset function, reset default value is 0 */ -#define CPLD_FPGA_RDY 0x10 /* read only when reg read 0x0DB1 then fpga is ready */ -#define CPLD_PCIE_SW_RESET 0x12 /* write 1 to reset the PCIe switch */ -#define CPLD_WR_TTL_INT_EN 0x14 -#define CPLD_WR_TTL_INT_DIR 0x16 -#define CPLD_INT_STAT 0x18 -#define CPLD_WR_TEMP_ALM_OVRD 0x1A /* write 0 to enable temp shutdown. 
reset default value is 1 */ -#define CPLD_PWR_DWN_CMD 0x1C -#define CPLD_TEMP_ALM_INT_STAT 0x1E -#define CPLD_WR_TEMP_ALM_INT_EN 0x20 - -#define CPLD_FLASH_BANK_0 0x00 -#define CPLD_FLASH_BANK_1 0x01 - -#define CPLD_DATA(n) *((volatile uint8_t*)(CPLD_BASE + n)) - - -/* SATA */ -#define SATA_ENBL (*(volatile uint32_t *)(0xB1003F4C)) /* also saw 0xB4003F4C */ - -/* DDR */ -/* NAII 68PPC2 - 8GB discrete DDR3 IM8G08D3EBDG-15E */ -/* 1333.333 MT/s data rate 8 GiB (DDR3, 64-bit, CL=9, ECC on) */ -#define DDR_N_RANKS 2 -#define DDR_RANK_DENS 0x100000000 -#define DDR_SDRAM_WIDTH 64 -#define DDR_EC_SDRAM_W 8 -#define DDR_N_ROW_ADDR 16 -#define DDR_N_COL_ADDR 10 -#define DDR_N_BANKS 8 -#define DDR_EDC_CONFIG 2 -#define DDR_BURSTL_MASK 0x0c -#define DDR_TCKMIN_X_PS 1500 -#define DDR_TCMMAX_PS 3000 -#define DDR_CASLAT_X 0x000007E0 -#define DDR_TAA_PS 13500 -#define DDR_TRCD_PS 13500 -#define DDR_TRP_PS 13500 -#define DDR_TRAS_PS 36000 -#define DDR_TRC_PS 49500 -#define DDR_TFAW_PS 30000 -#define DDR_TWR_PS 15000 -#define DDR_TRFC_PS 260000 -#define DDR_TRRD_PS 6000 -#define DDR_TWTR_PS 7500 -#define DDR_TRTP_PS 7500 -#define DDR_REF_RATE_PS 7800000 - -#define DDR_CS0_BNDS_VAL 0x000000FF -#define DDR_CS1_BNDS_VAL 0x010001FF -#define DDR_CS2_BNDS_VAL 0x0300033F -#define DDR_CS3_BNDS_VAL 0x0340037F -#define DDR_CS0_CONFIG_VAL 0x80044402 -#define DDR_CS1_CONFIG_VAL 0x80044402 -#define DDR_CS2_CONFIG_VAL 0x00000202 -#define DDR_CS3_CONFIG_VAL 0x00040202 -#define DDR_CS_CONFIG_2_VAL 0x00000000 - -#define DDR_TIMING_CFG_0_VAL 0xFF530004 -#define DDR_TIMING_CFG_1_VAL 0x98906345 -#define DDR_TIMING_CFG_2_VAL 0x0040A114 -#define DDR_TIMING_CFG_3_VAL 0x010A1100 -#define DDR_TIMING_CFG_4_VAL 0x00000001 -#define DDR_TIMING_CFG_5_VAL 0x04402400 - -#define DDR_SDRAM_MODE_VAL 0x00441C70 -#define DDR_SDRAM_MODE_2_VAL 0x00980000 -#define DDR_SDRAM_MODE_3_8_VAL 0x00000000 -#define DDR_SDRAM_MD_CNTL_VAL 0x00000000 - -#define DDR_SDRAM_CFG_VAL 0xE7040000 -#define DDR_SDRAM_CFG_2_VAL 0x00401010 - 
-#define DDR_SDRAM_INTERVAL_VAL 0x0C300100 -#define DDR_DATA_INIT_VAL 0xDEADBEEF -#define DDR_SDRAM_CLK_CNTL_VAL 0x02400000 -#define DDR_ZQ_CNTL_VAL 0x89080600 - -#define DDR_WRLVL_CNTL_VAL 0x8675F604 -#define DDR_WRLVL_CNTL_2_VAL 0x05060607 -#define DDR_WRLVL_CNTL_3_VAL 0x080A0A0B - -#define DDR_SDRAM_RCW_1_VAL 0x00000000 -#define DDR_SDRAM_RCW_2_VAL 0x00000000 - -#define DDR_DDRCDR_1_VAL 0x80040000 -#define DDR_DDRCDR_2_VAL 0x00000001 - -#define DDR_ERR_INT_EN_VAL 0x0000001D -#define DDR_ERR_SBE_VAL 0x00010000 - - -/* 12.4 DDR Memory Map */ -#define DDR_BASE (CCSRBAR + 0x8000) - -#define DDR_CS_BNDS(n) *((volatile uint32_t*)(DDR_BASE + 0x000 + (n * 8))) /* Chip select n memory bounds */ -#define DDR_CS_CONFIG(n) *((volatile uint32_t*)(DDR_BASE + 0x080 + (n * 4))) /* Chip select n configuration */ -#define DDR_CS_CONFIG_2(n) *((volatile uint32_t*)(DDR_BASE + 0x0C0 + (n * 4))) /* Chip select n configuration 2 */ -#define DDR_SDRAM_CFG *((volatile uint32_t*)(DDR_BASE + 0x110)) /* DDR SDRAM control configuration */ -#define DDR_SDRAM_CFG_2 *((volatile uint32_t*)(DDR_BASE + 0x114)) /* DDR SDRAM control configuration 2 */ -#define DDR_SDRAM_INTERVAL *((volatile uint32_t*)(DDR_BASE + 0x124)) /* DDR SDRAM interval configuration */ -#define DDR_INIT_ADDR *((volatile uint32_t*)(DDR_BASE + 0x148)) /* DDR training initialization address */ -#define DDR_INIT_EXT_ADDR *((volatile uint32_t*)(DDR_BASE + 0x14C)) /* DDR training initialization extended address */ -#define DDR_DATA_INIT *((volatile uint32_t*)(DDR_BASE + 0x128)) /* DDR training initialization value */ -#define DDR_TIMING_CFG_0 *((volatile uint32_t*)(DDR_BASE + 0x104)) /* DDR SDRAM timing configuration 0 */ -#define DDR_TIMING_CFG_1 *((volatile uint32_t*)(DDR_BASE + 0x108)) /* DDR SDRAM timing configuration 1 */ -#define DDR_TIMING_CFG_2 *((volatile uint32_t*)(DDR_BASE + 0x10C)) /* DDR SDRAM timing configuration 2 */ -#define DDR_TIMING_CFG_3 *((volatile uint32_t*)(DDR_BASE + 0x100)) /* DDR SDRAM timing configuration 
3 */ -#define DDR_TIMING_CFG_4 *((volatile uint32_t*)(DDR_BASE + 0x160)) /* DDR SDRAM timing configuration 4 */ -#define DDR_TIMING_CFG_5 *((volatile uint32_t*)(DDR_BASE + 0x164)) /* DDR SDRAM timing configuration 5 */ -#define DDR_TIMING_CFG_6 *((volatile uint32_t*)(DDR_BASE + 0x168)) /* DDR SDRAM timing configuration 6 */ -#define DDR_ZQ_CNTL *((volatile uint32_t*)(DDR_BASE + 0x170)) /* DDR ZQ calibration control */ -#define DDR_WRLVL_CNTL *((volatile uint32_t*)(DDR_BASE + 0x174)) /* DDR write leveling control */ -#define DDR_WRLVL_CNTL_2 *((volatile uint32_t*)(DDR_BASE + 0x190)) /* DDR write leveling control 2 */ -#define DDR_WRLVL_CNTL_3 *((volatile uint32_t*)(DDR_BASE + 0x194)) /* DDR write leveling control 3 */ -#define DDR_SR_CNTR *((volatile uint32_t*)(DDR_BASE + 0x17C)) /* DDR Self Refresh Counter */ -#define DDR_SDRAM_RCW_1 *((volatile uint32_t*)(DDR_BASE + 0x180)) /* DDR Register Control Word 1 */ -#define DDR_SDRAM_RCW_2 *((volatile uint32_t*)(DDR_BASE + 0x184)) /* DDR Register Control Word 2 */ -#define DDR_DDRCDR_1 *((volatile uint32_t*)(DDR_BASE + 0xB28)) /* DDR Control Driver Register 1 */ -#define DDR_DDRCDR_2 *((volatile uint32_t*)(DDR_BASE + 0xB2C)) /* DDR Control Driver Register 2 */ -#define DDR_DDRDSR_1 *((volatile uint32_t*)(DDR_BASE + 0xB20)) /* DDR Debug Status Register 1 */ -#define DDR_DDRDSR_2 *((volatile uint32_t*)(DDR_BASE + 0xB24)) /* DDR Debug Status Register 2 */ -#define DDR_ERR_DISABLE *((volatile uint32_t*)(DDR_BASE + 0xE44)) /* Memory error disable */ -#define DDR_ERR_INT_EN *((volatile uint32_t*)(DDR_BASE + 0xE48)) /* Memory error interrupt enable */ -#define DDR_ERR_SBE *((volatile uint32_t*)(DDR_BASE + 0xE58)) /* Single-Bit ECC memory error management */ -#define DDR_SDRAM_MODE *((volatile uint32_t*)(DDR_BASE + 0x118)) /* DDR SDRAM mode configuration */ -#define DDR_SDRAM_MODE_2 *((volatile uint32_t*)(DDR_BASE + 0x11C)) /* DDR SDRAM mode configuration 2 */ -#define DDR_SDRAM_MODE_3 *((volatile uint32_t*)(DDR_BASE + 0x200)) /* 
DDR SDRAM mode configuration 3 */ -#define DDR_SDRAM_MODE_4 *((volatile uint32_t*)(DDR_BASE + 0x204)) /* DDR SDRAM mode configuration 4 */ -#define DDR_SDRAM_MODE_5 *((volatile uint32_t*)(DDR_BASE + 0x208)) /* DDR SDRAM mode configuration 5 */ -#define DDR_SDRAM_MODE_6 *((volatile uint32_t*)(DDR_BASE + 0x20C)) /* DDR SDRAM mode configuration 6 */ -#define DDR_SDRAM_MODE_7 *((volatile uint32_t*)(DDR_BASE + 0x210)) /* DDR SDRAM mode configuration 7 */ -#define DDR_SDRAM_MODE_8 *((volatile uint32_t*)(DDR_BASE + 0x214)) /* DDR SDRAM mode configuration 8 */ -#define DDR_SDRAM_MD_CNTL *((volatile uint32_t*)(DDR_BASE + 0x120)) /* DDR SDRAM mode control */ -#define DDR_SDRAM_CLK_CNTL *((volatile uint32_t*)(DDR_BASE + 0x130)) /* DDR SDRAM clock control */ - -#define DDR_SDRAM_CFG_MEM_EN 0x80000000 /* SDRAM interface logic is enabled */ -#define DDR_SDRAM_CFG_2_D_INIT 0x00000010 /* data initialization in progress */ - - -/* generic share NXP QorIQ driver code */ + +/* generic shared NXP QorIQ driver code */ #include "nxp_ppc.c" +/* Forward declarations */ +static void RAMFUNCTION hal_flash_unlock_sector(uint32_t sector); +#ifdef ENABLE_MP +static void hal_mp_init(void); +#endif -#ifdef DEBUG_UART -void uart_init(void) -{ - /* calc divisor for UART - * example config values: - * clock_div, baud, base_clk 163 115200 300000000 - * +0.5 to round up - */ - uint32_t div = (((SYS_CLK / 2.0) / (16 * BAUD_RATE)) + 0.5); - - while (!(UART_LSR(UART_SEL) & UART_LSR_TEMT)) - ; - - /* set ier, fcr, mcr */ - UART_IER(UART_SEL) = 0; - UART_FCR(UART_SEL) = (UART_FCR_TFR | UART_FCR_RFR | UART_FCR_FEN); - - /* enable baud rate access (DLAB=1) - divisor latch access bit*/ - UART_LCR(UART_SEL) = (UART_LCR_DLAB | UART_LCR_WLS); - /* set divisor */ - UART_DLB(UART_SEL) = (div & 0xff); - UART_DMB(UART_SEL) = ((div>>8) & 0xff); - /* disable rate access (DLAB=0) */ - UART_LCR(UART_SEL) = (UART_LCR_WLS); -} +/* AMD CFI Commands (Spansion/Cypress) */ +#define AMD_CMD_RESET 0xF0 +#define AMD_CMD_WRITE 
0xA0 +#define AMD_CMD_ERASE_START 0x80 +#define AMD_CMD_ERASE_SECTOR 0x30 +#define AMD_CMD_UNLOCK_START 0xAA +#define AMD_CMD_UNLOCK_ACK 0x55 +#define AMD_CMD_WRITE_TO_BUFFER 0x25 +#define AMD_CMD_WRITE_BUFFER_CONFIRM 0x29 +#define AMD_CMD_SET_PPB_ENTRY 0xC0 +#define AMD_CMD_SET_PPB_EXIT_BC1 0x90 +#define AMD_CMD_SET_PPB_EXIT_BC2 0x00 +#define AMD_CMD_PPB_UNLOCK_BC1 0x80 +#define AMD_CMD_PPB_UNLOCK_BC2 0x30 +#define AMD_CMD_PPB_LOCK_BC1 0xA0 +#define AMD_CMD_PPB_LOCK_BC2 0x00 + +#define AMD_STATUS_TOGGLE 0x40 +#define AMD_STATUS_ERROR 0x20 + +/* Flash unlock addresses */ +#if FLASH_CFI_WIDTH == 16 +#define FLASH_UNLOCK_ADDR1 0x555 +#define FLASH_UNLOCK_ADDR2 0x2AA +#else +#define FLASH_UNLOCK_ADDR1 0xAAA +#define FLASH_UNLOCK_ADDR2 0x555 +#endif + +/* Flash IO Helpers */ +#if FLASH_CFI_WIDTH == 16 +#define FLASH_IO8_WRITE(sec, n, val) *((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2))) = (((val) << 8) | (val)) +#define FLASH_IO16_WRITE(sec, n, val) *((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2))) = (val) +#define FLASH_IO8_READ(sec, n) (uint8_t)(*((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2)))) +#define FLASH_IO16_READ(sec, n) *((volatile uint16_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + ((n) * 2))) +#else +#define FLASH_IO8_WRITE(sec, n, val) *((volatile uint8_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + (n))) = (val) +#define FLASH_IO8_READ(sec, n) *((volatile uint8_t*)(FLASH_BASE_ADDR + (FLASH_SECTOR_SIZE * (sec)) + (n))) +#endif -void uart_write(const char* buf, uint32_t sz) -{ - uint32_t pos = 0; - while (sz-- > 0) { - char c = buf[pos++]; - if (c == '\n') { /* handle CRLF */ - while ((UART_LSR(UART_SEL) & UART_LSR_THRE) == 0); - UART_THR(UART_SEL) = '\r'; - } - while ((UART_LSR(UART_SEL) & UART_LSR_THRE) == 0); - UART_THR(UART_SEL) = c; - } -} -#endif /* DEBUG_UART */ void law_init(void) { @@ -344,114 +89,149 @@ void law_init(void) set_law(3, 
0xF, 0xF4000000, LAW_TRGT_BMAN, LAW_SIZE_32MB, 1); } +/* Note: AMD Autoselect (READ_ID) mode is not used here because entering it + * affects the entire flash bank. Since wolfBoot runs XIP from the same + * bank (CS0), entering Autoselect would crash instruction fetch. */ static void hal_flash_init(void) { - /* IFC - NOR Flash */ - /* LAW is also set in boot_ppc_start.S:flash_law */ - set_law(1, FLASH_BASE_PHYS_HIGH, FLASH_BASE, LAW_TRGT_IFC, LAW_SIZE_128MB, 1); - - /* NOR IFC Flash Timing Parameters */ - IFC_FTIM0(0) = (IFC_FTIM0_NOR_TACSE(4) | \ - IFC_FTIM0_NOR_TEADC(5) | \ - IFC_FTIM0_NOR_TEAHC(5)); - IFC_FTIM1(0) = (IFC_FTIM1_NOR_TACO(53) | - IFC_FTIM1_NOR_TRAD(26) | - IFC_FTIM1_NOR_TSEQ(19)); - IFC_FTIM2(0) = (IFC_FTIM2_NOR_TCS(4) | - IFC_FTIM2_NOR_TCH(4) | - IFC_FTIM2_NOR_TWPH(14) | - IFC_FTIM2_NOR_TWP(28)); - IFC_FTIM3(0) = 0; - /* NOR IFC Definitions (CS0) */ - IFC_CSPR_EXT(0) = (0xF); - IFC_CSPR(0) = (IFC_CSPR_PHYS_ADDR(FLASH_BASE) | \ - IFC_CSPR_PORT_SIZE_16 | \ - IFC_CSPR_MSEL_NOR | \ - IFC_CSPR_V); - IFC_AMASK(0) = IFC_AMASK_128MB; - IFC_CSOR(0) = 0x0000000C; /* TRHZ (80 clocks for read enable high) */ +#ifdef ENABLE_IFC + uint32_t cspr; + + /* IFC CS0 - NOR Flash + * Do NOT reprogram IFC CS0 base address, port size, AMASK, CSOR, or + * FTIM while executing from flash (XIP). The boot ROM already + * configured CS0 correctly. + * + * However, the boot ROM may set IFC_CSPR_WP (write-protect), which + * blocks all write cycles to the flash. This prevents AMD command + * sequences (erase/program) from reaching the chips. Clearing just + * the WP bit is safe during XIP — it doesn't change chip-select + * decode, only enables write forwarding. */ + cspr = get32(IFC_CSPR(0)); +#ifdef DEBUG_UART + wolfBoot_printf("IFC CSPR0: 0x%x%s\n", cspr, + (cspr & IFC_CSPR_WP) ? " (WP set)" : ""); +#endif + /* WP clearing is done in hal_flash_clear_wp() from RAMFUNCTION code. + * T2080RM requires V=0 before modifying IFC_CSPR, which is not safe + * during XIP. 
The RAMFUNCTION code runs from DDR with flash TLB + * guarded, so it can safely toggle V=0 -> modify -> V=1. */ +#endif /* ENABLE_IFC */ } -static void hal_ddr_init(void) +void hal_ddr_init(void) { #ifdef ENABLE_DDR + uint32_t reg; + /* Map LAW for DDR */ - set_law(4, 0, 0, LAW_TRGT_DDR_1, LAW_SIZE_2GB, 0); + set_law(4, 0, DDR_ADDRESS, LAW_TRGT_DDR_1, LAW_SIZE_2GB, 0); /* If DDR is already enabled then just return */ - if (DDR_SDRAM_CFG & DDR_SDRAM_CFG_MEM_EN) { + reg = get32(DDR_SDRAM_CFG); + if (reg & DDR_SDRAM_CFG_MEM_EN) { return; } + /* Set clock early for clock / pin */ + set32(DDR_SDRAM_CLK_CNTL, DDR_SDRAM_CLK_CNTL_VAL); + /* Setup DDR CS (chip select) bounds */ - DDR_CS_BNDS(0) = DDR_CS0_BNDS_VAL; - DDR_CS_CONFIG(0) = DDR_CS0_CONFIG_VAL; - DDR_CS_CONFIG_2(0) = DDR_CS_CONFIG_2_VAL; - DDR_CS_BNDS(1) = DDR_CS1_BNDS_VAL; - DDR_CS_CONFIG(1) = DDR_CS1_CONFIG_VAL; - DDR_CS_CONFIG_2(1) = DDR_CS_CONFIG_2_VAL; - DDR_CS_BNDS(2) = DDR_CS2_BNDS_VAL; - DDR_CS_CONFIG(2) = DDR_CS2_CONFIG_VAL; - DDR_CS_CONFIG_2(2) = DDR_CS_CONFIG_2_VAL; - DDR_CS_BNDS(3) = DDR_CS3_BNDS_VAL; - DDR_CS_CONFIG(3) = DDR_CS3_CONFIG_VAL; - DDR_CS_CONFIG_2(3) = DDR_CS_CONFIG_2_VAL; + set32(DDR_CS_BNDS(0), DDR_CS0_BNDS_VAL); + set32(DDR_CS_CONFIG(0), DDR_CS0_CONFIG_VAL); + set32(DDR_CS_CONFIG_2(0), DDR_CS_CONFIG_2_VAL); + set32(DDR_CS_BNDS(1), DDR_CS1_BNDS_VAL); + set32(DDR_CS_CONFIG(1), DDR_CS1_CONFIG_VAL); + set32(DDR_CS_CONFIG_2(1), DDR_CS_CONFIG_2_VAL); + set32(DDR_CS_BNDS(2), DDR_CS2_BNDS_VAL); + set32(DDR_CS_CONFIG(2), DDR_CS2_CONFIG_VAL); + set32(DDR_CS_CONFIG_2(2), DDR_CS_CONFIG_2_VAL); + set32(DDR_CS_BNDS(3), DDR_CS3_BNDS_VAL); + set32(DDR_CS_CONFIG(3), DDR_CS3_CONFIG_VAL); + set32(DDR_CS_CONFIG_2(3), DDR_CS_CONFIG_2_VAL); /* DDR SDRAM timing configuration */ - DDR_TIMING_CFG_0 = DDR_TIMING_CFG_0_VAL; - DDR_TIMING_CFG_1 = DDR_TIMING_CFG_1_VAL; - DDR_TIMING_CFG_2 = DDR_TIMING_CFG_2_VAL; - DDR_TIMING_CFG_3 = DDR_TIMING_CFG_3_VAL; - DDR_TIMING_CFG_4 = DDR_TIMING_CFG_4_VAL; - DDR_TIMING_CFG_5 = 
DDR_TIMING_CFG_5_VAL; + set32(DDR_TIMING_CFG_3, DDR_TIMING_CFG_3_VAL); + set32(DDR_TIMING_CFG_0, DDR_TIMING_CFG_0_VAL); + set32(DDR_TIMING_CFG_1, DDR_TIMING_CFG_1_VAL); + set32(DDR_TIMING_CFG_2, DDR_TIMING_CFG_2_VAL); + set32(DDR_TIMING_CFG_4, DDR_TIMING_CFG_4_VAL); + set32(DDR_TIMING_CFG_5, DDR_TIMING_CFG_5_VAL); + + set32(DDR_ZQ_CNTL, DDR_ZQ_CNTL_VAL); /* DDR SDRAM mode configuration */ - DDR_SDRAM_MODE = DDR_SDRAM_MODE_VAL; - DDR_SDRAM_MODE_2 = DDR_SDRAM_MODE_2_VAL; - DDR_SDRAM_MODE_3 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_4 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_5 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_6 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_7 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MODE_8 = DDR_SDRAM_MODE_3_8_VAL; - DDR_SDRAM_MD_CNTL = DDR_SDRAM_MD_CNTL_VAL; + set32(DDR_SDRAM_MODE, DDR_SDRAM_MODE_VAL); + set32(DDR_SDRAM_MODE_2, DDR_SDRAM_MODE_2_VAL); + set32(DDR_SDRAM_MODE_3, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_4, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_5, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_6, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_7, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MODE_8, DDR_SDRAM_MODE_3_8_VAL); + set32(DDR_SDRAM_MD_CNTL, DDR_SDRAM_MD_CNTL_VAL); /* DDR Configuration */ - DDR_SDRAM_INTERVAL = DDR_SDRAM_INTERVAL_VAL; - DDR_SDRAM_CLK_CNTL = DDR_SDRAM_CLK_CNTL_VAL; - DDR_DATA_INIT = DDR_DATA_INIT_VAL; - DDR_ZQ_CNTL = DDR_ZQ_CNTL_VAL; - DDR_WRLVL_CNTL = DDR_WRLVL_CNTL_VAL; - DDR_WRLVL_CNTL_2 = DDR_WRLVL_CNTL_2_VAL; - DDR_WRLVL_CNTL_3 = DDR_WRLVL_CNTL_3_VAL; - DDR_SR_CNTR = 0; - DDR_SDRAM_RCW_1 = 0; - DDR_SDRAM_RCW_2 = 0; - DDR_DDRCDR_1 = DDR_DDRCDR_1_VAL; - DDR_DDRCDR_2 = DDR_DDRCDR_2_VAL; - DDR_SDRAM_CFG_2 = DDR_SDRAM_CFG_2_VAL; - DDR_INIT_ADDR = 0; - DDR_INIT_EXT_ADDR = 0; - DDR_ERR_DISABLE = 0; - DDR_ERR_INT_EN = DDR_ERR_INT_EN_VAL; - DDR_ERR_SBE = DDR_ERR_SBE_VAL; + set32(DDR_SDRAM_INTERVAL, DDR_SDRAM_INTERVAL_VAL); + set32(DDR_DATA_INIT, DDR_DATA_INIT_VAL); + set32(DDR_WRLVL_CNTL, 
DDR_WRLVL_CNTL_VAL); + set32(DDR_WRLVL_CNTL_2, DDR_WRLVL_CNTL_2_VAL); + set32(DDR_WRLVL_CNTL_3, DDR_WRLVL_CNTL_3_VAL); + set32(DDR_SR_CNTR, 0); + set32(DDR_SDRAM_RCW_1, 0); + set32(DDR_SDRAM_RCW_2, 0); + set32(DDR_DDRCDR_1, DDR_DDRCDR_1_VAL); + set32(DDR_SDRAM_CFG_2, (DDR_SDRAM_CFG_2_VAL | DDR_SDRAM_CFG_2_D_INIT)); + set32(DDR_INIT_ADDR, 0); + set32(DDR_INIT_EXT_ADDR, 0); + set32(DDR_DDRCDR_2, DDR_DDRCDR_2_VAL); + set32(DDR_ERR_DISABLE, 0); + set32(DDR_ERR_INT_EN, DDR_ERR_INT_EN_VAL); + set32(DDR_ERR_SBE, DDR_ERR_SBE_VAL); /* Set values, but do not enable the DDR yet */ - DDR_SDRAM_CFG = (DDR_SDRAM_CFG_VAL & ~DDR_SDRAM_CFG_MEM_EN); + set32(DDR_SDRAM_CFG, DDR_SDRAM_CFG_VAL & ~DDR_SDRAM_CFG_MEM_EN); + __asm__ __volatile__("sync;isync"); - /* TODO: Errata A009942 */ + /* busy wait for ~500us */ + udelay(500); + __asm__ __volatile__("sync;isync"); /* Enable controller */ - DDR_SDRAM_CFG |= DDR_SDRAM_CFG_MEM_EN; + reg = get32(DDR_SDRAM_CFG) & ~DDR_SDRAM_CFG_BI; + set32(DDR_SDRAM_CFG, reg | DDR_SDRAM_CFG_MEM_EN); __asm__ __volatile__("sync;isync"); - /* Wait for data initialization is complete */ - while ((DDR_SDRAM_CFG_2 & DDR_SDRAM_CFG_2_D_INIT)); -#endif + /* Wait for data initialization to complete */ + while (get32(DDR_SDRAM_CFG_2) & DDR_SDRAM_CFG_2_D_INIT) { + /* busy wait loop - throttle polling */ + udelay(10000); + } + +#endif /* ENABLE_DDR */ } void hal_early_init(void) { + /* Enable timebase on core 0 */ + set32(RCPM_PCTBENR, (1 << 0)); + + /* Only invalidate the CPC if it is NOT configured as SRAM. + * When CPC SRAM is active (used as stack), writing CPCFI|CPCLFC + * without preserving CPCE would disable the CPC and corrupt the + * stack. Skip invalidation when SRAMEN is set (T2080RM 8.4.2.2). 
*/ + if (!(get32((volatile uint32_t*)(CPC_BASE + CPCSRCR0)) & CPCSRCR0_SRAMEN)) { + set32((volatile uint32_t*)(CPC_BASE + CPCCSR0), + (CPCCSR0_CPCFI | CPCCSR0_CPCLFC)); + /* Wait for self-clearing invalidate bits */ + while (get32((volatile uint32_t*)(CPC_BASE + CPCCSR0)) & + (CPCCSR0_CPCFI | CPCCSR0_CPCLFC)); + } + + /* Set DCSR space = 1G */ + set32(DCFG_DCSR, (get32(DCFG_DCSR) | CORENET_DCSR_SZ_1G)); + get32(DCFG_DCSR); /* read back to sync */ + hal_ddr_init(); } @@ -459,27 +239,27 @@ static void hal_cpld_init(void) { #ifdef ENABLE_CPLD /* CPLD IFC Timing Parameters */ - IFC_FTIM0(3) = (IFC_FTIM0_GPCM_TACSE(16UL) | - IFC_FTIM0_GPCM_TEADC(16UL) | - IFC_FTIM0_GPCM_TEAHC(16UL)); - IFC_FTIM1(3) = (IFC_FTIM1_GPCM_TACO(16UL) | - IFC_FTIM1_GPCM_TRAD(31UL)); - IFC_FTIM2(3) = (IFC_FTIM2_GPCM_TCS(16UL) | - IFC_FTIM2_GPCM_TCH(8UL) | - IFC_FTIM2_GPCM_TWP(31UL)); - IFC_FTIM3(3) = 0; + set32(IFC_FTIM0(3), (IFC_FTIM0_GPCM_TACSE(16UL) | + IFC_FTIM0_GPCM_TEADC(16UL) | + IFC_FTIM0_GPCM_TEAHC(16UL))); + set32(IFC_FTIM1(3), (IFC_FTIM1_GPCM_TACO(16UL) | + IFC_FTIM1_GPCM_TRAD(31UL))); + set32(IFC_FTIM2(3), (IFC_FTIM2_GPCM_TCS(16UL) | + IFC_FTIM2_GPCM_TCH(8UL) | + IFC_FTIM2_GPCM_TWP(31UL))); + set32(IFC_FTIM3(3), 0); /* CPLD IFC Definitions (CS3) */ - IFC_CSPR_EXT(3) = CPLD_BASE_PHYS_HIGH; - IFC_CSPR(3) = (IFC_CSPR_PHYS_ADDR(CPLD_BASE) | - IFC_CSPR_PORT_SIZE_16 | - IFC_CSPR_MSEL_GPCM | - IFC_CSPR_V); - IFC_AMASK(3) = IFC_AMASK_64KB; - IFC_CSOR(3) = 0; - - /* IFC - CPLD */ - set_law(2, CPLD_BASE_PHYS_HIGH, CPLD_BASE, + set32(IFC_CSPR_EXT(3), CPLD_BASE_PHYS_HIGH); + set32(IFC_CSPR(3), (IFC_CSPR_PHYS_ADDR(CPLD_BASE) | + IFC_CSPR_PORT_SIZE_16 | + IFC_CSPR_MSEL_GPCM | + IFC_CSPR_V)); + set32(IFC_AMASK(3), IFC_AMASK_64KB); + set32(IFC_CSOR(3), 0); + + /* IFC - CPLD (use LAW 5; LAW 2 is used for CPC SRAM) */ + set_law(5, CPLD_BASE_PHYS_HIGH, CPLD_BASE, LAW_TRGT_IFC, LAW_SIZE_4KB, 1); /* CPLD - TBL=1, Entry 17 */ @@ -489,85 +269,724 @@ static void hal_cpld_init(void) #endif } +#ifdef 
ENABLE_DDR +/* Release CPC SRAM back to L2 cache mode. + * Call after stack is relocated to DDR (done in boot_entry_C). + * This gives us the full 2MB CPC as L3 cache for better performance. + * + * Before releasing CPC SRAM, .ramcode (RAMFUNCTION) is copied to DDR + * and TLB9 is remapped: VA 0xF8F00000 -> PA DDR_RAMCODE_ADDR so that + * RAMFUNCTION code (memcpy, wolfBoot_start, etc.) continues to work. */ +static void hal_reconfigure_cpc_as_cache(void) +{ + volatile uint32_t *cpc_csr0 = (volatile uint32_t *)(CPC_BASE + CPCCSR0); + volatile uint32_t *cpc_srcr0 = (volatile uint32_t *)(CPC_BASE + CPCSRCR0); + uint32_t reg; + + /* Linker symbols for .ramcode section boundaries */ + extern unsigned int _start_ramcode; + extern unsigned int _end_ramcode; + uint32_t ramcode_size = (uint32_t)&_end_ramcode - (uint32_t)&_start_ramcode; + + /* Step 1: Copy .ramcode from CPC SRAM to DDR. + * Must use volatile loop — memcpy itself is in .ramcode! */ + if (ramcode_size > 0) { + volatile const uint32_t *src = (volatile const uint32_t *)&_start_ramcode; + volatile uint32_t *dst = (volatile uint32_t *)DDR_RAMCODE_ADDR; + volatile uint32_t *end = (volatile uint32_t *)(DDR_RAMCODE_ADDR + + ramcode_size); + while (dst < end) { + *dst++ = *src++; + } + + /* Flush D-cache and invalidate I-cache for the DDR copy */ + flush_cache(DDR_RAMCODE_ADDR, ramcode_size); + + /* Step 2: Remap TLB9: same VA (0xF8F00000) -> DDR physical address. + * All .ramcode references use VA 0xF8F00000, so this makes them + * transparently access the DDR copy instead of CPC SRAM. */ + set_tlb(1, 9, + L2SRAM_ADDR, DDR_RAMCODE_ADDR, 0, + MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, + INITIAL_SRAM_BOOKE_SZ, 1); + + /* Ensure TLB update and I-cache pick up new mapping */ + invalidate_icache(); + } + +#ifdef DEBUG_UART + wolfBoot_printf("Ramcode: copied %d bytes to DDR, TLB9 remapped\n", + ramcode_size); +#endif + + /* Step 3: Flush the CPC to push any dirty SRAM data out. 
+ * Read-modify-write to preserve CPCE/CPCPE enable bits. */ + reg = *cpc_csr0; + reg |= CPCCSR0_CPCFL; + *cpc_csr0 = reg; + __asm__ __volatile__("sync; isync" ::: "memory"); + + /* Step 4: Poll until flush completes (CPCFL clears) */ + while (*cpc_csr0 & CPCCSR0_CPCFL); + + /* Step 5: Disable SRAM mode - release all ways back to cache */ + *cpc_srcr0 = 0; + __asm__ __volatile__("sync; isync" ::: "memory"); + + /* Step 6: Disable CPC SRAM LAW (no longer needed — TLB9 now routes + * to DDR via LAW4, not CPC SRAM via LAW2). + * Keep TLB9 — it's remapped to DDR and still in use. */ + set32(LAWAR(2), 0); + + /* Step 7: Flash invalidate CPC to start fresh as cache */ + reg = *cpc_csr0; + reg |= CPCCSR0_CPCFI; + *cpc_csr0 = reg; + __asm__ __volatile__("sync; isync" ::: "memory"); + while (*cpc_csr0 & CPCCSR0_CPCFI); + + /* Step 8: Enable parity/ECC now that SRAM is released and cache is clean. + * CPCPE was intentionally omitted during ASM init to avoid ECC machine + * checks on uninitialized SRAM (cold power cycle). Safe to enable here: + * SRAM mode is off, CPC is freshly invalidated, no stale data. */ + reg = *cpc_csr0; + reg |= CPCCSR0_CPCPE; + *cpc_csr0 = reg; + __asm__ __volatile__("sync; isync" ::: "memory"); + + /* CPC is now fully enabled (CPCE|CPCPE), all 2MB as L3 cache */ + +#ifdef DEBUG_UART + wolfBoot_printf("CPC: Released SRAM, full 2MB L3 CPC cache enabled\n"); +#endif +} + +/* Make flash TLB cacheable for XIP code performance. + * Changes TLB Entry 2 (flash) from MAS2_I|MAS2_G to MAS2_M. + * This enables L1 I-cache + L2 + CPC to cache flash instructions. */ +static void hal_flash_enable_caching(void) +{ + /* Rewrite flash TLB entry with cacheable attributes. 
+ * MAS2_M = memory coherent, enables caching */ + set_tlb(1, 2, + FLASH_BASE_ADDR, FLASH_BASE_ADDR, FLASH_BASE_PHYS_HIGH, + MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, + FLASH_TLB_PAGESZ, 1); + + /* Invalidate L1 I-cache so new TLB attributes take effect */ + invalidate_icache(); + +#ifdef DEBUG_UART + wolfBoot_printf("Flash: caching enabled (L1+L2+CPC)\n"); +#endif +} +#endif /* ENABLE_DDR */ + + void hal_init(void) { #if defined(DEBUG_UART) && defined(ENABLE_CPLD) uint32_t fw; #endif + /* Enable timebase on core 0 */ + set32(RCPM_PCTBENR, (1 << 0)); + law_init(); #ifdef DEBUG_UART uart_init(); uart_write("wolfBoot Init\n", 14); +#ifndef WOLFBOOT_REPRODUCIBLE_BUILD + wolfBoot_printf("Build: %s %s\n", __DATE__, __TIME__); +#endif + wolfBoot_printf("System Clock: %lu MHz\n", + (unsigned long)(SYS_CLK / 1000000)); + wolfBoot_printf("Platform Clock: %lu MHz\n", + (unsigned long)(hal_get_plat_clk() / 1000000)); + wolfBoot_printf("Core Clock: %lu MHz\n", + (unsigned long)(hal_get_core_clk() / 1000000)); + wolfBoot_printf("Bus Clock: %lu MHz\n", + (unsigned long)(hal_get_bus_clk() / 1000000)); + wolfBoot_printf("Timebase: %lu MHz\n", + (unsigned long)(TIMEBASE_HZ / 1000000)); #endif hal_flash_init(); hal_cpld_init(); #ifdef ENABLE_CPLD - CPLD_DATA(CPLD_PROC_STATUS) = 1; /* Enable proc reset */ - CPLD_DATA(CPLD_WR_TEMP_ALM_OVRD) = 0; /* Enable temp alarm */ + set8(CPLD_DATA(CPLD_PROC_STATUS), 1); /* Enable proc reset */ + set8(CPLD_DATA(CPLD_WR_TEMP_ALM_OVRD), 0); /* Enable temp alarm */ #ifdef DEBUG_UART - fw = CPLD_DATA(CPLD_FW_REV); + fw = get8(CPLD_DATA(CPLD_FW_REV)); wolfBoot_printf("CPLD FW Rev: 0x%x\n", fw); #endif #endif /* ENABLE_CPLD */ -#if 0 /* not tested */ - /* Disable SATA Write Protection */ - SATA_ENBL = 0; +#ifdef ENABLE_DDR + /* Stack is already in DDR (relocated in boot_entry_C via + * ddr_call_with_stack trampoline before main() was called). + * + * Now release CPC SRAM back to L2 cache and enable flash caching. 
+ * This dramatically improves ECC signature verification performance: + * - CPC (2MB) becomes L3 cache for all memory accesses + * - Flash code is cached by L1 I-cache + L2 + CPC + * - Stack/data in DDR is cached by L1 D-cache + L2 + CPC */ + hal_reconfigure_cpc_as_cache(); + hal_flash_enable_caching(); + + /* Enable branch prediction now that DDR stack and cache hierarchy + * are fully configured. Disabled during early ASM boot to avoid + * speculative fetches during hardware init. */ + { + uint32_t bucsr = BUCSR_STAC_EN | BUCSR_LS_EN | BUCSR_BBFI | BUCSR_BPEN; + __asm__ __volatile__("mtspr %0, %1; isync" :: "i"(SPRN_BUCSR), "r"(bucsr)); + } +#endif + +#ifdef ENABLE_MP + /* Start secondary cores AFTER CPC release and flash caching. + * Secondary cores' L2 flash-invalidate on the shared cluster L2 + * must not disrupt the CPC SRAM→cache transition. Starting them + * after ensures the cache hierarchy is fully stable. */ + hal_mp_init(); #endif } -int hal_flash_write(uint32_t address, const uint8_t *data, int len) +/* Switch flash TLB to cache-inhibited + guarded for direct flash chip access. + * AMD flash commands require writes to reach the chip immediately and status + * reads to come directly from the chip. With MAS2_M (cacheable), stores go + * through the CPC coherency fabric; IFC does not support coherent writes and + * returns a bus error (DSI). */ +static void RAMFUNCTION hal_flash_cache_disable(void) { - (void)address; - (void)data; - (void)len; - /* TODO: Implement NOR flash write using IFC */ - return 0; + set_tlb(1, 2, FLASH_BASE_ADDR, FLASH_BASE_ADDR, FLASH_BASE_PHYS_HIGH, + MAS3_SX | MAS3_SW | MAS3_SR, MAS2_I | MAS2_G, 0, FLASH_TLB_PAGESZ, 1); } -int hal_flash_erase(uint32_t address, int len) +/* Restore flash TLB to cacheable mode after flash operation. + * Flash must be back in read-array mode before calling (AMD_CMD_RESET sent). + * Invalidate caches afterward so stale pre-erase data is not served. 
*/ +static void RAMFUNCTION hal_flash_cache_enable(void) { - (void)address; - (void)len; - /* TODO: Implement NOR flash erase using IFC */ + set_tlb(1, 2, FLASH_BASE_ADDR, FLASH_BASE_ADDR, FLASH_BASE_PHYS_HIGH, + MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, FLASH_TLB_PAGESZ, 1); + invalidate_dcache(); + invalidate_icache(); +} + +/* Clear IFC write-protect. T2080RM says IFC_CSPR should only be written + * when V=0. Must be called from RAMFUNCTION (DDR) with flash TLB set to + * guarded (MAS2_G) so no speculative access occurs while V is briefly 0. */ +static void RAMFUNCTION hal_flash_clear_wp(void) +{ + uint32_t cspr = get32(IFC_CSPR(0)); + if (cspr & IFC_CSPR_WP) { + /* Clear V first, then modify WP, then re-enable V */ + set32(IFC_CSPR(0), cspr & ~(IFC_CSPR_WP | IFC_CSPR_V)); + __asm__ __volatile__("sync; isync"); + set32(IFC_CSPR(0), (cspr & ~IFC_CSPR_WP) | IFC_CSPR_V); + __asm__ __volatile__("sync; isync"); + /* Verify WP cleared */ + cspr = get32(IFC_CSPR(0)); + #ifdef DEBUG_FLASH + wolfBoot_printf("WP clear: CSPR0=0x%x%s\n", cspr, + (cspr & IFC_CSPR_WP) ? " (FAILED)" : " (OK)"); + #endif + } +} + +static void RAMFUNCTION hal_flash_unlock_sector(uint32_t sector) +{ + /* AMD unlock sequence */ + FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); + FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); +} + +/* Check and clear PPB (Persistent Protection Bits) for a sector. + * S29GL01GS has per-sector non-volatile protection bits. If set, erase/program + * fails with DQ5 error. PPB erase is chip-wide (clears ALL sectors). + * Returns: 0 if unprotected or successfully cleared, -1 on failure. 
*/ +static int RAMFUNCTION hal_flash_ppb_unlock(uint32_t sector) +{ + uint16_t ppb_status; + uint16_t read1, read2; + uint32_t timeout; + + /* Enter PPB ASO (Address Space Overlay) */ + FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); + FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); + FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, AMD_CMD_SET_PPB_ENTRY); + + /* Read PPB status for target sector: DQ0=0 means protected. + * On 16-bit bus, must read both chip lanes to check both devices. */ +#if FLASH_CFI_WIDTH == 16 + ppb_status = FLASH_IO16_READ(sector, 0); + if ((ppb_status & 0x0101) == 0x0101) { +#else + ppb_status = FLASH_IO8_READ(sector, 0); + if ((ppb_status & 0x01) == 0x01) { +#endif + /* Both chips report unprotected — exit PPB mode and return */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC1); + FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC2); + return 0; + } + + /* Exit PPB ASO before calling printf (flash must be in read-array + * mode for I-cache misses to fetch valid instructions) */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC1); + FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC2); + FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + udelay(50); + +#ifdef DEBUG_FLASH + wolfBoot_printf("PPB: sector %d protected (0x%x), erasing all PPBs\n", + sector, ppb_status); +#endif + + /* Re-enter PPB ASO for erase */ + FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START); + FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK); + FLASH_IO8_WRITE(0, FLASH_UNLOCK_ADDR1, AMD_CMD_SET_PPB_ENTRY); + + /* PPB Erase All (clears all sectors' PPBs) */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_PPB_UNLOCK_BC1); /* 0x80 */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_PPB_UNLOCK_BC2); /* 0x30 */ + + /* Wait for PPB erase completion — poll for toggle stop. + * On 16-bit bus, read both chip lanes to ensure both complete. 
*/ + timeout = 0; + do { +#if FLASH_CFI_WIDTH == 16 + read1 = FLASH_IO16_READ(0, 0); + read2 = FLASH_IO16_READ(0, 0); +#else + read1 = FLASH_IO8_READ(0, 0); + read2 = FLASH_IO8_READ(0, 0); +#endif + if (read1 == read2) + break; + udelay(10); + } while (timeout++ < 100000); /* 1 second */ + + /* Exit PPB ASO */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC1); + FLASH_IO8_WRITE(0, 0, AMD_CMD_SET_PPB_EXIT_BC2); + + /* Reset to read-array mode */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + udelay(50); + + if (timeout >= 100000) { + #ifdef DEBUG_FLASH + wolfBoot_printf("PPB: erase timeout\n"); + #endif + return -1; + } + +#ifdef DEBUG_FLASH + wolfBoot_printf("PPB: erase complete\n"); +#endif return 0; } -void hal_flash_unlock(void) +/* wait for DQ6 toggle to stop within microsecond timeout. + * RAMFUNCTION: executes from DDR while flash is in program/erase command mode. */ +static int RAMFUNCTION hal_flash_status_wait(uint32_t sector, uint32_t timeout_us) { - /* Disable all flash protection bits */ - /* enter Non-volatile protection mode (C0h) */ - *((volatile uint16_t*)(FLASH_BASE + 0xAAA)) = 0xAAAA; - *((volatile uint16_t*)(FLASH_BASE + 0x554)) = 0x5555; - *((volatile uint16_t*)(FLASH_BASE + 0xAAA)) = 0xC0C0; - /* clear all protection bit (80h/30h) */ - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x8080; - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x3030; - /* exit Non-volatile protection mode (90h/00h) */ - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x9090; - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x0000; + int ret = 0; + uint32_t timeout = 0; + uint16_t read1, read2; + + /* Replicate 8-bit AMD toggle/error bits to both bytes for parallel chips */ +#if FLASH_CFI_WIDTH == 16 + uint16_t toggle16 = (AMD_STATUS_TOGGLE << 8) | AMD_STATUS_TOGGLE; + uint16_t error16 = (AMD_STATUS_ERROR << 8) | AMD_STATUS_ERROR; +#else + uint16_t toggle16 = AMD_STATUS_TOGGLE; + uint16_t error16 = AMD_STATUS_ERROR; +#endif + + do { + /* AMD toggle detection: DQ6 toggles on 
consecutive reads during + * program/erase. When the operation completes, DQ6 reflects actual + * data and consecutive reads return the same value. + * NOTE: Do NOT check programmed data bits against a mask here — + * after write completes, the data depends on what was written, not + * on any fixed status bits. Only erase guarantees 0xFF data. */ +#if FLASH_CFI_WIDTH == 16 + read1 = FLASH_IO16_READ(sector, 0); + read2 = FLASH_IO16_READ(sector, 0); +#else + read1 = FLASH_IO8_READ(sector, 0); + read2 = FLASH_IO8_READ(sector, 0); +#endif + #ifdef DEBUG_FLASH + wolfBoot_printf("Wait toggle %x -> %x\n", read1, read2); + #endif + /* DQ6 stopped toggling → operation complete */ + if (((read1 ^ read2) & toggle16) == 0) + break; + /* Check DQ5 (error) on both chips while still toggling */ + if (read1 & error16) { + /* Read one more time to confirm it's not a false DQ5 */ +#if FLASH_CFI_WIDTH == 16 + read1 = FLASH_IO16_READ(sector, 0); + read2 = FLASH_IO16_READ(sector, 0); +#else + read1 = FLASH_IO8_READ(sector, 0); + read2 = FLASH_IO8_READ(sector, 0); +#endif + if (((read1 ^ read2) & toggle16) == 0) + break; /* toggle stopped — was a race, not an error */ + ret = -2; /* DQ5 error — program/erase failed */ + break; + } + udelay(1); + } while (timeout++ < timeout_us); + if (timeout >= timeout_us) { + ret = -1; /* timeout */ + } +#ifdef DEBUG_FLASH + wolfBoot_printf("Wait done (%d tries): %x -> %x\n", + timeout, read1, read2); +#endif + return ret; +} + +int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len) +{ + int ret = 0; + uint32_t i, sector, offset, nwords; + const uint32_t width_bytes = FLASH_CFI_WIDTH / 8; + + /* Enforce alignment to flash bus width */ + if ((address % width_bytes) != 0 || (len % width_bytes) != 0) { + #ifdef DEBUG_FLASH + wolfBoot_printf("Flash Write: unaligned addr 0x%x or len %d " + "(need %d-byte alignment)\n", address, len, width_bytes); + #endif + return -1; + } + + /* adjust for flash base */ + if (address >= 
FLASH_BASE_ADDR) + address -= FLASH_BASE_ADDR; + +#ifdef DEBUG_FLASH + wolfBoot_printf("Flash Write: Ptr %p -> Addr 0x%x (len %d)\n", + data, address, len); +#endif + + /* Disable flash caching — AMD commands must reach the chip directly */ + hal_flash_cache_disable(); + hal_flash_clear_wp(); + + /* Reset flash to read-array mode in case previous operation left it + * in command mode (e.g. after a timeout or incomplete operation) */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + udelay(50); + + /* Program one word at a time using AMD single-word program (0xA0). + * Each word requires: unlock + 0xA0 + data → poll. + * Typical program time: 60-120us per word. + * This is simpler and more reliable than Write-Buffer-Program (WBP), + * which had DQ1 abort/timeout issues on this IFC + S29GL01GS + * combination. WBP can be re-enabled as an optimization once + * single-word program is verified working on hardware. */ + nwords = (uint32_t)len / width_bytes; + for (i = 0; i < nwords; i++) { + sector = address / FLASH_SECTOR_SIZE; + offset = (address - (sector * FLASH_SECTOR_SIZE)) / width_bytes; + + hal_flash_unlock_sector(sector); + FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR1, AMD_CMD_WRITE); + #if FLASH_CFI_WIDTH == 16 + { + /* Build 16-bit value from bytes to avoid unaligned access */ + const uint8_t *p = &data[i * 2]; + uint16_t val = ((uint16_t)p[0] << 8) | (uint16_t)p[1]; + FLASH_IO16_WRITE(sector, offset, val); + } + #else + FLASH_IO8_WRITE(sector, offset, data[i]); + #endif + + /* Poll for program completion (typical 60-120us, max 200ms) */ + ret = hal_flash_status_wait(sector, 200 * 1000); + if (ret != 0) { + FLASH_IO8_WRITE(sector, 0, AMD_CMD_RESET); + udelay(50); + #ifdef DEBUG_FLASH + wolfBoot_printf("Flash Write: %s at addr 0x%x\n", + ret == -2 ? 
"DQ5 error" : "Timeout", + (uint32_t)(FLASH_BASE_ADDR + address)); + #endif + break; + } + + address += width_bytes; + } + + /* Restore flash caching — flash is back in read-array mode */ + hal_flash_cache_enable(); + return ret; +} + +int RAMFUNCTION hal_flash_erase(uint32_t address, int len) +{ + int ret = 0; + uint32_t sector; + + /* adjust for flash base */ + if (address >= FLASH_BASE_ADDR) + address -= FLASH_BASE_ADDR; + + /* Disable flash caching — AMD commands must reach the chip directly */ + hal_flash_cache_disable(); + hal_flash_clear_wp(); + + /* Reset flash to read-array mode in case previous operation left it + * in command mode (e.g. after a timeout or incomplete operation) */ + FLASH_IO8_WRITE(0, 0, AMD_CMD_RESET); + udelay(50); + + while (len > 0) { + /* determine sector address */ + sector = (address / FLASH_SECTOR_SIZE); + + #ifdef DEBUG_FLASH + wolfBoot_printf("Flash Erase: Sector %d, Addr 0x%x, Len %d\n", + sector, address, len); + #endif + + /* Check and clear PPB protection if set */ + if (hal_flash_ppb_unlock(sector) != 0) { + #ifdef DEBUG_FLASH + wolfBoot_printf("Flash Erase: PPB unlock failed sector %d\n", sector); + #endif + ret = -1; + break; + } + + #ifdef DEBUG_FLASH + wolfBoot_printf("Erasing sector %d...\n", sector); + #endif + + hal_flash_unlock_sector(sector); + FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR1, AMD_CMD_ERASE_START); + hal_flash_unlock_sector(sector); + FLASH_IO8_WRITE(sector, 0, AMD_CMD_ERASE_SECTOR); + /* block erase timeout = 50us - for additional sectors */ + /* Typical is 200ms (max 1100ms) */ + + /* poll for erase completion - max 1.1 sec + * NOTE: Do NOT call wolfBoot_printf while flash is in erase mode. + * With cache-inhibited TLB, I-cache misses fetch from flash which + * returns status data instead of instructions. 
*/ + ret = hal_flash_status_wait(sector, 1100*1000); + if (ret != 0) { + /* Reset flash to read-array mode BEFORE calling printf */ + FLASH_IO8_WRITE(sector, 0, AMD_CMD_RESET); + udelay(50); + #ifdef DEBUG_FLASH + wolfBoot_printf("Flash Erase: Timeout at sector %d\n", sector); + #endif + break; + } + + /* Erase succeeded — flash is back in read-array mode. + * Reset to be safe before any printf (I-cache may miss) */ + FLASH_IO8_WRITE(sector, 0, AMD_CMD_RESET); + udelay(10); + #ifdef DEBUG_FLASH + wolfBoot_printf("Erase sector %d: OK\n", sector); + #endif + + address += FLASH_SECTOR_SIZE; + len -= FLASH_SECTOR_SIZE; + } + + /* Restore flash caching — flash is back in read-array mode */ + hal_flash_cache_enable(); + return ret; +} + +void RAMFUNCTION hal_flash_unlock(void) +{ + /* Per-sector unlock is done in hal_flash_write/erase before each operation. + * The previous non-volatile PPB protection mode (C0h) approach caused + * unnecessary wear on PPB cells since it was called on every boot. 
*/ + hal_flash_unlock_sector(0); } void hal_flash_lock(void) { - /* Enable all flash protection bits */ - /* enter Non-volatile protection mode (C0h) */ - *((volatile uint16_t*)(FLASH_BASE + 0xAAA)) = 0xAAAA; - *((volatile uint16_t*)(FLASH_BASE + 0x554)) = 0x5555; - *((volatile uint16_t*)(FLASH_BASE + 0xAAA)) = 0xC0C0; - /* set all protection bit (A0h/00h) */ - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0xA0A0; - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x0000; - /* exit Non-volatile protection mode (90h/00h) */ - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x9090; - *((volatile uint16_t*)(FLASH_BASE + 0x000)) = 0x0000; + /* intentional no-op: per-sector unlock is done in hal_flash_write/erase */ } +/* SMP Multi-Processor Driver */ +#ifdef ENABLE_MP + +/* from boot_ppc_mp.S */ +extern uint32_t _secondary_start_page; +extern uint32_t _second_half_boot_page; +extern uint32_t _spin_table[]; +extern uint32_t _spin_table_addr; + +/* DDR address of the spin table, set during hal_mp_init() and reused in + * hal_dts_fixup() for cpu-release-addr fixups. */ +static uint32_t g_spin_table_ddr = 0; +extern uint32_t _bootpg_addr; + +/* Startup additional cores with spin table and synchronize the timebase. 
+ * spin_table_ddr: DDR address of the spin table (for checking status) */ +static void hal_mp_up(uint32_t bootpg, uint32_t spin_table_ddr) +{ + uint32_t all_cores, active_cores, whoami; + int timeout = 50, i; + + whoami = get32(PIC_WHOAMI); /* Get current running core number */ + all_cores = ((1 << CPU_NUMCORES) - 1); /* mask of all cores */ + active_cores = (1 << whoami); /* current running cores */ + + wolfBoot_printf("MP: Starting cores (boot page %p, spin table %p)\n", + bootpg, spin_table_ddr); + + /* Set the boot page translation register */ + set32(LCC_BSTRH, 0); + set32(LCC_BSTRL, bootpg); + set32(LCC_BSTAR, (LCC_BSTAR_EN | + LCC_BSTAR_LAWTRGT(LAW_TRGT_DDR_1) | + LAW_SIZE_4KB)); + (void)get32(LCC_BSTAR); /* read back to sync */ + + /* Enable time base on current core only */ + set32(RCPM_PCTBENR, (1 << whoami)); + + /* Release the CPU core(s) */ + set32(DCFG_BRR, all_cores); + __asm__ __volatile__("sync; isync; msync"); + + /* wait for other core(s) to start */ + while (timeout) { + for (i = 0; i < CPU_NUMCORES; i++) { + volatile uint32_t* entry = (volatile uint32_t*)( + spin_table_ddr + (i * ENTRY_SIZE) + ENTRY_ADDR_LOWER); + if (*entry) { + active_cores |= (1 << i); + } + } + if ((active_cores & all_cores) == all_cores) { + break; + } + + udelay(100); + timeout--; + } + + if (timeout == 0) { + wolfBoot_printf("MP: Timeout enabling additional cores!\n"); + } + + /* Synchronize and reset timebase across all cores. + * On e6500, mtspr to TBL/TBU (SPR 284/285) may cause an illegal + * instruction exception — skip timebase reset if secondary cores + * did not start (timebase sync only matters for multi-core). 
*/ + if ((active_cores & all_cores) == all_cores) { + /* Disable all timebases */ + set32(RCPM_PCTBENR, 0); + + /* Reset our timebase */ + mtspr(SPRN_TBWU, 0); + mtspr(SPRN_TBWL, 0); + + /* Enable timebase for all cores */ + set32(RCPM_PCTBENR, all_cores); + } else { + /* Only re-enable timebase for boot core */ + set32(RCPM_PCTBENR, (1 << whoami)); + } +} + +static void hal_mp_init(void) +{ + uint32_t *fixup = (uint32_t*)&_secondary_start_page; + uint32_t bootpg, second_half_ddr, spin_table_ddr; + int i_tlb = 0; /* always 0 */ + size_t i; + const volatile uint32_t *s; + volatile uint32_t *d; + + /* Assign virtual boot page at end of LAW-mapped DDR region. + * DDR LAW maps 2GB (LAW_SIZE_2GB) starting at DDR_ADDRESS. + * DDR_SIZE may exceed 32-bit range (e.g. 8GB), so use the LAW-mapped + * size to ensure bootpg fits in 32 bits and is accessible. */ + bootpg = DDR_ADDRESS + 0x80000000UL - BOOT_ROM_SIZE; + + /* Second half boot page (spin loop + spin table) goes just below. + * For XIP flash builds, .bootmp is in flash — secondary cores can't + * write to flash, so the spin table MUST be in DDR. */ + second_half_ddr = bootpg - BOOT_ROM_SIZE; + + /* DDR addresses for second half symbols */ + spin_table_ddr = second_half_ddr + + ((uint32_t)_spin_table - (uint32_t)&_second_half_boot_page); + + /* Flush DDR destination before copying */ + flush_cache(bootpg, BOOT_ROM_SIZE); + flush_cache(second_half_ddr, BOOT_ROM_SIZE); + + /* Map reset page to bootpg so we can copy code there. + * Boot page translation will redirect secondary core fetches from + * 0xFFFFF000 to bootpg in DDR. */ + disable_tlb1(i_tlb); + set_tlb(1, i_tlb, BOOT_ROM_ADDR, bootpg, 0, /* tlb, epn, rpn, urpn */ + (MAS3_SX | MAS3_SW | MAS3_SR), (MAS2_I | MAS2_G), /* perms, wimge */ + 0, BOOKE_PAGESZ_4K, 1); /* ts, esel, tsize, iprot */ + + /* Copy first half (startup code) to DDR via BOOT_ROM_ADDR mapping. + * Uses cache-inhibited TLB to ensure data reaches DDR immediately. 
*/ + s = (const uint32_t*)fixup; + d = (uint32_t*)BOOT_ROM_ADDR; + for (i = 0; i < BOOT_ROM_SIZE/4; i++) { + d[i] = s[i]; + } + + /* Write _bootpg_addr and _spin_table_addr into the DDR first-half copy. + * These variables are .long 0 in the linked .bootmp (flash), and direct + * stores to their flash addresses silently fail on XIP builds. + * Calculate offsets within the boot page and write via BOOT_ROM_ADDR. */ + { + volatile uint32_t *bp = (volatile uint32_t*)(BOOT_ROM_ADDR + + ((uint32_t)&_bootpg_addr - (uint32_t)&_secondary_start_page)); + volatile uint32_t *st = (volatile uint32_t*)(BOOT_ROM_ADDR + + ((uint32_t)&_spin_table_addr - (uint32_t)&_secondary_start_page)); + *bp = second_half_ddr; + *st = spin_table_ddr; + } + + /* Copy second half (spin loop + spin table) directly to DDR. + * Master has DDR TLB (entry 12, MAS2_M). Flush cache after copy + * to ensure secondary cores see the data. */ + s = (const uint32_t*)&_second_half_boot_page; + d = (uint32_t*)second_half_ddr; + for (i = 0; i < BOOT_ROM_SIZE/4; i++) { + d[i] = s[i]; + } + flush_cache(second_half_ddr, BOOT_ROM_SIZE); + + /* Persist DDR spin-table base for use in hal_dts_fixup() */ + g_spin_table_ddr = spin_table_ddr; + + /* start cores and wait for them to be enabled */ + hal_mp_up(bootpg, spin_table_ddr); +} +#endif /* ENABLE_MP */ + void hal_prepare_boot(void) { @@ -578,4 +997,94 @@ void* hal_get_dts_address(void) { return (void*)WOLFBOOT_DTS_BOOT_ADDRESS; } -#endif + +int hal_dts_fixup(void* dts_addr) +{ +#ifndef BUILD_LOADER_STAGE1 + struct fdt_header *fdt = (struct fdt_header *)dts_addr; + int off; + uint32_t *reg; + + /* verify the FDT is valid */ + off = fdt_check_header(dts_addr); + if (off != 0) { + wolfBoot_printf("FDT: Invalid header! 
%d\n", off); + return off; + } + + /* display FDT information */ + wolfBoot_printf("FDT: Version %d, Size %d\n", + fdt_version(fdt), fdt_totalsize(fdt)); + + /* expand total size */ + { + uint32_t new_size = (uint32_t)fdt_totalsize(fdt) + 2048U; + fdt_set_totalsize(fdt, new_size); + wolfBoot_printf("FDT: Expanded (2KB) to %d bytes\n", + fdt_totalsize(fdt)); + } + + /* fixup the memory region - single bank */ + off = fdt_find_devtype(fdt, -1, "memory"); + if (off >= 0) { + /* build addr/size as aligned 64-bit values */ + uint64_t ranges[2]; + ranges[0] = cpu_to_fdt64(DDR_ADDRESS); + ranges[1] = cpu_to_fdt64(DDR_SIZE); + wolfBoot_printf("FDT: Set memory, start=0x%x, size=0x%x\n", + DDR_ADDRESS, (uint32_t)DDR_SIZE); + fdt_setprop(fdt, off, "reg", ranges, sizeof(ranges)); + } + + /* fixup CPU status and release address and enable method */ + off = fdt_find_devtype(fdt, -1, "cpu"); + while (off >= 0) { + int core; + #ifdef ENABLE_MP + uint64_t core_spin_table; + #endif + + reg = (uint32_t*)fdt_getprop(fdt, off, "reg", NULL); + if (reg == NULL) + break; + core = (int)fdt32_to_cpu(*reg); + if (core >= CPU_NUMCORES) { + break; /* invalid core index */ + } + + #ifdef ENABLE_MP + /* Calculate DDR address of this core's spin table entry. + * Must use g_spin_table_ddr (the DDR copy), NOT _spin_table which + * is the flash/VMA address — Linux writes the release word to this + * address, and XIP flash is read-only. */ + core_spin_table = (uint64_t)(g_spin_table_ddr + (core * ENTRY_SIZE)); + + fdt_fixup_str(fdt, off, "cpu", "status", (core == 0) ? 
"okay" : "disabled"); + fdt_fixup_val64(fdt, off, "cpu", "cpu-release-addr", core_spin_table); + fdt_fixup_str(fdt, off, "cpu", "enable-method", "spin-table"); + #endif + fdt_fixup_val(fdt, off, "cpu", "timebase-frequency", TIMEBASE_HZ); + fdt_fixup_val(fdt, off, "cpu", "clock-frequency", hal_get_core_clk()); + fdt_fixup_val(fdt, off, "cpu", "bus-frequency", hal_get_plat_clk()); + + off = fdt_find_devtype(fdt, off, "cpu"); + } + + /* fixup the soc clock */ + off = fdt_find_devtype(fdt, -1, "soc"); + if (off >= 0) { + fdt_fixup_val(fdt, off, "soc", "bus-frequency", hal_get_plat_clk()); + } + + /* fixup the serial clocks */ + off = fdt_find_devtype(fdt, -1, "serial"); + while (off >= 0) { + fdt_fixup_val(fdt, off, "serial", "clock-frequency", hal_get_bus_clk()); + off = fdt_find_devtype(fdt, off, "serial"); + } + +#endif /* !BUILD_LOADER_STAGE1 */ + (void)dts_addr; + return 0; +} +#endif /* MMU */ diff --git a/hal/nxp_t2080.h b/hal/nxp_t2080.h new file mode 100644 index 0000000000..67e13837fd --- /dev/null +++ b/hal/nxp_t2080.h @@ -0,0 +1,424 @@ +/* nxp_t2080.h + * + * Copyright (C) 2025 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + * + * Board support: + * Default: T2080 RDB (66.66 MHz oscillator, DDR3L SODIMM) + * BOARD_CW_VPX3152: CW VPX3-152 (66.667 MHz oscillator, DDR3L) + * BOARD_NAII_68PPC2: NAII 68PPC2 (100 MHz oscillator, 8GB DDR3) + * + * NXP T2080E Rev 1.1, e6500 core 2.0, PVR 8040_0120 and SVR 8538_0011 + */ + +#ifndef NXP_T2080_H +#define NXP_T2080_H + +#include "nxp_ppc.h" + +/* Uncomment to enable verbose DDR debugging output */ +/* #define DEBUG_DDR */ + +/* T2080 System Clock — oscillator input frequency. + * hal_get_plat_clk() / hal_get_bus_clk() compute the actual frequencies + * from the CLOCKING registers (PLL ratio × SYS_CLK) when + * ENABLE_BUS_CLK_CALC is defined. */ +#ifdef BOARD_NAII_68PPC2 +#define SYS_CLK (100000000) /* 100 MHz oscillator (NAII 68PPC2) */ +#else +#define SYS_CLK (66666667) /* 66.66 MHz oscillator (T2080 RDB / CW VPX3-152) */ +#endif + +/* ---- UART (PC16552D Dual UART) ---- */ +#define BAUD_RATE 115200 +#define UART_SEL 0 /* select UART 0 or 1 */ + +#define UART_BASE(n) (CCSRBAR + 0x11C500 + (n * 0x1000)) + +#define UART_RBR(n) ((volatile uint8_t*)(UART_BASE(n) + 0)) /* receiver buffer register */ +#define UART_THR(n) ((volatile uint8_t*)(UART_BASE(n) + 0)) /* transmitter holding register */ +#define UART_IER(n) ((volatile uint8_t*)(UART_BASE(n) + 1)) /* interrupt enable register */ +#define UART_IIR(n) ((volatile uint8_t*)(UART_BASE(n) + 2)) /* interrupt ID register */ +#define UART_FCR(n) ((volatile uint8_t*)(UART_BASE(n) + 2)) /* FIFO control register */ +#define UART_LCR(n) ((volatile uint8_t*)(UART_BASE(n) + 3)) /* line control register */ +#define UART_MCR(n) ((volatile uint8_t*)(UART_BASE(n) + 4)) /* modem control register */ +#define UART_LSR(n) ((volatile uint8_t*)(UART_BASE(n) + 5)) /* line status register */ + +/* 
enabled when UART_LCR_DLAB set */ +#define UART_DLB(n) ((volatile uint8_t*)(UART_BASE(n) + 0)) /* divisor least significant byte register */ +#define UART_DMB(n) ((volatile uint8_t*)(UART_BASE(n) + 1)) /* divisor most significant byte register */ + +#define UART_FCR_TFR (0x04) /* Transmitter FIFO reset */ +#define UART_FCR_RFR (0x02) /* Receiver FIFO reset */ +#define UART_FCR_FEN (0x01) /* FIFO enable */ +#define UART_LCR_DLAB (0x80) /* Divisor latch access bit */ +#define UART_LCR_WLS (0x03) /* Word length select: 8-bits */ +#define UART_LSR_TEMT (0x40) /* Transmitter empty */ +#define UART_LSR_THRE (0x20) /* Transmitter holding register empty */ + + +/* ---- IFC (Integrated Flash Controller) - T2080RM 13.3 ---- */ +#define IFC_BASE (CCSRBAR + 0x00124000) +#define IFC_MAX_BANKS 8 + +#define IFC_CSPR_EXT(n) ((volatile uint32_t*)(IFC_BASE + 0x000C + (n * 0xC))) /* Extended Base Address */ +#define IFC_CSPR(n) ((volatile uint32_t*)(IFC_BASE + 0x0010 + (n * 0xC))) /* Chip-select Property */ +#define IFC_AMASK(n) ((volatile uint32_t*)(IFC_BASE + 0x00A0 + (n * 0xC))) +#define IFC_CSOR(n) ((volatile uint32_t*)(IFC_BASE + 0x0130 + (n * 0xC))) +#define IFC_CSOR_EXT(n) ((volatile uint32_t*)(IFC_BASE + 0x0134 + (n * 0xC))) +#define IFC_FTIM0(n) ((volatile uint32_t*)(IFC_BASE + 0x01C0 + (n * 0x30))) +#define IFC_FTIM1(n) ((volatile uint32_t*)(IFC_BASE + 0x01C4 + (n * 0x30))) +#define IFC_FTIM2(n) ((volatile uint32_t*)(IFC_BASE + 0x01C8 + (n * 0x30))) +#define IFC_FTIM3(n) ((volatile uint32_t*)(IFC_BASE + 0x01CC + (n * 0x30))) + +#define IFC_CSPR_PHYS_ADDR(x) (((uint32_t)x) & 0xFFFF0000) /* Physical base address */ +#define IFC_CSPR_PORT_SIZE_8 0x00000080 /* Port Size 8 */ +#define IFC_CSPR_PORT_SIZE_16 0x00000100 /* Port Size 16 */ +#define IFC_CSPR_WP 0x00000040 /* Write Protect */ +#define IFC_CSPR_MSEL_NOR 0x00000000 /* Mode Select - NOR */ +#define IFC_CSPR_MSEL_NAND 0x00000002 /* Mode Select - NAND */ +#define IFC_CSPR_MSEL_GPCM 0x00000004 /* Mode Select - GPCM 
(General-purpose chip-select machine) */ +#define IFC_CSPR_V 0x00000001 /* Bank Valid */ + +/* NOR Timings (IFC clocks) */ +#define IFC_FTIM0_NOR_TACSE(n) (((n) & 0x0F) << 28) /* After address hold cycle */ +#define IFC_FTIM0_NOR_TEADC(n) (((n) & 0x3F) << 16) /* External latch address delay cycles */ +#define IFC_FTIM0_NOR_TAVDS(n) (((n) & 0x3F) << 8) /* Delay between CS assertion */ +#define IFC_FTIM0_NOR_TEAHC(n) (((n) & 0x3F) << 0) /* External latch address hold cycles */ +#define IFC_FTIM1_NOR_TACO(n) (((n) & 0xFF) << 24) /* CS assertion to output enable */ +#define IFC_FTIM1_NOR_TRAD(n) (((n) & 0x3F) << 8) /* read access delay */ +#define IFC_FTIM1_NOR_TSEQ(n) (((n) & 0x3F) << 0) /* sequential read access delay */ +#define IFC_FTIM2_NOR_TCS(n) (((n) & 0x0F) << 24) /* Chip-select assertion setup time */ +#define IFC_FTIM2_NOR_TCH(n) (((n) & 0x0F) << 18) /* Chip-select hold time */ +#define IFC_FTIM2_NOR_TWPH(n) (((n) & 0x3F) << 10) /* presumably write enable pulse high time (cf. TWP) - TODO confirm in T2080RM */ +#define IFC_FTIM2_NOR_TWP(n) (((n) & 0xFF) << 0) /* Write enable pulse width */ + +/* GPCM Timings (IFC clocks) */ +#define IFC_FTIM0_GPCM_TACSE(n) (((n) & 0x0F) << 28) /* After address hold cycle */ +#define IFC_FTIM0_GPCM_TEADC(n) (((n) & 0x3F) << 16) /* External latch address delay cycles */ +#define IFC_FTIM0_GPCM_TEAHC(n) (((n) & 0x3F) << 0) /* External latch address hold cycles */ +#define IFC_FTIM1_GPCM_TACO(n) (((n) & 0xFF) << 24) /* CS assertion to output enable */ +#define IFC_FTIM1_GPCM_TRAD(n) (((n) & 0x3F) << 8) /* read access delay */ +#define IFC_FTIM2_GPCM_TCS(n) (((n) & 0x0F) << 24) /* Chip-select assertion setup time */ +#define IFC_FTIM2_GPCM_TCH(n) (((n) & 0x0F) << 18) /* Chip-select hold time */ +#define IFC_FTIM2_GPCM_TWP(n) (((n) & 0xFF) << 0) /* Write enable pulse width */ + +/* IFC AMASK - RM Table 13-3 - Count of MSB minus 1 */ +enum ifc_amask_sizes { + IFC_AMASK_64KB = 0xFFFF0000, + IFC_AMASK_128KB = 0xFFFE0000, + IFC_AMASK_256KB = 0xFFFC0000, + IFC_AMASK_512KB = 0xFFF80000, + 
IFC_AMASK_1MB = 0xFFF00000, + IFC_AMASK_2MB = 0xFFE00000, + IFC_AMASK_4MB = 0xFFC00000, + IFC_AMASK_8MB = 0xFF800000, + IFC_AMASK_16MB = 0xFF000000, + IFC_AMASK_32MB = 0xFE000000, + IFC_AMASK_64MB = 0xFC000000, + IFC_AMASK_128MB = 0xF8000000, + IFC_AMASK_256MB = 0xF0000000, + IFC_AMASK_512MB = 0xE0000000, + IFC_AMASK_1GB = 0xC0000000, + IFC_AMASK_2GB = 0x80000000, + IFC_AMASK_4GB = 0x00000000, +}; + + +/* ---- NOR Flash ---- */ +#define FLASH_BANK_SIZE (128*1024*1024) +#define FLASH_PAGE_SIZE (512) /* program buffer (256 bytes per chip x 2 chips) */ +#define FLASH_SECTOR_SIZE (128*1024) +#define FLASH_SECTORS (FLASH_BANK_SIZE / FLASH_SECTOR_SIZE) +#define FLASH_CFI_WIDTH 16 /* 8 or 16 */ + + +/* ---- CPLD ---- */ +/* CPLD is not populated on the NAII 68PPC2 board; define ENABLE_CPLD to enable */ +#define CPLD_BASE 0xFFDF0000 +#define CPLD_BASE_PHYS_HIGH 0xFULL + +#define CPLD_SPARE 0x00 +#define CPLD_SATA_MUX_SEL 0x02 +#define CPLD_BANK_SEL 0x04 +#define CPLD_FW_REV 0x06 +#define CPLD_TTL_RW 0x08 +#define CPLD_TTL_LPBK 0x0A +#define CPLD_TTL_DATA 0x0C +#define CPLD_PROC_STATUS 0x0E /* write 1 to enable proc reset function, reset default value is 0 */ +#define CPLD_FPGA_RDY 0x10 /* read only when reg read 0x0DB1 then fpga is ready */ +#define CPLD_PCIE_SW_RESET 0x12 /* write 1 to reset the PCIe switch */ +#define CPLD_WR_TTL_INT_EN 0x14 +#define CPLD_WR_TTL_INT_DIR 0x16 +#define CPLD_INT_STAT 0x18 +#define CPLD_WR_TEMP_ALM_OVRD 0x1A /* write 0 to enable temp shutdown. 
reset default value is 1 */ +#define CPLD_PWR_DWN_CMD 0x1C +#define CPLD_TEMP_ALM_INT_STAT 0x1E +#define CPLD_WR_TEMP_ALM_INT_EN 0x20 + +#define CPLD_FLASH_BANK_0 0x00 +#define CPLD_FLASH_BANK_1 0x01 + +#define CPLD_DATA(n) ((volatile uint8_t*)(CPLD_BASE + n)) + + +/* ---- SATA ---- */ +#define SATA_ENBL ((volatile uint32_t*)0xB1003F4C) /* also saw 0xB4003F4C */ + + +/* ---- Boot Page Translation - T2080RM 4.4.9 ---- */ +#define LCC_BSTRH ((volatile uint32_t*)(CCSRBAR + 0x20)) /* Boot space translation register high */ +#define LCC_BSTRL ((volatile uint32_t*)(CCSRBAR + 0x24)) /* Boot space translation register low */ +#define LCC_BSTAR ((volatile uint32_t*)(CCSRBAR + 0x28)) /* Boot space translation attribute register */ +#define LCC_BSTAR_EN 0x80000000 +#define LCC_BSTAR_LAWTRGT(n) ((n) << 20) +#define LCC_BSTAR_LAWSZ(n) ((n) & 0x3F) + +/* ---- DCFG (Device Configuration) - T2080RM 6.3 ---- */ +#define DCFG_BASE (CCSRBAR + 0xE0000) +#define DCFG_DCSR ((volatile uint32_t*)(DCFG_BASE + 0x704)) /* Debug Configuration and Status */ +#define DCFG_DEVDISR1 ((volatile uint32_t*)(DCFG_BASE + 0x070)) /* Device Disable Control 1 */ +#define DCFG_DEVDISR2 ((volatile uint32_t*)(DCFG_BASE + 0x074)) /* Device Disable Control 2 */ +#define DCFG_DEVDISR3 ((volatile uint32_t*)(DCFG_BASE + 0x078)) /* Device Disable Control 3 */ +#define DCFG_DEVDISR4 ((volatile uint32_t*)(DCFG_BASE + 0x07C)) /* Device Disable Control 4 */ +#define DCFG_DEVDISR5 ((volatile uint32_t*)(DCFG_BASE + 0x080)) /* Device Disable Control 5 */ +#define DCFG_BRR ((volatile uint32_t*)(DCFG_BASE + 0xE4)) /* Boot Release Register */ + +/* ---- RCPM (Run Control and Power Management) - T2080RM 6.4 ---- */ +#define RCPM_BASE (CCSRBAR + 0xE2000) +#define RCPM_PCTBENR ((volatile uint32_t*)(RCPM_BASE + 0x1A0)) /* Physical Core Timebase Enable */ + +/* ---- Clocking - T2080RM 5.3 ---- */ +#define CLOCKING_BASE (CCSRBAR + 0xE1000) +#define CLOCKING_CLKCCSR(n) ((volatile uint32_t*)(CLOCKING_BASE + 0x000UL + ((n) * 
0x20))) +#define CLOCKING_PLLCNGSR(n) ((volatile uint32_t*)(CLOCKING_BASE + 0x800UL + ((n) * 0x20))) /* PLL cluster n general status */ +#define CLOCKING_PLLPGSR ((volatile uint32_t*)(CLOCKING_BASE + 0xC00UL)) /* Platform PLL general status */ + +/* ---- MPIC - T2080RM 24.3 ---- */ +#define PIC_BASE (CCSRBAR + 0x40000) +#define PIC_WHOAMI ((volatile uint32_t*)(PIC_BASE + 0x0090UL)) + + +/* ---- DDR (T2080RM 12.4) ---- */ +#ifdef BOARD_NAII_68PPC2 +/* NAII 68PPC2: 8GB discrete DDR3 IM8G08D3EBDG-15E */ +/* 1333.333 MT/s data rate 8 GiB (DDR3, 64-bit, CL=9, ECC on) */ +#define DDR_N_RANKS 2 +#define DDR_RANK_DENS 0x100000000 +#define DDR_SDRAM_WIDTH 64 +#define DDR_EC_SDRAM_W 8 +#define DDR_N_ROW_ADDR 16 +#define DDR_N_COL_ADDR 10 +#define DDR_N_BANKS 8 +#define DDR_EDC_CONFIG 2 +#define DDR_BURSTL_MASK 0x0c +#define DDR_TCKMIN_X_PS 1500 +#define DDR_TCMMAX_PS 3000 /* NOTE(review): name looks like a typo of DDR_TCKMAX_PS (cf. DDR_TCKMIN_X_PS) - confirm before renaming */ +#define DDR_CASLAT_X 0x000007E0 +#define DDR_TAA_PS 13500 +#define DDR_TRCD_PS 13500 +#define DDR_TRP_PS 13500 +#define DDR_TRAS_PS 36000 +#define DDR_TRC_PS 49500 +#define DDR_TFAW_PS 30000 +#define DDR_TWR_PS 15000 +#define DDR_TRFC_PS 260000 +#define DDR_TRRD_PS 6000 +#define DDR_TWTR_PS 7500 +#define DDR_TRTP_PS 7500 +#define DDR_REF_RATE_PS 7800000 +#else +/* T2080 RDB / CW VPX3-152: DDR3L SODIMM */ +/* TODO: Fill SPD parameters from DDR3L SODIMM datasheet */ +#define DDR_N_RANKS 2 /* TODO: confirm from CS_CONFIG dump */ +#define DDR_RANK_DENS 0x100000000 /* TODO: confirm */ +#define DDR_SDRAM_WIDTH 64 +#define DDR_EC_SDRAM_W 8 +#define DDR_N_ROW_ADDR 16 /* TODO: confirm */ +#define DDR_N_COL_ADDR 10 /* TODO: confirm */ +#define DDR_N_BANKS 8 +#define DDR_EDC_CONFIG 2 +#define DDR_BURSTL_MASK 0x0c +#define DDR_TCKMIN_X_PS 1500 /* TODO: from DDR3L datasheet */ +#define DDR_TCMMAX_PS 3000 /* TODO: from DDR3L datasheet. NOTE(review): name looks like a typo of DDR_TCKMAX_PS - confirm before renaming */ +#define DDR_CASLAT_X 0x000007E0 /* TODO */ +#define DDR_TAA_PS 13500 /* TODO */ +#define DDR_TRCD_PS 13500 /* TODO */ +#define DDR_TRP_PS 13500 /* TODO */ +#define DDR_TRAS_PS 36000 /* 
TODO */ +#define DDR_TRC_PS 49500 /* TODO */ +#define DDR_TFAW_PS 30000 /* TODO */ +#define DDR_TWR_PS 15000 /* TODO */ +#define DDR_TRFC_PS 260000 /* TODO */ +#define DDR_TRRD_PS 6000 /* TODO */ +#define DDR_TWTR_PS 7500 /* TODO */ +#define DDR_TRTP_PS 7500 /* TODO */ +#define DDR_REF_RATE_PS 7800000 /* TODO */ +#endif + +#ifdef BOARD_NAII_68PPC2 +/* DDR register values from working U-Boot on NAII 68PPC2 board */ +#define DDR_CS0_BNDS_VAL 0x000000FF +#define DDR_CS1_BNDS_VAL 0x010001FF +#define DDR_CS2_BNDS_VAL 0x0300033F +#define DDR_CS3_BNDS_VAL 0x0340037F +#define DDR_CS0_CONFIG_VAL 0x80044402 +#define DDR_CS1_CONFIG_VAL 0x80044402 +#define DDR_CS2_CONFIG_VAL 0x00000202 +#define DDR_CS3_CONFIG_VAL 0x00040202 +#define DDR_CS_CONFIG_2_VAL 0x00000000 + +#define DDR_TIMING_CFG_0_VAL 0xFF530004 +#define DDR_TIMING_CFG_1_VAL 0x98906345 +#define DDR_TIMING_CFG_2_VAL 0x0040A114 +#define DDR_TIMING_CFG_3_VAL 0x010A1100 +#define DDR_TIMING_CFG_4_VAL 0x00000001 +#define DDR_TIMING_CFG_5_VAL 0x04402400 + +#define DDR_SDRAM_MODE_VAL 0x00441C70 +#define DDR_SDRAM_MODE_2_VAL 0x00980000 +#define DDR_SDRAM_MODE_3_8_VAL 0x00000000 +#define DDR_SDRAM_MD_CNTL_VAL 0x00000000 + +#define DDR_SDRAM_CFG_VAL 0xE7040000 +#define DDR_SDRAM_CFG_2_VAL 0x00401000 + +#define DDR_SDRAM_INTERVAL_VAL 0x0C300100 +#define DDR_DATA_INIT_VAL 0xDEADBEEF +#define DDR_SDRAM_CLK_CNTL_VAL 0x02400000 +#define DDR_ZQ_CNTL_VAL 0x89080600 + +/* Write leveling - CRITICAL: board-specific values from U-Boot */ +#define DDR_WRLVL_CNTL_VAL 0x8675F604 +#define DDR_WRLVL_CNTL_2_VAL 0x05060607 +#define DDR_WRLVL_CNTL_3_VAL 0x080A0A0B + +#define DDR_SDRAM_RCW_1_VAL 0x00000000 +#define DDR_SDRAM_RCW_2_VAL 0x00000000 + +#define DDR_DDRCDR_1_VAL 0x80040000 +#define DDR_DDRCDR_2_VAL 0x00000001 + +#define DDR_ERR_INT_EN_VAL 0x0000001D +#define DDR_ERR_SBE_VAL 0x00010000 +#else +/* T2080 RDB / CW VPX3-152: DDR register values */ +/* TODO: Fill ALL values from Phase 1 U-Boot register dump: + * md.l 0xfe008000 4; md.l 
0xfe008010 4 (CS BNDS) + * md.l 0xfe008080 4; md.l 0xfe0080c0 4 (CS CONFIG) + * md.l 0xfe008100 4; md.l 0xfe008160 3 (TIMING) + * md.l 0xfe008110 8; md.l 0xfe008130 1 (CONFIG/MODE/CLK) + * md.l 0xfe008170 3; md.l 0xfe008190 2 (WRLVL) + * md.l 0xfe008200 6; md.l 0xfe008b28 2 (MODE3-8/CDR) */ +#define DDR_CS0_BNDS_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS1_BNDS_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS2_BNDS_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS3_BNDS_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS0_CONFIG_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS1_CONFIG_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS2_CONFIG_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS3_CONFIG_VAL 0x00000000 /* TODO: from dump */ +#define DDR_CS_CONFIG_2_VAL 0x00000000 /* TODO: from dump */ + +#define DDR_TIMING_CFG_0_VAL 0x00000000 /* TODO: from dump */ +#define DDR_TIMING_CFG_1_VAL 0x00000000 /* TODO: from dump */ +#define DDR_TIMING_CFG_2_VAL 0x00000000 /* TODO: from dump */ +#define DDR_TIMING_CFG_3_VAL 0x00000000 /* TODO: from dump */ +#define DDR_TIMING_CFG_4_VAL 0x00000000 /* TODO: from dump */ +#define DDR_TIMING_CFG_5_VAL 0x00000000 /* TODO: from dump */ + +#define DDR_SDRAM_MODE_VAL 0x00000000 /* TODO: from dump */ +#define DDR_SDRAM_MODE_2_VAL 0x00000000 /* TODO: from dump */ +#define DDR_SDRAM_MODE_3_8_VAL 0x00000000 /* TODO: from dump */ +#define DDR_SDRAM_MD_CNTL_VAL 0x00000000 /* TODO: from dump */ + +#define DDR_SDRAM_CFG_VAL 0x00000000 /* TODO: from dump */ +#define DDR_SDRAM_CFG_2_VAL 0x00000000 /* TODO: from dump */ + +#define DDR_SDRAM_INTERVAL_VAL 0x00000000 /* TODO: from dump */ +#define DDR_DATA_INIT_VAL 0xDEADBEEF +#define DDR_SDRAM_CLK_CNTL_VAL 0x00000000 /* TODO: from dump */ +#define DDR_ZQ_CNTL_VAL 0x00000000 /* TODO: from dump */ + +/* Write leveling - CRITICAL: board-specific values from U-Boot. + * These depend on PCB trace lengths and MUST come from the register dump. 
*/ +#define DDR_WRLVL_CNTL_VAL 0x00000000 /* TODO: from dump */ +#define DDR_WRLVL_CNTL_2_VAL 0x00000000 /* TODO: from dump */ +#define DDR_WRLVL_CNTL_3_VAL 0x00000000 /* TODO: from dump */ + +#define DDR_SDRAM_RCW_1_VAL 0x00000000 /* TODO: from dump */ +#define DDR_SDRAM_RCW_2_VAL 0x00000000 /* TODO: from dump */ + +#define DDR_DDRCDR_1_VAL 0x00000000 /* TODO: from dump */ +#define DDR_DDRCDR_2_VAL 0x00000000 /* TODO: from dump */ + +#define DDR_ERR_INT_EN_VAL 0x0000001D +#define DDR_ERR_SBE_VAL 0x00010000 +#endif + + +/* 12.4 DDR Memory Map */ +#define DDR_BASE (CCSRBAR + 0x8000) + +#define DDR_CS_BNDS(n) ((volatile uint32_t*)(DDR_BASE + 0x000 + (n * 8))) /* Chip select n memory bounds */ +#define DDR_CS_CONFIG(n) ((volatile uint32_t*)(DDR_BASE + 0x080 + (n * 4))) /* Chip select n configuration */ +#define DDR_CS_CONFIG_2(n) ((volatile uint32_t*)(DDR_BASE + 0x0C0 + (n * 4))) /* Chip select n configuration 2 */ +#define DDR_TIMING_CFG_3 ((volatile uint32_t*)(DDR_BASE + 0x100)) /* DDR SDRAM timing configuration 3 */ +#define DDR_TIMING_CFG_0 ((volatile uint32_t*)(DDR_BASE + 0x104)) /* DDR SDRAM timing configuration 0 */ +#define DDR_TIMING_CFG_1 ((volatile uint32_t*)(DDR_BASE + 0x108)) /* DDR SDRAM timing configuration 1 */ +#define DDR_TIMING_CFG_2 ((volatile uint32_t*)(DDR_BASE + 0x10C)) /* DDR SDRAM timing configuration 2 */ +#define DDR_SDRAM_CFG ((volatile uint32_t*)(DDR_BASE + 0x110)) /* DDR SDRAM control configuration */ +#define DDR_SDRAM_CFG_2 ((volatile uint32_t*)(DDR_BASE + 0x114)) /* DDR SDRAM control configuration 2 */ +#define DDR_SDRAM_MODE ((volatile uint32_t*)(DDR_BASE + 0x118)) /* DDR SDRAM mode configuration */ +#define DDR_SDRAM_MODE_2 ((volatile uint32_t*)(DDR_BASE + 0x11C)) /* DDR SDRAM mode configuration 2 */ +#define DDR_SDRAM_MD_CNTL ((volatile uint32_t*)(DDR_BASE + 0x120)) /* DDR SDRAM mode control */ +#define DDR_SDRAM_INTERVAL ((volatile uint32_t*)(DDR_BASE + 0x124)) /* DDR SDRAM interval configuration */ +#define DDR_DATA_INIT 
((volatile uint32_t*)(DDR_BASE + 0x128)) /* DDR training initialization value */ +#define DDR_SDRAM_CLK_CNTL ((volatile uint32_t*)(DDR_BASE + 0x130)) /* DDR SDRAM clock control */ +#define DDR_INIT_ADDR ((volatile uint32_t*)(DDR_BASE + 0x148)) /* DDR training initialization address */ +#define DDR_INIT_EXT_ADDR ((volatile uint32_t*)(DDR_BASE + 0x14C)) /* DDR training initialization extended address */ +#define DDR_TIMING_CFG_4 ((volatile uint32_t*)(DDR_BASE + 0x160)) /* DDR SDRAM timing configuration 4 */ +#define DDR_TIMING_CFG_5 ((volatile uint32_t*)(DDR_BASE + 0x164)) /* DDR SDRAM timing configuration 5 */ +#define DDR_TIMING_CFG_6 ((volatile uint32_t*)(DDR_BASE + 0x168)) /* DDR SDRAM timing configuration 6 */ +#define DDR_ZQ_CNTL ((volatile uint32_t*)(DDR_BASE + 0x170)) /* DDR ZQ calibration control */ +#define DDR_WRLVL_CNTL ((volatile uint32_t*)(DDR_BASE + 0x174)) /* DDR write leveling control */ +#define DDR_SR_CNTR ((volatile uint32_t*)(DDR_BASE + 0x17C)) /* DDR Self Refresh Counter */ +#define DDR_SDRAM_RCW_1 ((volatile uint32_t*)(DDR_BASE + 0x180)) /* DDR Register Control Word 1 */ +#define DDR_SDRAM_RCW_2 ((volatile uint32_t*)(DDR_BASE + 0x184)) /* DDR Register Control Word 2 */ +#define DDR_WRLVL_CNTL_2 ((volatile uint32_t*)(DDR_BASE + 0x190)) /* DDR write leveling control 2 */ +#define DDR_WRLVL_CNTL_3 ((volatile uint32_t*)(DDR_BASE + 0x194)) /* DDR write leveling control 3 */ +#define DDR_SDRAM_MODE_3 ((volatile uint32_t*)(DDR_BASE + 0x200)) /* DDR SDRAM mode configuration 3 */ +#define DDR_SDRAM_MODE_4 ((volatile uint32_t*)(DDR_BASE + 0x204)) /* DDR SDRAM mode configuration 4 */ +#define DDR_SDRAM_MODE_5 ((volatile uint32_t*)(DDR_BASE + 0x208)) /* DDR SDRAM mode configuration 5 */ +#define DDR_SDRAM_MODE_6 ((volatile uint32_t*)(DDR_BASE + 0x20C)) /* DDR SDRAM mode configuration 6 */ +#define DDR_SDRAM_MODE_7 ((volatile uint32_t*)(DDR_BASE + 0x210)) /* DDR SDRAM mode configuration 7 */ +#define DDR_SDRAM_MODE_8 ((volatile uint32_t*)(DDR_BASE + 0x214)) 
/* DDR SDRAM mode configuration 8 */ +#define DDR_DDRCDR_1 ((volatile uint32_t*)(DDR_BASE + 0xB28)) /* DDR Control Driver Register 1 */ +#define DDR_DDRCDR_2 ((volatile uint32_t*)(DDR_BASE + 0xB2C)) /* DDR Control Driver Register 2 */ +#define DDR_DDRDSR_1 ((volatile uint32_t*)(DDR_BASE + 0xB20)) /* DDR Debug Status Register 1 */ +#define DDR_DDRDSR_2 ((volatile uint32_t*)(DDR_BASE + 0xB24)) /* DDR Debug Status Register 2 */ +#define DDR_ERR_DETECT ((volatile uint32_t*)(DDR_BASE + 0xE40)) /* Memory error detect */ +#define DDR_ERR_DISABLE ((volatile uint32_t*)(DDR_BASE + 0xE44)) /* Memory error disable */ +#define DDR_ERR_INT_EN ((volatile uint32_t*)(DDR_BASE + 0xE48)) /* Memory error interrupt enable */ +#define DDR_ERR_SBE ((volatile uint32_t*)(DDR_BASE + 0xE58)) /* Single-Bit ECC memory error management */ + +#define DDR_SDRAM_CFG_MEM_EN 0x80000000 /* SDRAM interface logic is enabled */ +#define DDR_SDRAM_CFG_BI 0x00000001 /* Bypass initialization */ +#define DDR_SDRAM_CFG_2_D_INIT 0x00000010 /* data initialization in progress */ + +#endif /* NXP_T2080_H */ diff --git a/hal/nxp_t2080.ld b/hal/nxp_t2080.ld index 43e692cab9..a688b99840 100644 --- a/hal/nxp_t2080.ld +++ b/hal/nxp_t2080.ld @@ -13,8 +13,9 @@ MEMORY { FLASH (rx) : ORIGIN = @WOLFBOOT_ORIGIN@, LENGTH = @BOOTLOADER_PARTITION_SIZE@ - /* L2 as SRAM - 256KB */ - RAM (rwx) : ORIGIN = 0xF8F80000, LENGTH = 0x40000 + /* CPC as SRAM - 1MB (T2080 supports up to 2MB, using 1MB for P384 stack) + * Layout: .ramcode at bottom, stack grows down from top */ + RAM (rwx) : ORIGIN = 0xF8F00000, LENGTH = 0x100000 /* DDR - 2GB */ DRAM (rwx) : ORIGIN = 0x00000000, LENGTH = 0x7FFFFFFF @@ -57,14 +58,31 @@ SECTIONS .gnu.hash : { *(.gnu.hash) } .rela.dyn : { *(.rela.dyn) } - _stored_data = .; + /* Store flash location for .ramcode copy */ + _stored_ramcode = .; + + /* RAMFUNCTION code in CPC SRAM - copied before DDR is used + * This ensures memcpy/memmove are available early */ + .ramcode : AT (_stored_ramcode) + { + 
_start_ramcode = .; + KEEP(*(.ramcode)) + . = ALIGN(4); + _end_ramcode = .; + } > RAM + + /* Calculate where .data starts in flash (after .ramcode), ensuring + * at least 16-byte alignment for the .data load address */ + _stored_data = (_stored_ramcode + (_end_ramcode - _start_ramcode) + 15) & ~15; .data : AT (_stored_data) { _start_data = .; KEEP(*(.data*)) - . = ALIGN(4); - KEEP(*(.ramcode)) + *(.got*) + *(.got2*) + *(.plt*) + *(.dynamic) . = ALIGN(4); _end_data = .; } > DRAM @@ -84,5 +102,7 @@ SECTIONS } -PROVIDE(_start_heap = ORIGIN(RAM)); +/* Heap starts after .ramcode in CPC SRAM */ +PROVIDE(_start_heap = _end_ramcode); +/* Stack at top of CPC SRAM, grows down */ PROVIDE(_end_stack = ORIGIN(RAM) + (LENGTH(RAM)) ); diff --git a/include/user_settings.h b/include/user_settings.h index 9168fedadb..393112e827 100644 --- a/include/user_settings.h +++ b/include/user_settings.h @@ -477,11 +477,11 @@ extern int tolower(int c); #if !defined(WOLFCRYPT_SECURE_MODE) && !defined(WOLFBOOT_TPM_PARMENC) && \ !defined(WOLFCRYPT_TEST) && !defined(WOLFCRYPT_BENCHMARK) -#if !(defined(WOLFBOOT_ENABLE_WOLFHSM_CLIENT) && \ - defined(WOLFBOOT_SIGN_ML_DSA)) && \ - !defined(WOLFBOOT_ENABLE_WOLFHSM_SERVER) -#define WC_NO_RNG -#endif + #if !(defined(WOLFBOOT_ENABLE_WOLFHSM_CLIENT) && \ + defined(WOLFBOOT_SIGN_ML_DSA)) && \ + !defined(WOLFBOOT_ENABLE_WOLFHSM_SERVER) + #define WC_NO_RNG + #endif #define WC_NO_HASHDRBG #define NO_AES_CBC #else @@ -493,6 +493,9 @@ extern int tolower(int c); #define CUSTOM_RAND_GENERATE_SEED my_rng_seed_gen #define CUSTOM_RAND_GENERATE_BLOCK my_rng_seed_gen extern int my_rng_seed_gen(unsigned char* output, unsigned int sz); + + #define HAVE_AESGCM + #define GCM_TABLE #else #define HAVE_HASHDRBG #define WOLFSSL_AES_CFB @@ -590,6 +593,9 @@ extern int tolower(int c); #if defined(WOLFCRYPT_TEST) || defined(WOLFCRYPT_BENCHMARK) #define NO_WRITE_TEMP_FILES + /* Use printf for wolfSSL logging (redirected to UART via syscalls.c) */ + #define WOLFSSL_LOG_PRINTF + /* 
Use static memory pool to avoid system malloc dependency. * benchmark.c provides gBenchMemory static buffer. * Default is 50KB with BENCH_EMBEDDED, override for smaller targets */ diff --git a/include/wolfboot/wolfboot.h b/include/wolfboot/wolfboot.h index eda5055677..f7b5fdb0b2 100644 --- a/include/wolfboot/wolfboot.h +++ b/include/wolfboot/wolfboot.h @@ -47,6 +47,8 @@ extern "C" { # if defined(__WOLFBOOT) && defined(RAM_CODE) # if defined(ARCH_ARM) # define RAMFUNCTION __attribute__((used,section(".ramcode"),long_call)) +# elif defined(ARCH_PPC) +# define RAMFUNCTION __attribute__((used,section(".ramcode"),longcall)) # else # define RAMFUNCTION __attribute__((used,section(".ramcode"))) # endif diff --git a/options.mk b/options.mk index ff57622796..35f1ea3e77 100644 --- a/options.mk +++ b/options.mk @@ -654,6 +654,10 @@ ifeq ($(DEBUG_UART),1) endif endif endif +# Flash erase/write/read test at WOLFBOOT_PARTITION_UPDATE_ADDRESS. +ifeq ($(TEST_FLASH),1) + CFLAGS+=-DTEST_FLASH +endif ifeq ($(NO_QNX),1) CFLAGS+=-D"NO_QNX" endif diff --git a/src/boot_ppc.c b/src/boot_ppc.c index fe86cad08a..fd5c50c08d 100644 --- a/src/boot_ppc.c +++ b/src/boot_ppc.c @@ -31,12 +31,18 @@ extern unsigned int __bss_end__; extern unsigned int _stored_data; extern unsigned int _start_data; extern unsigned int _end_data; +#ifdef RAM_CODE +/* .ramcode section (RAMFUNCTION) - may be in separate memory region */ +extern unsigned int _stored_ramcode; +extern unsigned int _start_ramcode; +extern unsigned int _end_ramcode; +#endif extern void main(void); extern void hal_early_init(void); -void write_tlb(uint32_t mas0, uint32_t mas1, uint32_t mas2, uint32_t mas3, - uint32_t mas7) +void RAMFUNCTION write_tlb(uint32_t mas0, uint32_t mas1, uint32_t mas2, + uint32_t mas3, uint32_t mas7) { mtspr(MAS0, mas0); mtspr(MAS1, mas1); @@ -46,7 +52,7 @@ void write_tlb(uint32_t mas0, uint32_t mas1, uint32_t mas2, uint32_t mas3, __asm__ __volatile__("isync;msync;tlbwe;isync"); } -void set_tlb(uint8_t tlb, uint8_t 
esel, uint32_t epn, uint32_t rpn, +void RAMFUNCTION set_tlb(uint8_t tlb, uint8_t esel, uint32_t epn, uint32_t rpn, uint32_t urpn, uint8_t perms, uint8_t wimge, uint8_t ts, uint8_t tsize, uint8_t iprot) { @@ -113,32 +119,89 @@ int WEAKFUNCTION hal_dts_fixup(void* dts_addr) } #endif +/* forward declaration */ +#ifndef BUILD_LOADER_STAGE1 +void flush_cache(uint32_t start_addr, uint32_t size); +#endif + void boot_entry_C(void) { - register unsigned int *dst, *src, *end; + volatile unsigned int *dst; + volatile const unsigned int *src; + volatile unsigned int *end; + +#ifdef RAM_CODE + /* Copy .ramcode section FIRST - to CPC SRAM which is already available. + * This makes RAMFUNCTION code (memcpy, memmove) available before DDR. + * Use volatile to prevent compiler from transforming to memcpy call. */ + src = (volatile const unsigned int*)&_stored_ramcode; + dst = (volatile unsigned int*)&_start_ramcode; + end = (volatile unsigned int*)&_end_ramcode; + while (dst < end) { + *dst = *src; + dst++; + src++; + } +#ifndef BUILD_LOADER_STAGE1 + /* Flush D-cache and invalidate I-cache for .ramcode in CPC SRAM. + * PowerPC I/D caches are not coherent — explicit dcbst+icbi required. */ + if ((uint32_t)&_end_ramcode > (uint32_t)&_start_ramcode) { + flush_cache((uint32_t)&_start_ramcode, + (uint32_t)&_end_ramcode - (uint32_t)&_start_ramcode); + } +#endif +#endif /* RAM_CODE */ + + /* Now initialize DDR and other hardware */ hal_early_init(); - /* Copy the .data section from flash to RAM */ - src = (unsigned int*)&_stored_data; - dst = (unsigned int*)&_start_data; - end = (unsigned int*)&_end_data; + /* Copy the .data section from flash to DDR. + * Use volatile to prevent the compiler from transforming this loop + * into a memcpy() call. 
*/ + src = (volatile const unsigned int*)&_stored_data; + dst = (volatile unsigned int*)&_start_data; + end = (volatile unsigned int*)&_end_data; while (dst < end) { *dst = *src; dst++; src++; } - /* Initialize the BSS section to 0 */ - dst = (unsigned int*)&__bss_start__; - end = (unsigned int*)&__bss_end__; +#ifndef BUILD_LOADER_STAGE1 + /* Flush D-cache and invalidate I-cache for .data region in DDR. */ + flush_cache((uint32_t)&_start_data, + (uint32_t)&_end_data - (uint32_t)&_start_data); +#endif + + /* Initialize the BSS section to 0 (volatile prevents memset transform) */ + dst = (volatile unsigned int*)&__bss_start__; + end = (volatile unsigned int*)&__bss_end__; while (dst < end) { *dst = 0U; dst++; } /* Run wolfBoot! */ +#if defined(ENABLE_DDR) && defined(DDR_STACK_TOP) + /* DDR is initialized, .data and .bss are set up. + * Switch stack from CPC SRAM to DDR for: + * 1. Better performance (DDR stack is cacheable by L1/L2/CPC) + * 2. More stack space (64KB vs shared CPC SRAM) + * Uses assembly trampoline since we can't return after stack switch. + * The CPC SRAM will be released back to L2 cache in hal_init(). 
*/ + { + extern void ddr_call_with_stack(uint32_t func, uint32_t sp); + /* Zero DDR stack area using volatile to prevent memset transform */ + volatile uint32_t *p = (volatile uint32_t *)DDR_STACK_BASE; + volatile uint32_t *e = (volatile uint32_t *)DDR_STACK_TOP; + while (p < e) { *p++ = 0; } + ddr_call_with_stack((uint32_t)main, DDR_STACK_TOP - 64); + /* Does not return */ + } +#else main(); +#endif } #ifndef BUILD_LOADER_STAGE1 diff --git a/src/boot_ppc_mp.S b/src/boot_ppc_mp.S index 3b62c40e88..7dc74698a1 100644 --- a/src/boot_ppc_mp.S +++ b/src/boot_ppc_mp.S @@ -110,8 +110,8 @@ branch_prediction: srwi r10, r0, 5 /* r10 = cluster */ mulli r5, r10, CORES_PER_CLUSTER - add r5, r5, r8 - mulli r4, r5, CORES_PER_CLUSTER + add r5, r5, r8 /* r5 = linear core ID */ + mr r4, r5 /* r4 = PIR = linear core ID */ #elif defined(CORE_E500MC) /* BOOKE e500mc family */ rlwinm r4, r0, 27, 27, 31 mr r5, r4 @@ -132,34 +132,15 @@ branch_prediction: mtspr L1CSR2, r8 #if defined(CORE_E6500) /* --- L2 E6500 --- */ -l2_setup_cache: - /* E6500CORERM: 11.7 L2 cache state */ - /* R5 = L2 cluster 1 base */ - lis r5, L2_CLUSTER_BASE(0)@h - ori r5, r5, L2_CLUSTER_BASE(0)@l - /* Invalidate and clear locks */ - lis r1, (L2CSR0_L2FI | L2CSR0_L2LFC)@h - ori r1, r1, (L2CSR0_L2FI | L2CSR0_L2LFC)@l - sync - stw r1, L2CSR0(r5) - - /* poll till invalidate and lock bits are cleared */ -l2_poll_invclear: - lwz r4, L2CSR0(r5) - and. r4, r1, r4 - bne l2_poll_invclear - isync - - /* set stash id to (coreID * 2) + 32 + L2 (1) */ - addi r3, r8,1 - stw r3, L2CSR1(r5) - - /* enable L2 with parity */ - sync - isync - lis r4, (L2CSR0_L2E | L2CSR0_L2PE)@h - stw r4, L2CSR0(r5) - isync + /* e6500 L2 is per-cluster (shared by all cores in the cluster). + * The primary core already invalidated and enabled L2 during boot. 
+ * Secondary cores must NOT do L2FI (flash invalidate) — it discards + * ALL dirty L2 lines including the primary core's stack, return + * addresses, and cached code, causing the primary core to crash + * (typically SRR0=0 from corrupted return address). + * L1 stash ID (set above via L1CSR2 SPR) is per-core and sufficient. + * L2CSR1 (stash ID) is per-cluster and already set by core 0. + * No CCSR TLB mapping needed since we skip L2 register access. */ #elif defined(CORE_E5500) /* --- L2 E5500 --- */ l2_setup_cache: diff --git a/src/boot_ppc_start.S b/src/boot_ppc_start.S index b0fb90b9ab..0337203b41 100644 --- a/src/boot_ppc_start.S +++ b/src/boot_ppc_start.S @@ -72,6 +72,21 @@ All TLBs for boot will be in TLB1 and supervisor mode (not user) #include "hal/nxp_ppc.h" +/* e6500 has 64-bit GPRs. When loading 32-bit addresses with bit 31 set + * (addresses >= 0x80000000), the lis instruction sign-extends, putting + * 0xFFFFFFFF in the upper 32 bits. This causes memory access failures. + * Use LOAD_ADDR32 macro to properly load 32-bit addresses on e6500. */ +#ifdef CORE_E6500 +#define LOAD_ADDR32(reg, addr) \ + li reg, 0; \ + oris reg, reg, (addr)@h; \ + ori reg, reg, (addr)@l +#else +#define LOAD_ADDR32(reg, addr) \ + lis reg, (addr)@h; \ + ori reg, reg, (addr)@l +#endif + /* variables from linker script */ .global _start_vector .global isr_empty @@ -173,9 +188,10 @@ hardware_reg: #ifndef BUILD_LOADER_STAGE1 branch_prediction: - /* enable branch prediction */ - lis r0, (BUCSR_ENABLE)@h - ori r0, r0, (BUCSR_ENABLE)@l + /* Disable branch prediction during early boot. + * Enabled later in C after DDR stack relocation to avoid + * speculative fetches during hardware init. */ + li r0, 0 mtspr SPRN_BUCSR, r0 #endif @@ -194,6 +210,13 @@ startup_init: #ifndef TLB1_NEW_SIZE #define TLB1_NEW_SIZE BOOKE_PAGESZ_256K #endif +/* EPN alignment mask for TLB1_NEW_SIZE page. 
+ * e6500: page = 2^(TSIZE+10), e500/e5500: page = 2^(2*TSIZE+10) */ +#ifdef CORE_E6500 +#define TLB1_EPN_MASK (~((1 << (TLB1_NEW_SIZE + 10)) - 1)) +#else +#define TLB1_EPN_MASK (~((1 << (2 * TLB1_NEW_SIZE + 10)) - 1)) +#endif shrink_default_tlb1: /* Shrink the current TLB1 entry */ bl find_pc @@ -219,12 +242,15 @@ find_pc: oris r3, r3, MAS1_IPROT@h mtspr MAS1, r3 - /* Find page for PC (R1) */ - lis r3, MAS2_EPN@h - ori r3, r3, MAS2_EPN@l + /* Align PC (R1) to TLB page size boundary. + * Use LOAD_ADDR32: TLB1_EPN_MASK has bit 31 set (e.g. 0xFFFC0000), + * so lis would sign-extend to 0xFFFFFFFF_FFFC0000 on e6500. */ + LOAD_ADDR32(r3, TLB1_EPN_MASK) and r1, r1, r3 - /* Set the real and virtual page for this TLB */ + /* Set the real and virtual page for this TLB. + * Use LOAD_ADDR32: MAS2_EPN (0xFFFFF000) has bit 31 set. */ + LOAD_ADDR32(r3, MAS2_EPN) mfspr r2, MAS2 andc r2, r2, r3 or r2, r2, r1 @@ -267,7 +293,8 @@ find_pc: setup_interrupts: /* Setup interrupt vectors */ - lis r1, (_start_vector)@h + /* e6500 GPRs are 64-bit; avoid sign-extension for high addresses */ + LOAD_ADDR32(r1, _start_vector) mtspr IVPR, r1 /* set the 48-bit high-order prefix address */ #ifdef ENABLE_INTERRUPTS @@ -412,10 +439,17 @@ invalidate_temp_tlb: boot_page: /* make sure we have the default boot page added to MMU */ /* BOOT_PAGE: TLB 1, Entry 0, Supervisor X/R/W, I, TS=0, 4KB, IPROT */ + /* Skip if Entry 0 is the currently executing TLB (R14 from + * shrink_default_tlb1). Overwriting it with a 4K page would unmap + * the code we are running from. The shrink code already set Entry 0 + * to 256K with IPROT, which is sufficient. 
*/ + cmpwi r14, 0 + beq 1f set_tlb(1, 0, BOOT_ROM_ADDR, BOOT_ROM_ADDR, 0, MAS3_SX | MAS3_SW | MAS3_SR, MAS2_I, 0, BOOKE_PAGESZ_4K, 1, r3); +1: #endif ccsr_tlb: @@ -431,8 +465,7 @@ ccsr_law: #define CCSR_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(LAW_TRGT_CORENET) | \ LAW_SIZE_16MB) - lis r9, CCSRBAR + LAWBAR_BASE(0)@h - ori r9, r9, CCSRBAR + LAWBAR_BASE(0)@l + LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(0)) lis r0, CCSRBAR_PHYS_HIGH@h ori r0, r0, CCSRBAR_PHYS_HIGH@l lis r1, CCSRBAR@h @@ -456,8 +489,7 @@ flash_law: #define FLASH_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(LAW_TRGT_IFC) | \ FLASH_LAW_SIZE) - lis r9, CCSRBAR + LAWBAR_BASE(1)@h - ori r9, r9, CCSRBAR + LAWBAR_BASE(1)@l + LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(1)) lis r0, FLASH_BASE_PHYS_HIGH@h ori r0, r0, FLASH_BASE_PHYS_HIGH@l lis r1, FLASH_BASE_ADDR@h @@ -472,15 +504,14 @@ flash_law: lwz r2, 8(r9) isync flash_tlb: - /* Flash: TLB 1, Entry 2, Super X/R/W, W/I/G, TS=0, 64/128M, IPROT */ - /* Write is required for Write/Erase using CFI commands to base */ - #ifdef BUILD_LOADER_STAGE1 - /* Using XIP from this flash, so cannot use cache inhibit */ + /* Flash: TLB 1, Entry 2, Super X/R/W, W+G, TS=0, 64/128M, IPROT + * Write-through (W) enables L1 I-cache to cache flash instruction + * fetches during XIP boot — matches reference T2080 implementation. + * Guarded (G) prevents speculative prefetches to the IFC. + * After DDR stack relocation, C code switches to I|G for flash + * write/erase (hal_flash_cache_disable) or M for full caching + * (hal_flash_enable_caching). 
*/ #define FLASH_TLB_WING (MAS2_W | MAS2_G) - #else - /* IFC polling requires cache inhibit */ - #define FLASH_TLB_WING (MAS2_I | MAS2_G) - #endif set_tlb(1, 2, FLASH_BASE_ADDR, FLASH_BASE_ADDR, FLASH_BASE_PHYS_HIGH, MAS3_SX | MAS3_SW | MAS3_SR, FLASH_TLB_WING, 0, @@ -511,94 +542,153 @@ flash_tlb: #endif /* Map initial DDR, but can be adjusted later in hal_ddr_init() */ - /* DDR - TBL=1, Entry 12/13 */ + /* DDR - TLB=1, Entry 12 (and 13 for e500) */ + #ifdef CORE_E6500 + /* e6500 supports 2GB page size - use single TLB entry */ + set_tlb(1, 12, DDR_ADDRESS, DDR_ADDRESS, 0, + MAS3_SX | MAS3_SW | MAS3_SR, DDR_WING, + 0, BOOKE_PAGESZ_2G, 1, r3); + #else + /* e500 uses two 1GB TLB entries */ set_tlb(1, 12, DDR_ADDRESS, DDR_ADDRESS, 0, MAS3_SX | MAS3_SW | MAS3_SR, DDR_WING, 0, BOOKE_PAGESZ_1G, 1, r3); - #if DDR_SIZE > 0x40000000 + #if DDR_SIZE > 0x40000000 set_tlb(1, 13, DDR_ADDRESS + 0x40000000, DDR_ADDRESS + 0x40000000, 0, MAS3_SX | MAS3_SW | MAS3_SR, DDR_WING, 0, BOOKE_PAGESZ_1G, 1, r3); + #endif #endif #endif /* ENABLE_DDR */ +/* ========================================================================= + * CPC SRAM Initialization + * Order: 1) CPC invalidate, 2) CPCSRCR config, 3) LAW, 4) TLB, 5) CPC enable + * Note: TLB must be created BEFORE CPC enable (original working sequence) + * ========================================================================= */ +#if defined(ENABLE_L2_CACHE) && defined(L2SRAM_ADDR) && (defined(CORE_E5500) || defined(CORE_E6500)) +cpc_setup_sram: + /* T2080RM: 8.4.2.2 - CPC initialization sequence: + * Step 1: Flash invalidate CPC and clear locks (CPCFI | CPCLFC) + * Step 2: Poll until invalidate completes + * Step 3: Configure SRAM control registers (CPCSRCR1, CPCSRCR0) + * Step 4: Configure LAW for SRAM routing (done after this block) + * Step 5: Enable CPC with parity (CPCE | CPCPE) + * Step 6: Create TLB for SRAM access (NOTE(review): the Order banner above puts TLB before CPC enable - confirm which order applies) + * The LAW (DDR_1) provides CoreNet routing; CPC intercepts before DDR. 
*/ + + /* R1 = CPC base - preserve across LAW setup */ + LOAD_ADDR32(r1, CPC_BASE) + + /* Step 1: Flash invalidate CPC and clear all locks */ + lis r0, (CPCCSR0_CPCFI | CPCCSR0_CPCLFC)@h + ori r0, r0, (CPCCSR0_CPCFI | CPCCSR0_CPCLFC)@l + stw r0, CPCCSR0(r1) + + /* Step 2: Poll until CPCFI and CPCLFC clear */ +cpc_poll_invalidate: + lwz r2, CPCCSR0(r1) + and. r2, r2, r0 + bne cpc_poll_invalidate + isync + + /* Step 3: Configure CPC SRAM control registers */ + li r0, 0 + stw r0, CPCSRCR1(r1) /* SRAM high address = 0 */ + /* SRAM low address - use LOAD_ADDR32 on e6500 to avoid sign extension */ + LOAD_ADDR32(r0, L2SRAM_ADDR) + /* Enable SRAM and set size (must match L2SRAM_SIZE = 1MB for P384) */ + ori r0, r0, (CPCSRCR0_SRAMSZ_1024 | CPCSRCR0_SRAMEN) + stw r0, CPCSRCR0(r1) + mbar + isync +#endif /* ENABLE_L2_CACHE && L2SRAM_ADDR */ + +/* Step 3: Configure LAW for SRAM */ #ifdef INITIAL_SRAM_ADDR +#ifndef INITIAL_SRAM_NO_LAW init_sram_law: - /* Intial SRAM LAW 2 */ + /* CPC SRAM uses LAW 2 - DO NOT reuse this LAW index elsewhere! + * The stack resides in CPC SRAM; overwriting this LAW causes crashes. 
*/ #define INITIAL_SRAM_LAW (LAWAR_ENABLE | \ LAWAR_TRGT_ID(INITIAL_SRAM_LAW_TRGT) | \ INITIAL_SRAM_LAW_SZ) - lis r9, CCSRBAR + LAWBAR_BASE(2)@h - ori r9, r9, CCSRBAR + LAWBAR_BASE(2)@l + LOAD_ADDR32(r9, CCSRBAR + LAWBAR_BASE(2)) li r0, 0 /* UPPER=0 */ - lis r1, INITIAL_SRAM_ADDR@h - ori r1, r1, INITIAL_SRAM_ADDR@l - lis r2, INITIAL_SRAM_LAW@h - ori r2, r2, INITIAL_SRAM_LAW@l + /* Use LOAD_ADDR32 on e6500 to avoid sign-extension for addresses >= 0x80000000 */ + LOAD_ADDR32(r3, INITIAL_SRAM_ADDR) + LOAD_ADDR32(r2, INITIAL_SRAM_LAW) stw r0, 0(r9) /* LAWBARH */ - stw r1, 4(r9) /* LAWBARL */ + stw r3, 4(r9) /* LAWBARL */ sync stw r2, 8(r9) /* LAWAR */ /* read back LAWAR (per 2.3.2 Configuring Local Access Windows) */ lwz r2, 8(r9) isync +#endif /* !INITIAL_SRAM_NO_LAW */ +#endif /* INITIAL_SRAM_ADDR */ +/* Step 4: Create TLB for SRAM - BEFORE CPC enable (original working order) + * This is for e5500/e6500 CPC SRAM only. e500 has its own init_sram_tlb below. */ +#if defined(INITIAL_SRAM_ADDR) && (defined(CORE_E5500) || defined(CORE_E6500)) init_sram_tlb: - /* Initial SRAM: TLB 1, Entry 9, Supervisor X/R/W, M, TS=0, IPROT */ + /* Initial SRAM: TLB 1, Entry 9, Supervisor X/R/W, M, TS=0, IPROT + * Original working T2080 code (commit 11f46a51) used MAS2_M. 
*/ set_tlb(1, 9, INITIAL_SRAM_ADDR, INITIAL_SRAM_ADDR, 0, MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, INITIAL_SRAM_BOOKE_SZ, 1, r3); -#endif - -#ifdef ENABLE_L2_CACHE +#endif /* INITIAL_SRAM_ADDR && (CORE_E5500 || CORE_E6500) */ -#if defined(CORE_E5500) || defined(CORE_E6500) /* --- L2 E5500/E6500 --- */ -#ifdef L2SRAM_ADDR -l2_setup_sram: - /* T2080RM: 8.4.2.2 or T1024RM 13.4.2.2 - * Enabling the CPC after Power-On Reset */ +/* Step 5: Enable CPC after TLB is configured */ +#if defined(ENABLE_L2_CACHE) && defined(L2SRAM_ADDR) && (defined(CORE_E5500) || defined(CORE_E6500)) +cpc_enable: /* R1 = CPC base */ - lis r1, CPC_BASE@h - ori r1, r1, CPC_BASE@l - - /* Set CPC SRAM control register */ - /* SRAM high addrress 0x0 */ - li r0, 0 - stw r0, CPCSRCR1(r1) - /* SRAM low address */ - lis r0, L2SRAM_ADDR@h - ori r0, r0, L2SRAM_ADDR@l - /* Enable SRAM and set size (must match L2SRAM_SIZE) */ - ori r0, r0, (CPCSRCR0_SRAMSZ_256 | CPCSRCR0_SRAMEN) - stw r0, CPCSRCR0(r1) - - /* Enable memory mapped SRAM */ - lis r0, CPCCSR0_SRAM_ENABLE@h + LOAD_ADDR32(r1, CPC_BASE) + + /* Enable CPC WITHOUT parity in SRAM mode. + * SRAM is uninitialized at cold power cycle; enabling CPCPE causes CPC + * to read-modify-write ECC on the first dcbz, which reads uninitialized + * SRAM and triggers an ECC/parity machine check. + * Parity is enabled later when CPC transitions to full cache mode in C. */ + lis r0, (CPCCSR0_CPCE)@h mbar isync stw r0, CPCCSR0(r1) mbar - /* Disable speculation */ + /* Verify CPC is enabled by reading back CPCCSR0 */ +cpc_poll_enable: + lwz r2, CPCCSR0(r1) + andis. 
r2, r2, CPCCSR0_CPCE@h /* check CPCE bit */ + beq cpc_poll_enable + isync + + /* Disable speculation (Errata A-006593) */ lwz r0, CPCHDBCR0(r1) oris r0, r0, CPCHDBCR0_SPEC_DIS@h stw r0, CPCHDBCR0(r1) -#endif /* L2SRAM_ADDR */ + mbar + isync +#endif /* ENABLE_L2_CACHE && L2SRAM_ADDR */ + +#ifdef ENABLE_L2_CACHE +#if defined(CORE_E5500) || defined(CORE_E6500) /* --- L2 E5500/E6500 --- */ +/* Note: CPC SRAM setup moved above for correct T2080RM sequence */ #if defined(CORE_E6500) /* --- L2 E6500 --- */ l2_setup_cache: /* E6500CORERM: 11.7 L2 cache state */ /* R5 = L2 cluster 1 base */ - lis r5, L2_CLUSTER_BASE(0)@h - ori r5, r5, L2_CLUSTER_BASE(0)@l - /* Invalidate and clear locks */ - lis r1, (L2CSR0_L2FI | L2CSR0_L2LFC)@h - ori r1, r1, (L2CSR0_L2FI | L2CSR0_L2LFC)@l + LOAD_ADDR32(r5, L2_CLUSTER_BASE(0)) + + /* Flash invalidate L2 (locks already clear after reset) */ + lis r1, L2CSR0_L2FI@h + ori r1, r1, L2CSR0_L2FI@l sync stw r1, L2CSR0(r5) - /* poll till invalidate and lock bits are cleared */ + /* Poll until L2FI clears */ l2_poll_invclear: lwz r4, L2CSR0(r5) and. r4, r1, r4 @@ -612,8 +702,15 @@ l2_poll_invclear: /* enable L2 with parity */ sync isync - lis r4, (L2CSR0_L2E | L2CSR0_L2PE)@h + LOAD_ADDR32(r4, (L2CSR0_L2E | L2CSR0_L2PE)) stw r4, L2CSR0(r5) + mbar + + /* Verify L2 is enabled by reading back L2CSR0 */ +l2_poll_enable: + lwz r3, L2CSR0(r5) + andis. r3, r3, L2CSR0_L2E@h /* check bit 31 (L2E) */ + beq l2_poll_enable /* loop until enabled */ isync #elif defined(CORE_E5500) /* --- L2 E5500 --- */ @@ -675,6 +772,17 @@ l2_setup_sram: stw r1, L2SRBAR0(r5) mbar #endif /* L2SRAM_ADDR */ + +#ifdef INITIAL_SRAM_ADDR +init_sram_tlb: + /* Initial SRAM: TLB 1, Entry 9, Supervisor X/R/W, M, TS=0, IPROT + * For e500, L2 SRAM uses cacheable memory-coherent (M) access. + * TLB is created AFTER l2_setup_sram configures L2 as SRAM. 
*/ + set_tlb(1, 9, + INITIAL_SRAM_ADDR, INITIAL_SRAM_ADDR, 0, + MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0, + INITIAL_SRAM_BOOKE_SZ, 1, r3); +#endif /* INITIAL_SRAM_ADDR */ #endif /* CORE_E500 */ #endif /* ENABLE_L2_CACHE */ @@ -713,10 +821,9 @@ l1_tlb: #endif #endif /* ENABLE_L1_CACHE */ -#ifdef CACHE_SRAM_ADDR +#ifdef L1_CACHE_ADDR cache_sram_init: - lis r3, CACHE_SRAM_ADDR@h - ori r3, r3, CACHE_SRAM_ADDR@l + LOAD_ADDR32(r3, L1_CACHE_ADDR) /* read the cache size */ mfspr r2, L1CFG0 andi. r2, r2, 0x1FF @@ -736,13 +843,27 @@ cache_sram_init_loop: #endif addi r3, r3, CACHE_LINE_SIZE bdnz cache_sram_init_loop -#endif /* CACHE_SRAM_ADDR */ +#elif defined(L2SRAM_ADDR) +cache_sram_init: + /* CPC SRAM: skip bulk zeroing. + * Bulk sequential writes to CPC SRAM via CoreNet hang on cold power + * cycle (bus errors before L1/L2 caches are fully operational). + * The L1 locked dcache is used as initial stack instead (see + * L1_CACHE_ADDR). CPC SRAM is later released back to L3 cache mode + * by hal_reconfigure_cpc_as_cache() after DDR init. */ +#endif /* L1_CACHE_ADDR */ setup_stack: /* Build top of stack address */ /* Reserve 64 bytes of initial data (must be 16 byte aligned) */ - lis r1, (_end_stack-64)@h - ori r1, r1, (_end_stack-64)@l +#ifdef L1_CACHE_ADDR + /* Use L1 locked dcache as initial stack (16KB). + * L1_CACHE_ADDR + 4*4KB = top of locked region. + * CPC SRAM is unreliable on cold power cycle (bus errors via CoreNet). 
*/ + LOAD_ADDR32(r1, L1_CACHE_ADDR + 0x4000 - 64) +#else + LOAD_ADDR32(r1, _end_stack-64) +#endif /* PowerPC e500 Application Binary Interface User's Guide * 2.3.5.1.1 Minimal Stack Frame: No Local Variables or Saved Parameters @@ -751,14 +872,14 @@ setup_stack: stwu r0, -4(r1) stwu r0, -4(r1) /* Terminate Back chain */ stwu r1, -8(r1) /* Save back chain and move SP */ - lis r0, RESET_VECTOR@h /* Address of reset vector */ - ori r0, r0, RESET_VECTOR@l + /* RESET_VECTOR (0xEFFFFFFC) has bit 31 set; use LOAD_ADDR32 on e6500 */ + LOAD_ADDR32(r0, RESET_VECTOR) stwu r1, -8(r1) /* Save back chain and move SP */ stw r0, +12(r1) /* Save return addr (underflow vect) */ - /* switch back to AS/TS=0 */ - lis r3, (MSR_CE | MSR_ME | MSR_DE)@h - ori r3, r3, (MSR_CE | MSR_ME | MSR_DE)@l + /* switch back to AS/TS=0, enable recoverable interrupts */ + lis r3, (MSR_CE | MSR_ME | MSR_DE | MSR_RI)@h + ori r3, r3, (MSR_CE | MSR_ME | MSR_DE | MSR_RI)@l mtmsr r3 isync @@ -767,10 +888,11 @@ setup_stack: #endif #ifdef USE_LONG_JUMP - /* load absolute address into "LR" and branch return to it */ - /* Enables long jump in 32-bit */ - lis r3, boot_entry_C@h - ori r3, r3, boot_entry_C@l + /* Load boot_entry_C into LR for indirect branch. + * Use LOAD_ADDR32, not lis, because e6500 has 64-bit GPRs and lis + * sign-extends for addresses >= 0x80000000 (e.g. 0xEFFExxxx becomes + * 0xFFFFFFFF_EFFExxxx), causing an instruction TLB miss on blr. */ + LOAD_ADDR32(r3, boot_entry_C) mtlr r3 blr #else @@ -780,6 +902,14 @@ setup_stack: /* -- Assembly Functions -- */ + +/* Functions placed in .ramcode when RAM_CODE is defined. + * This allows them to be called from RAMFUNCTION code during flash + * command mode (when I-cache misses to flash return status data). 
*/ +#ifdef RAM_CODE + .section .ramcode, "ax", @progbits +#endif + /* * unsigned long long get_ticks(void); * @@ -815,15 +945,6 @@ wait_ticks: mtlr r8 /* restore link register */ blr -/* return the address we are running at */ -.global get_pc -get_pc: - mflr r0 - bl 1f -1: mflr r3 - mtlr r0 - blr - /* L1 Cache Helpers */ .global invalidate_icache invalidate_icache: @@ -845,12 +966,37 @@ invalidate_dcache: isync blr +/* Back to .text for functions that must remain in flash */ +#ifdef RAM_CODE + .text +#endif + +/* return the address we are running at */ +.global get_pc +get_pc: + mflr r0 + bl 1f +1: mflr r3 + mtlr r0 + blr + #ifndef BUILD_LOADER_STAGE1 .global icache_enable icache_enable: +#ifdef RAM_CODE + /* Inline invalidation — invalidate_icache is in .ramcode, + * unreachable via bl from .text */ + mfspr r4, L1CSR1 + ori r4, r4, L1CSR_CFI + msync + isync + mtspr L1CSR1, r4 + isync +#else mflr r8 bl invalidate_icache mtlr r8 +#endif isync mfspr r4, L1CSR1 ori r4, r4, L1CSR_CE @@ -862,9 +1008,20 @@ icache_enable: .global dcache_enable dcache_enable: +#ifdef RAM_CODE + /* Inline invalidation — invalidate_dcache is in .ramcode, + * unreachable via bl from .text */ + mfspr r4, L1CSR0 + ori r4, r4, L1CSR_CFI + msync + isync + mtspr L1CSR0, r4 + isync +#else mflr r8 bl invalidate_dcache mtlr r8 +#endif isync mfspr r4, L1CSR0 ori r4, r4, L1CSR_CE @@ -882,11 +1039,35 @@ dcache_disable: lis r4, 0 ori r4, r4, L1CSR_CE andc r3, r3, r4 - mtspr L1CSR0, r0 + mtspr L1CSR0, r3 isync blr #endif +/* void ddr_call_with_stack(uint32_t func_ptr, uint32_t new_sp) + * Switches stack pointer to DDR and calls the given function. + * Used by boot_entry_C to transition from CPC SRAM stack to DDR stack + * before calling main(). Does not return. 
+ * r3 = function pointer to call + * r4 = new stack pointer (top of DDR stack area) */ +.global ddr_call_with_stack +ddr_call_with_stack: + mr r1, r4 /* Set stack pointer to DDR */ + /* Create minimal PPC ABI stack frame with terminated back chain */ + li r0, 0 + stwu r0, -4(r1) /* Terminate back chain */ + stwu r0, -4(r1) + stwu r1, -8(r1) /* Save back chain and move SP */ + /* RESET_VECTOR (0xEFFFFFFC) has bit 31 set; use LOAD_ADDR32 on e6500 */ + LOAD_ADDR32(r0, RESET_VECTOR) + stwu r1, -8(r1) /* Save back chain and move SP */ + stw r0, +12(r1) /* Save return addr (underflow vector) */ + /* Call the function */ + mtctr r3 + bctrl + /* Should never reach here */ +1: b 1b + #ifdef USE_GOT /* function to relocate code, handling cache flushing and continue to @@ -982,8 +1163,8 @@ in_ram: stwu r0, -4(r1) stwu r0, -4(r1) /* Terminate Back chain */ stwu r1, -8(r1) /* Save back chain and move SP */ - lis r0, RESET_VECTOR@h /* Address of reset vector */ - ori r0, r0, RESET_VECTOR@l + /* RESET_VECTOR (0xEFFFFFFC) has bit 31 set; use LOAD_ADDR32 on e6500 */ + LOAD_ADDR32(r0, RESET_VECTOR) stwu r1, -8(r1) /* Save back chain and move SP */ stw r0, +12(r1) /* Save return addr (underflow vect) */ @@ -996,8 +1177,45 @@ in_ram: .section .isr_vector .align 8 isr_empty: - nop - rfi + /* Minimal fault dump for early bring-up. + * IMPORTANT: Do NOT use r0 as base register for addi/stw! + * PowerPC treats RA=0 specially: addi with RA=0 uses literal 0 + * (not GPR0), and stw with RA=0 uses EA=0+D (not GPR0+D). + * This caused all stores to go to address 0x0000 (DDR, not + * initialized on cold boot) -> nested machine check -> checkstop. + * Use r3 as base, r4 as scratch. */ +#if defined(DEBUG_UART) && defined(TARGET_nxp_t2080) + /* Print '!' to UART to signal exception occurred */ + LOAD_ADDR32(r3, 0xFE11C500) +.L_isr_wait: + lbz r4, 5(r3) + andi. r4, r4, 0x20 + beq .L_isr_wait + li r4, '!' 
+ stb r4, 0(r3) + eieio +#endif +#ifdef L2SRAM_ADDR + LOAD_ADDR32(r3, L2SRAM_ADDR + 0x200) + mfspr r4, SRR0 + stw r4, 0x00(r3) + mfspr r4, SRR1 + stw r4, 0x04(r3) + mfspr r4, SPRN_ESR + stw r4, 0x08(r3) + mfspr r4, SPRN_DEAR + stw r4, 0x0C(r3) + mfspr r4, SPRN_MCSR + stw r4, 0x10(r3) + mfspr r4, SPRN_PIR + stw r4, 0x14(r3) + /* Machine check exceptions use MCSRR0/MCSRR1 (not SRR0/SRR1) */ + mfspr r4, SPRN_MCSRR0 + stw r4, 0x18(r3) + mfspr r4, SPRN_MCSRR1 + stw r4, 0x1C(r3) +#endif +1: b 1b #endif /* reset entry point - must be at end of .S */ diff --git a/src/string.c b/src/string.c index 7245ead8cd..ea716a690e 100644 --- a/src/string.c +++ b/src/string.c @@ -342,7 +342,7 @@ void uart_writenum(int num, int base, int zeropad, int maxdigits) void uart_vprintf(const char* fmt, va_list argp) { char* fmtp = (char*)fmt; - int zeropad, maxdigits; + int zeropad, maxdigits, precision, leftjust; while (fmtp != NULL && *fmtp != '\0') { /* print non formatting characters */ if (*fmtp != '%') { @@ -352,17 +352,42 @@ void uart_vprintf(const char* fmt, va_list argp) fmtp++; /* skip % */ /* find formatters */ - zeropad = maxdigits = 0; + zeropad = maxdigits = leftjust = 0; + precision = -1; /* -1 = not specified */ + /* check for left-justify flag */ + if (*fmtp == '-') { + leftjust = 1; + fmtp++; + } while (*fmtp != '\0') { - if (*fmtp >= '0' && *fmtp <= '9') { + if (*fmtp == '*') { + /* width from argument */ + maxdigits = va_arg(argp, int); + fmtp++; + } + else if (*fmtp >= '0' && *fmtp <= '9') { /* length formatter */ - if (*fmtp == '0') { + if (*fmtp == '0' && maxdigits == 0) { zeropad = 1; } - maxdigits <<= 8; + maxdigits *= 10; /* decimal width: <<= 8 turned "%10s" into a 256-column pad */ maxdigits += (*fmtp - '0'); fmtp++; } + else if (*fmtp == '.') { + /* precision */ + fmtp++; + if (*fmtp == '*') { + precision = va_arg(argp, int); + fmtp++; + } else { + precision = 0; + while (*fmtp >= '0' && *fmtp <= '9') { + precision = precision * 10 + (*fmtp - '0'); + fmtp++; + } + } + } else if (*fmtp == 'l') { /* long - skip */ fmtp++; @@ -401,7 +426,20 @@ void
uart_vprintf(const char* fmt, va_list argp) case 's': { char* str = (char*)va_arg(argp, char*); - uart_write(str, (uint32_t)strlen(str)); + int slen = (int)strlen(str); + if (leftjust) { + uart_write(str, slen); + while (slen < maxdigits) { + uart_write(" ", 1); + slen++; + } + } else { + while (slen < maxdigits) { + uart_write(" ", 1); + slen++; + } + uart_write(str, (uint32_t)strlen(str)); + } break; } case 'c': @@ -410,6 +448,44 @@ void uart_vprintf(const char* fmt, va_list argp) uart_write(&c, 1); break; } +#ifdef UART_PRINTF_FLOAT + case 'f': + case 'e': + case 'g': + { + double val = va_arg(argp, double); + int prec = (precision >= 0) ? precision : 3; + int digit; + unsigned int ipart; + + /* handle negative */ + if (val < 0.0) { + uart_write("-", 1); + val = -val; + } + + /* integer part */ + ipart = (unsigned int)val; + uart_writenum((int)ipart, 10, 0, 0); + + /* fractional part */ + if (prec > 0) { + double frac = val - (double)ipart; + char c; + int i; + uart_write(".", 1); + for (i = 0; i < prec; i++) { + frac *= 10.0; + digit = (int)frac; + if (digit > 9) digit = 9; + c = '0' + digit; + uart_write(&c, 1); + frac -= (double)digit; + } + } + break; + } +#endif /* UART_PRINTF_FLOAT */ default: break; } diff --git a/test-app/Makefile b/test-app/Makefile index 514f00cf4c..e0b0d5a3d1 100644 --- a/test-app/Makefile +++ b/test-app/Makefile @@ -107,9 +107,14 @@ endif ifeq ($(WOLFCRYPT_BENCHMARK),1) CFLAGS+=-DWOLFCRYPT_BENCHMARK WOLFCRYPT_SUPPORT=1 + UART_PRINTF_FLOAT?=1 APP_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/benchmark/benchmark.o endif +ifeq ($(UART_PRINTF_FLOAT),1) + CFLAGS+=-DUART_PRINTF_FLOAT +endif + ifeq ($(WOLFCRYPT_SUPPORT),1) # Add support infrastructure APP_OBJS+=wolfcrypt_support.o @@ -141,25 +146,60 @@ ifeq ($(WOLFCRYPT_SUPPORT),1) APP_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/ecc.o APP_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_int.o - # Add SP math implementations for ARM Cortex-M - 
APP_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_cortexm.o + # Add SP C math (all architectures) APP_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_c32.o - ifneq ($(NO_ARM_ASM),1) - APP_OBJS+= \ - $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-aes-asm_c.o \ - $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.o \ - $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.o \ - $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.o \ - $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.o + ifneq ($(NO_ASM),1) + # SP Cortex M + ifneq ($(filter ARM ARM_BE,$(ARCH)),) + ifeq ($(SPMATH),1) + APP_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/sp_cortexm.o + endif + + ifneq ($(NO_ARM_ASM),1) + APP_OBJS+= \ + $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-aes-asm_c.o \ + $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.o \ + $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.o \ + $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.o \ + $(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.o + + CFLAGS+=-DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARMASM_INLINE \ + -DWOLFSSL_ARMASM_NO_NEON -DWOLFSSL_ARMASM_THUMB2 + endif + endif + + # Power PC + ifeq ($(ARCH),PPC) + # SP mp int PowerPC ASM. NOTE(review): PPC64 is assigned in the per-TARGET block further down this Makefile; ifeq is evaluated when read, so PPC64 is still unset here unless it comes from the command line or config - confirm intended + ifeq ($(PPC64),1) + CFLAGS+=-DWOLFSSL_SP_PPC64 + else + CFLAGS+=-DWOLFSSL_SP_PPC + endif + + # SHA256 + APP_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/ppc32/ppc32-sha256-asm_c.o + CFLAGS+=-DWOLFSSL_PPC32_ASM + CFLAGS+=-DWOLFSSL_PPC32_ASM_INLINE + CFLAGS+=-DWOLFSSL_PPC32_ASM_SMALL + + ifeq ($(PPC64),1) # requires wolfssl PR 9852 + # AES + APP_OBJS+=$(WOLFBOOT_LIB_WOLFSSL)/wolfcrypt/src/port/ppc64/ppc64-aes-asm_c.o + CFLAGS+=-DWOLFSSL_PPC64_ASM + CFLAGS+=-DWOLFSSL_PPC64_ASM_INLINE + CFLAGS+=-DWOLFSSL_PPC64_ASM_SMALL + CFLAGS+=-DWOLFSSL_PPC64_ASM_AES_NO_HARDEN + endif - CFLAGS+=-DWOLFSSL_ARMASM -DWOLFSSL_ARMASM_NO_HW_CRYPTO
-DWOLFSSL_ARMASM_INLINE \ - -DWOLFSSL_ARMASM_NO_NEON -DWOLFSSL_ARMASM_THUMB2 + CFLAGS+=-fomit-frame-pointer + endif endif CFLAGS+=-DWOLFSSL_USER_SETTINGS CFLAGS+=-I"$(WOLFBOOT_LIB_WOLFSSL)" -endif +endif # WOLFCRYPT_SUPPORT ifeq ($(TZEN),1) CFLAGS+=-DNONSECURE_APP @@ -414,6 +454,22 @@ ifeq ($(TARGET),va416x0) CFLAGS+=-ffunction-sections -fdata-sections endif +PPC64=0 +ifeq ($(TARGET),nxp_t2080) + ifneq ($(SIGN),NONE) + APP_OBJS+=../src/keystore.o + endif + CFLAGS+=-ffunction-sections -fdata-sections + + # PowerPC e6500 + PPC64=1 +endif + +ifeq ($(TARGET),nxp_t1024) + # PowerPC e5500 + PPC64=1 +endif + ifeq ($(TARGET),sim) # LD on MacOS does not support "-Map=" LDMAPSUPPORTED=$(shell $(CC) -Wl,-Map=image.map 2>&1 | grep 'unknown option') diff --git a/test-app/PPC.ld b/test-app/PPC.ld index 58a65ff83f..eeb964b9e3 100644 --- a/test-app/PPC.ld +++ b/test-app/PPC.ld @@ -1,6 +1,6 @@ OUTPUT_ARCH( "powerpc" ) -ENTRY( main ) +ENTRY( _app_entry ) PHDRS { @@ -14,6 +14,7 @@ SECTIONS .text : { + *(.text._app_entry) *(.text.main) *(.text*) *(.rodata*) @@ -23,6 +24,7 @@ SECTIONS { _start_data = .; KEEP(*(.data*)) + *(.sdata*) /* PPC small initialized data */ . = ALIGN(4); KEEP(*(.ramcode)) @@ -54,6 +56,7 @@ SECTIONS _start_bss = .; __bss_start__ = .; *(.bss*) + *(.sbss*) /* PPC small uninitialized data */ *(COMMON) . = ALIGN(4); _end_bss = .; @@ -62,4 +65,12 @@ SECTIONS } . = ALIGN(4); + /* Heap for _sbrk (used by syscalls.c malloc) */ + _Min_Heap_Size = 0x10000; /* 64KB */ + + /* Stack: 64KB, grows downward from _stack_top. + * wolfCrypt ECC384 operations need deep stack (~11KB per frame). */ + _Min_Stack_Size = 0x10000; /* 64KB */ + _stack_end = _end + _Min_Heap_Size; + _stack_top = _stack_end + _Min_Stack_Size; } diff --git a/test-app/app_nxp_t2080.c b/test-app/app_nxp_t2080.c index 3e07b785d9..8535b7c88a 100644 --- a/test-app/app_nxp_t2080.c +++ b/test-app/app_nxp_t2080.c @@ -1,4 +1,6 @@ /* app_nxp_t2080.c + * + * Test bare-metal application for NXP T2080. 
* * Copyright (C) 2025 wolfSSL Inc. * @@ -20,32 +22,163 @@ */ #include -#include "../hal/nxp_ppc.h" +#include + +#include "wolfboot/wolfboot.h" + +/* Assembly entry point: set stack pointer, enable FPU, and call main. + * The linker script defines _stack_top at the end of the stack region. + * PPC ABI: r1 = stack pointer, 16-byte aligned, back-chain to 0. */ +void __attribute__((naked, section(".text._app_entry"))) _app_entry(void) +{ + __asm__ volatile ( + "lis 1, _stack_top@ha\n" + "addi 1, 1, _stack_top@l\n" + "li 0, 0\n" + "stwu 0, -16(1)\n" /* Create initial stack frame, back-chain = 0 */ + /* Enable FPU: set MSR[FP] (bit 18) = 0x2000. + * Required for benchmark float formatting. */ + "mfmsr 0\n" + "ori 0, 0, 0x2000\n" + "mtmsr 0\n" + "isync\n" + "b main\n" + ); +} +#include "target.h" #include "printf.h" +#include "keystore.h" + +#include "../hal/nxp_ppc.h" + +/* wolfCrypt test/benchmark support */ +#ifdef WOLFCRYPT_TEST +#include +#include +int wolfcrypt_test(void *args); +#endif -static const char* hex_lut = "0123456789abcdef"; +#ifdef WOLFCRYPT_BENCHMARK +#include +#include +int benchmark_test(void *args); +#endif + +static uint8_t boot_part_state = IMG_STATE_NEW; +static uint8_t update_part_state = IMG_STATE_NEW; + +const char part_state_names[6][16] = { + "NEW", + "UPDATING", + "FFLAGS", + "TESTING", + "CONFIRMED", + "[Invalid state]" +}; + +static const char *part_state_name(uint8_t state) +{ + switch(state) { + case IMG_STATE_NEW: + return part_state_names[0]; + case IMG_STATE_UPDATING: + return part_state_names[1]; + case IMG_STATE_FINAL_FLAGS: + return part_state_names[2]; + case IMG_STATE_TESTING: + return part_state_names[3]; + case IMG_STATE_SUCCESS: + return part_state_names[4]; + default: + return part_state_names[5]; + } +} + +static int print_info(void) +{ + int i, j; + uint32_t n_keys; + + wolfBoot_get_partition_state(PART_BOOT, &boot_part_state); + wolfBoot_get_partition_state(PART_UPDATE, &update_part_state); + + wolfBoot_printf("\r\n"); + 
wolfBoot_printf("System information\r\n"); + wolfBoot_printf("====================================\r\n"); + wolfBoot_printf("Current firmware state: %s\r\n", + part_state_name(boot_part_state)); + wolfBoot_printf("Update state: %s\r\n", + part_state_name(update_part_state)); + + + wolfBoot_printf("\r\n"); + wolfBoot_printf("Bootloader keystore information\r\n"); + wolfBoot_printf("====================================\r\n"); + n_keys = keystore_num_pubkeys(); + wolfBoot_printf("Number of public keys: %lu\r\n", n_keys); + for (i = 0; i < (int)n_keys; i++) { + uint32_t size = keystore_get_size(i); + uint32_t type = keystore_get_key_type(i); + uint32_t mask = keystore_get_mask(i); + uint8_t *keybuf = keystore_get_buffer(i); + + wolfBoot_printf("\r\n"); + wolfBoot_printf(" Public Key #%d: size %lu, type %lx, mask %08lx\r\n", + i, size, type, mask); + wolfBoot_printf(" ====================================\r\n "); + for (j = 0; j < (int)size; j++) { + wolfBoot_printf("%02X ", keybuf[j]); + if (j % 16 == 15) { + wolfBoot_printf("\r\n "); + } + } + wolfBoot_printf("\r\n"); + } + return 0; +} void main(void) { - int i = 0; - int j = 0; - int k = 0; - char snum[8]; + /* Zero BSS - required for bare-metal since there's no crt0 startup. + * Without this, static variables (gTestMemory, HEAP_HINT, etc.) + * contain DDR garbage, causing crashes in wc_LoadStaticMemory. 
*/ + extern char _start_bss[], _end_bss[]; + { + char *p = _start_bss; + while (p < _end_bss) + *p++ = 0; + } uart_init(); - uart_write("Test App\n", 9); + wolfBoot_printf("========================\r\n"); + wolfBoot_printf("NXP T2080 wolfBoot demo Application\r\n"); + wolfBoot_printf("Copyright 2026 wolfSSL Inc\r\n"); + wolfBoot_printf("GPL v3\r\n"); + wolfBoot_printf("========================\r\n"); - /* Wait for reboot */ - while(1) { - for (j=0; j<1000000; j++) - ; - i++; + print_info(); - uart_write("\r\n0x", 4); - for (k=0; k<8; k++) { - snum[7 - k] = hex_lut[(i >> 4*k) & 0xf]; - } - uart_write(snum, 8); +#if defined(WOLFCRYPT_TEST) || defined(WOLFCRYPT_BENCHMARK) + wolfCrypt_Init(); + +#ifdef WOLFCRYPT_TEST + wolfBoot_printf("\r\nRunning wolfCrypt tests...\r\n"); + wolfcrypt_test(NULL); + wolfBoot_printf("Tests complete.\r\n\r\n"); +#endif + +#ifdef WOLFCRYPT_BENCHMARK + wolfBoot_printf("Running wolfCrypt benchmarks...\r\n"); + benchmark_test(NULL); + wolfBoot_printf("Benchmarks complete.\r\n\r\n"); +#endif + + wolfCrypt_Cleanup(); +#endif + + wolfBoot_printf("Test App: idle loop\r\n"); + while(1) { + /* Idle */ } } diff --git a/test-app/syscalls.c b/test-app/syscalls.c index ae5a3f5f74..a167c3c8bb 100644 --- a/test-app/syscalls.c +++ b/test-app/syscalls.c @@ -22,11 +22,22 @@ */ #include +#include #include -#include -#undef errno -extern int errno; +/* Provide our own errno for bare-metal. + * Using the libc errno via can conflict with TLS-based errno + * on cross-toolchains (e.g. powerpc-linux-gnu glibc). 
*/ +#ifndef ENOMEM +#define ENOMEM 12 +#endif +#ifndef EBADF +#define EBADF 9 +#endif +#ifndef EINVAL +#define EINVAL 22 +#endif +int errno; /* Heap management */ extern char _end; /* Defined by linker */ @@ -85,8 +96,9 @@ void *_sbrk(int incr) return prev_heap_end; } -/* Forward declaration of UART write function */ +/* Forward declarations of UART functions from wolfBoot string.c */ extern void uart_write(const char *buf, unsigned int sz); +extern void uart_vprintf(const char* fmt, va_list argp); int _write(int file, char *ptr, int len) { @@ -117,3 +129,367 @@ int _getpid(void) { return 1; } + +/* ========== Standard I/O functions for bare-metal ========== + * These override glibc's implementations which require TLS and + * other OS facilities that don't exist in bare-metal. + * wolfCrypt test/benchmark code calls standard printf, not wolfBoot_printf. + */ + +/* Route all printf-family functions through our vsnprintf (in this file) + * rather than uart_vprintf (in string.c). This ensures float formatting + * via UART_PRINTF_FLOAT works, since string.c is compiled by the parent + * Makefile without that flag. */ +int vprintf(const char *fmt, va_list args) +{ + char buf[256]; + int len = vsnprintf(buf, sizeof(buf), fmt, args); + if (len > 0) + uart_write(buf, (len < (int)sizeof(buf)) ? len : (int)sizeof(buf) - 1); + return len; +} + +int printf(const char *fmt, ...) +{ + va_list args; + int len; + va_start(args, fmt); + len = vprintf(fmt, args); + va_end(args); + return len; +} + +/* fprintf - ignore FILE* stream, all output goes to UART */ +int fprintf(void *stream, const char *fmt, ...) 
+{ + va_list args; + int len; + (void)stream; + va_start(args, fmt); + len = vprintf(fmt, args); + va_end(args); + return len; +} + +int vfprintf(void *stream, const char *fmt, va_list args) +{ + (void)stream; + return vprintf(fmt, args); +} + +/* ========== Buffer-based formatting (snprintf) ========== */ +/* buf_num: append num in the given base to buf starting at pos, never writing past buf[size-2]; pads to width with '0' or ' ' and returns the new pos. */ +static int buf_num(char *buf, int pos, int size, unsigned int num, + int base, int is_signed, int zeropad, int width, + int is_upper) +{ + char tmp[12]; /* digits, least significant first */ + int i = 0, neg = 0, total; + + if (is_signed && (int)num < 0) { + neg = 1; + num = 0U - num; /* magnitude via unsigned negate; -(int)num overflows for INT_MIN */ + } + + if (num == 0) { + tmp[i++] = '0'; + } else { + while (num > 0 && i < (int)sizeof(tmp)) { + int d = num % base; + tmp[i++] = (d < 10) ? ('0' + d) : + ((is_upper ? 'A' : 'a') + d - 10); + num /= base; + } + } + + total = i + neg; + if (neg && zeropad && pos < size - 1) /* sign goes before zero fill: -0042, not 00-42 */ + buf[pos++] = '-'; + for (; total < width && pos < size - 1; total++) + buf[pos++] = zeropad ? '0' : ' '; + if (neg && !zeropad && pos < size - 1) /* sign goes after space fill */ + buf[pos++] = '-'; + while (i > 0 && pos < size - 1) + buf[pos++] = tmp[--i]; + + return pos; +} + +int vsnprintf(char *buf, unsigned int size, const char *fmt, va_list argp) +{ + int pos = 0; + const char *fmtp = fmt; + int zeropad, maxdigits, precision, leftjust; + + if (size == 0) return 0; + + while (fmtp && *fmtp != '\0' && pos < (int)size - 1) { + if (*fmtp != '%') { + buf[pos++] = *fmtp++; + continue; + } + fmtp++; /* skip % */ + + zeropad = maxdigits = leftjust = 0; + precision = -1; + if (*fmtp == '-') { leftjust = 1; fmtp++; } + while (*fmtp != '\0') { + if (*fmtp == '*') { + maxdigits = va_arg(argp, int); + fmtp++; + } else if (*fmtp >= '0' && *fmtp <= '9') { + if (*fmtp == '0' && maxdigits == 0) + zeropad = 1; + maxdigits = maxdigits * 10 + (*fmtp - '0'); + fmtp++; + } else if (*fmtp == '.') { + fmtp++; + if (*fmtp == '*') { + precision = va_arg(argp, int); + fmtp++; + } else { + precision = 0; + while (*fmtp >= '0' && *fmtp <= '9') { + precision = precision * 10 + (*fmtp - '0'); + fmtp++; + } + } + } else if (*fmtp == 'l' || *fmtp ==
'z') { + fmtp++; + } else { + break; + } + } + + switch (*fmtp) { + case '%': + if (pos < (int)size - 1) buf[pos++] = '%'; + break; + case 'd': case 'i': + pos = buf_num(buf, pos, size, + (unsigned int)va_arg(argp, int), 10, 1, + zeropad, maxdigits, 0); + break; + case 'u': + pos = buf_num(buf, pos, size, + va_arg(argp, unsigned int), 10, 0, + zeropad, maxdigits, 0); + break; + case 'x': + pos = buf_num(buf, pos, size, + va_arg(argp, unsigned int), 16, 0, + zeropad, maxdigits, 0); + break; + case 'X': + pos = buf_num(buf, pos, size, + va_arg(argp, unsigned int), 16, 0, + zeropad, maxdigits, 1); + break; + case 'p': + if (pos < (int)size - 1) buf[pos++] = '0'; + if (pos < (int)size - 1) buf[pos++] = 'x'; + pos = buf_num(buf, pos, size, + (unsigned int)(uintptr_t)va_arg(argp, void*), 16, 0, + 1, 8, 0); + break; + case 's': + { + const char *str = va_arg(argp, const char*); + int slen; + const char *sp; + if (!str) str = "(null)"; + sp = str; + slen = 0; + while (*sp++) slen++; + if (leftjust) { + sp = str; + while (*sp && pos < (int)size - 1) + buf[pos++] = *sp++; + while (slen < maxdigits && pos < (int)size - 1) { + buf[pos++] = ' '; + slen++; + } + } else { + while (slen < maxdigits && pos < (int)size - 1) { + buf[pos++] = ' '; + slen++; + } + sp = str; + while (*sp && pos < (int)size - 1) + buf[pos++] = *sp++; + } + break; + } + case 'c': + if (pos < (int)size - 1) + buf[pos++] = (char)va_arg(argp, int); + break; +#ifdef UART_PRINTF_FLOAT + case 'f': case 'e': case 'g': + { + double val = va_arg(argp, double); + int prec = (precision >= 0) ? 
precision : 3;
+            unsigned int ipart;
+            double frac;
+            int digit, k;
+
+            if (val < 0.0) {
+                if (pos < (int)size - 1) buf[pos++] = '-';
+                val = -val;
+            }
+            ipart = (unsigned int)val;
+            pos = buf_num(buf, pos, size, ipart, 10, 0, 0, 0, 0);
+            if (prec > 0) {
+                frac = val - (double)ipart;
+                if (pos < (int)size - 1) buf[pos++] = '.';
+                for (k = 0; k < prec && pos < (int)size - 1; k++) {
+                    frac *= 10.0;
+                    digit = (int)frac;
+                    if (digit > 9) digit = 9;
+                    buf[pos++] = '0' + digit;
+                    frac -= (double)digit;
+                }
+            }
+            break;
+        }
+#endif /* UART_PRINTF_FLOAT */
+        default:
+            break;
+        }
+        fmtp++;
+    }
+
+    buf[pos] = '\0';
+    return pos;
+}
+
+int snprintf(char *buf, unsigned int size, const char *fmt, ...) /* variadic front end to vsnprintf() */
+{
+    va_list args;
+    int ret;
+    va_start(args, fmt);
+    ret = vsnprintf(buf, size, fmt, args);
+    va_end(args);
+    return ret;
+}
+
+int puts(const char *s) /* writes s followed by "\n" to the UART; always returns 0 */
+{
+    const char *p = s;
+    unsigned int len = 0;
+    while (*p++) len++; /* inline strlen */
+    uart_write(s, len);
+    uart_write("\n", 1);
+    return 0;
+}
+
+int putchar(int c) /* writes one character to the UART; returns c */
+{
+    char ch = (char)c;
+    uart_write(&ch, 1);
+    return c;
+}
+
+int fputc(int c, void *stream) /* stream is ignored: output always goes to the UART */
+{
+    char ch = (char)c;
+    (void)stream;
+    uart_write(&ch, 1);
+    return c;
+}
+
+int fputs(const char *s, void *stream) /* like puts() but without the newline; stream is ignored */
+{
+    const char *p = s;
+    unsigned int len = 0;
+    (void)stream;
+    while (*p++) len++; /* inline strlen */
+    uart_write(s, len);
+    return 0;
+}
+
+/* ========== Fortified (_chk) function overrides ==========
+ * GCC with _FORTIFY_SOURCE converts printf/snprintf/memset calls to
+ * __printf_chk/__snprintf_chk/__memset_chk at compile time.
+ * These resolve to glibc which crashes bare-metal. Provide our own. */
+
+int __printf_chk(int flag, const char *fmt, ...) /* flag is ignored; forwards to vprintf() */
+{
+    va_list args;
+    int len;
+    (void)flag;
+    va_start(args, fmt);
+    len = vprintf(fmt, args);
+    va_end(args);
+    return len;
+}
+
+int __snprintf_chk(char *buf, unsigned int maxlen, int flag,
+                   unsigned int buflen, const char *fmt, ...)
+{
+    va_list args;
+    unsigned int size = (maxlen < buflen) ? maxlen : buflen; /* honor the tighter of the two limits */
+    int ret;
+    (void)flag;
+    va_start(args, fmt);
+    ret = vsnprintf(buf, size, fmt, args);
+    va_end(args);
+    return ret;
+}
+
+extern void *memset(void *s, int c, unsigned int n);
+extern void *memcpy(void *dst, const void *src, unsigned int n);
+
+void *__memset_chk(void *s, int c, unsigned int len, unsigned int slen)
+{
+    (void)slen; /* skip bounds check in bare-metal */
+    return memset(s, c, len);
+}
+
+void *__memcpy_chk(void *dst, const void *src, unsigned int len,
+                   unsigned int dstlen)
+{
+    (void)dstlen; /* destination size is not checked either */
+    return memcpy(dst, src, len);
+}
+
+/* ========== Heap allocator (bare-metal) ==========
+ * Simple allocator using _sbrk. Replaces glibc malloc/free/realloc
+ * which require TLS and internal glibc state not available bare-metal. */
+
+void *malloc(unsigned int size) /* returns NULL when _sbrk reports failure or size is too large */
+{
+    void *p = (void *)-1; /* stays "failed" when size cannot be passed as an int increment */
+    if (size <= 0x7FFFFFFFU) /* above INT_MAX, (int)size would be a negative increment */
+        p = _sbrk((int)size);
+    return (p == (void *)-1) ? (void *)0 : p;
+}
+
+void free(void *ptr)
+{
+    (void)ptr; /* no-op: bare-metal bump allocator doesn't reclaim */
+}
+
+void *realloc(void *ptr, unsigned int size) /* always allocates a new block; the old one is never released */
+{
+    void *newp = malloc(size);
+    /* Old block length is not tracked: `size` bytes are copied, which over-reads
+     * when growing. NOTE(review): both blocks come from the same _sbrk heap and
+     * may overlap in that case -- confirm the linked memcpy tolerates it. */
+    if (ptr && newp)
+        memcpy(newp, ptr, size);
+    return newp;
+}
+
+/* ========== Stdio stubs ==========
+ * stdout and fflush referenced by wolfCrypt test/benchmark code; stderr aliases the same object. */
+
+static int _stdout_fd = 1;
+void *stdout = &_stdout_fd;
+void *stderr = &_stdout_fd;
+
+int fflush(void *stream)
+{
+    (void)stream;
+    return 0; /* UART output is unbuffered */
+}
diff --git a/test-app/wolfcrypt_support.c b/test-app/wolfcrypt_support.c
index 030d746915..d15df4f036 100644
--- a/test-app/wolfcrypt_support.c
+++ b/test-app/wolfcrypt_support.c
@@ -42,6 +42,41 @@
 #elif defined(TARGET_va416x0)
     /* Use Vorago SDK HAL_time_ms (incremented by SysTick_Handler every 1ms) */
     extern volatile uint64_t HAL_time_ms;
+#elif defined(TARGET_nxp_t2080) || defined(TARGET_nxp_t1024)
+    /* PPC timebase register for accurate timing.
+     * Timebase frequency = platform_clock / 16. */
+    static uint32_t ppc_tb_hz = 0; /* timebase frequency in Hz; 0 = not yet read */
+    static unsigned long long ppc_start_ticks = 0; /* reference point set by current_time(reset) */
+
+    static unsigned long long ppc_get_ticks(void) /* returns the 64-bit timebase count */
+    {
+        unsigned long hi, lo, tmp;
+        __asm__ volatile (
+            "1: mftbu %0\n" /* upper half */
+            " mftb %1\n" /* lower half */
+            " mftbu %2\n" /* upper half again */
+            " cmpw %0, %2\n" /* did the upper half change in between? */
+            " bne 1b\n" /* yes: retry so hi and lo belong together */
+            : "=r"(hi), "=r"(lo), "=r"(tmp)
+            : : "cr0"); /* no inputs; cmpw writes CR0, so it must be declared clobbered */
+        return ((unsigned long long)hi << 32) | lo;
+    }
+
+    static uint32_t ppc_get_timebase_hz(void) /* timebase Hz = sys_clk * platform ratio / 16 */
+    {
+        /* Read Platform PLL ratio from CLOCKING_PLLPGSR register.
+         * CCSRBAR=0xFE000000, CLOCKING_BASE=CCSRBAR+0xE1000,
+         * PLLPGSR=CLOCKING_BASE+0xC00 */
+        volatile uint32_t *pllpgsr =
+            (volatile uint32_t *)(0xFE000000UL + 0xE1C00UL);
+        uint32_t plat_ratio = ((*pllpgsr) >> 1) & 0x1F; /* 5-bit field above the lowest bit */
+    #if defined(BOARD_NAII_68PPC2) || defined(TARGET_nxp_t1024)
+        uint32_t sys_clk = 100000000; /* 100 MHz */
+    #else
+        uint32_t sys_clk = 66666667; /* 66.66 MHz (T2080 RDB) */
+    #endif
+        return (sys_clk * plat_ratio) / 16; /* NOTE(review): 0 if the ratio field reads 0; callers divide by this */
+    }
 #else
     /* Simple tick counter fallback */
     static volatile unsigned int tick_counter = 0;
@@ -61,6 +96,14 @@ unsigned long my_time(unsigned long* timer)
     unsigned long t = (unsigned long)(HAL_time_ms / 1000);
     if (timer) *timer = t;
     return t;
+#elif defined(TARGET_nxp_t2080) || defined(TARGET_nxp_t1024)
+    if (ppc_tb_hz == 0) /* read the timebase frequency on first use */
+        ppc_tb_hz = ppc_get_timebase_hz();
+    {
+        unsigned long t = (unsigned long)(ppc_get_ticks() / ppc_tb_hz); /* ticks / Hz = whole seconds */
+        if (timer) *timer = t;
+        return t;
+    }
 #else
     /* Simple incrementing counter */
     tick_counter++;
@@ -82,6 +125,12 @@ double current_time(int reset)
     (void)reset;
     /* Use Vorago SDK SysTick-based millisecond counter */
     return (double)HAL_time_ms / 1000.0;
+#elif defined(TARGET_nxp_t2080) || defined(TARGET_nxp_t1024)
+    if (ppc_tb_hz == 0) /* read the timebase frequency on first use */
+        ppc_tb_hz = ppc_get_timebase_hz();
+    if (reset) /* restart the measurement from the current tick count */
+        ppc_start_ticks = ppc_get_ticks();
+    return (double)(ppc_get_ticks() - ppc_start_ticks) / (double)ppc_tb_hz; /* seconds since the last reset */
 #else
     /* Simple counter-based timing */
     if (reset)
diff --git a/tools/scripts/nxp_t2080/t2080_debug.cmm
b/tools/scripts/nxp_t2080/t2080_debug.cmm
new file mode 100644
index 0000000000..8219fd61b8
--- /dev/null
+++ b/tools/scripts/nxp_t2080/t2080_debug.cmm
@@ -0,0 +1,184 @@
+; ------------------------------------------------------------------------------
+; @Title: NXP T2080 wolfBoot Debug Script
+; @Description:
+;   Brings up the T2080, loads wolfBoot ELF symbols, and sets breakpoints
+;   for source-level debugging of wolfBoot running from NOR flash (XIP).
+; @Chip: T2080
+; Based on demo scripts from Lauterbach
+; ------------------------------------------------------------------------------
+;
+; Prerequisites:
+;   - wolfBoot must already be flashed to NOR (use t2080_flash.cmm)
+;   - wolfboot.elf must be built with debug symbols (DEBUG=1 recommended)
+; ------------------------------------------------------------------------------
+
+; Base directory for wolfBoot build output (adjust to match your build path)
+&basedir="."
+
+PRINT "========================================"
+PRINT "T2080 wolfBoot Debug Session"
+PRINT "========================================"
+PRINT ""
+
+; Reset everything
+RESet
+SYStem.RESet
+
+SYStem.BdmClock 15.MHz
+SYStem.CPU T2080
+SYStem.DETECT CPU
+CORE.ASSIGN 1.
+SYStem.Option.FREEZE ON
+SYStem.Option.IMASKASM ON
+
+; Use RCW override to bring up the system in a controlled state.
+; This halts the CPU before any flash code executes.
+PRINT "Bringing up system with RCW override..."
+SYStem.Mode.Prepare
+
+SYStem.Option.HRCWOVerRide ON
+
+; RCW values (matching the flash RCW): 16 words at DBG:0x01000000..0x0100000F
+Data.Set DBG:0x01000000 0x0c050012
+Data.Set DBG:0x01000001 0x0e000000
+Data.Set DBG:0x01000002 0x00000000
+Data.Set DBG:0x01000003 0x00000000
+Data.Set DBG:0x01000004 0xd8150002
+Data.Set DBG:0x01000005 0x00800000
+Data.Set DBG:0x01000006 0xfc027000
+Data.Set DBG:0x01000007 0xa1000000
+Data.Set DBG:0x01000008 0x00000000
+Data.Set DBG:0x01000009 0x00000000
+Data.Set DBG:0x0100000A 0x00000000
+Data.Set DBG:0x0100000B 0x0002b000
+Data.Set DBG:0x0100000C 0x00000200
+Data.Set DBG:0x0100000D 0x0080000d
+Data.Set DBG:0x0100000E 0x00000000
+Data.Set DBG:0x0100000F 0x00000004
+
+SYStem.Up
+
+SYStem.Option.HRCWOVerRide OFF
+
+PRINT "System up (CPU halted)"
+
+; Allow non-intrusive run-time memory access
+SYStem.MemAccess CPU
+
+; NOTE: Keep CCSRBAR at default 0xFE000000 to match wolfBoot.
+; wolfBoot uses CCSRBAR_DEF = 0xFE000000 in hal/nxp_ppc.h.
+; Do NOT relocate CCSRBAR here.
+
+; Set up LAW0 for CCSR at 0xFE000000 (16 MB)
+; LAWAR = ENABLE(0x80000000) | TRGT_CORENET(0x1E<<20) | SIZE_16MB(0x17)
+PRINT "Setting up LAW for CCSR access..."
+Data.Set ANC:IOBASE()+0x00C00 %Long %BE 0x00000000 ; LAWBARH0 = 0
+Data.Set ANC:IOBASE()+0x00C04 %Long %BE 0xFE000000 ; LAWBARL0 = 0xFE000000
+Data.Set ANC:IOBASE()+0x00C08 %Long %BE 0x81E00017 ; LAWAR0 = enable|CORENET|16 MB
+
+; Set up LAW1 for IFC NOR Flash at 0xE8000000 (128 MB)
+; LAWAR = ENABLE(0x80000000) | TRGT_IFC(0x1F<<20) | SIZE_128MB(0x1A)
+PRINT "Setting up LAW for flash access..."
+Data.Set ANC:IOBASE()+0x00C10 %Long %BE 0x00000000 ; LAWBARH1 = 0
+Data.Set ANC:IOBASE()+0x00C14 %Long %BE 0xE8000000 ; LAWBARL1 = 0xE8000000
+Data.Set ANC:IOBASE()+0x00C18 %Long %BE 0x81F0001A ; LAWAR1 = enable|IFC|128 MB
+
+; TLB for CCSR: Entry 1, 0xFE000000, 16 MB
+; MAS1: V=1(0x80000000), IPROT=1(0x40000000), TSIZE=14 (16 MB) <<7 = 0x700
+; MAS2: EPN=0xFE000000, I|G=0x0A
+; MAS3: RPN=0xFE000000, SX|SW|SR=0x15
+MMU.TLB1.Set 1. 0xC0000700 0xFE00000A 0xFE000015 0x00000000 0x0
+
+; TLB for Flash: Entry 2, 0xE8000000, 128 MB
+; MAS1: V=1, IPROT=1, TSIZE=17 (128 MB) <<7 = 0x880
+; MAS2: EPN=0xE8000000, I|G=0x0A
+; MAS3: RPN=0xE8000000, SX|SW|SR=0x15
+MMU.TLB1.Set 2. 0xC0000880 0xE800000A 0xE8000015 0x00000000 0x0
+
+PRINT "CCSR (0xFE000000) and Flash (0xE8000000) configured"
+
+; Verify flash is accessible - check for RCW preamble (a mismatch only warns; the script continues)
+&rcw_preamble=Data.Long(D:0xE8000000)
+PRINT "RCW preamble at 0xE8000000: &rcw_preamble"
+IF &rcw_preamble!=0xAA55AA55
+(
+  PRINT %WARNING "WARNING: Expected RCW preamble 0xAA55AA55, got &rcw_preamble"
+  PRINT " Flash may not be properly programmed"
+)
+ELSE
+(
+  PRINT " RCW preamble OK"
+)
+
+; Verify wolfBoot code is present at expected location (an erased first word, 0xFFFFFFFF, stops the script)
+&wb_first=Data.Long(D:0xEFFE0000)
+PRINT "wolfBoot first word at 0xEFFE0000: &wb_first"
+IF &wb_first==0xFFFFFFFF
+(
+  PRINT %ERROR "ERROR: wolfBoot region appears erased (0xFFFFFFFF)"
+  PRINT " Run t2080_flash.cmm first"
+  STOP
+)
+
+PRINT ""
+PRINT "Loading wolfBoot symbols..."
+
+; Load ELF symbols (debug info only - code is already in flash, XIP); second load uses /NoClear, presumably to keep the wolfboot.elf symbols - confirm
+Data.LOAD.Elf &basedir/wolfboot.elf /NoCODE
+Data.LOAD.Elf &basedir/test-app/image.elf /NoCODE /NoClear
+sYmbol.SourcePATH.Set &basedir &basedir/src &basedir/hal &basedir/lib/wolfssl &basedir/test-app
+
+PRINT " Symbols loaded from wolfboot.elf"
+PRINT " Symbols loaded from test-app/image.elf"
+
+; Set breakpoints at key wolfBoot entry points
+PRINT ""
+PRINT "Setting breakpoints..."
+
+Break.Delete /ALL
+
+; Entry point - reset vector (NOTE(review): unlike main/hal_init below, not guarded by sYmbol.EXIST)
+Break.Set _reset /Program /Onchip
+PRINT " BP: _reset (0xEFFFF000) - reset vector"
+
+; main wolfBoot entry (breakpoint is set only if the symbol exists)
+IF sYmbol.EXIST(main)
+(
+  Break.Set main /Program /Onchip
+  PRINT " BP: main"
+)
+
+; hal_init (breakpoint is set only if the symbol exists)
+IF sYmbol.EXIST(hal_init)
+(
+  Break.Set hal_init /Program /Onchip
+  PRINT " BP: hal_init"
+)
+
+PRINT ""
+
+; Set PC to wolfBoot reset vector (hard-coded address, not taken from the _reset symbol)
+Register.Set PC 0xEFFFF000
+
+PRINT "PC set to _reset (0xEFFFF000)"
+PRINT ""
+
+PRINT "========================================"
+PRINT "wolfBoot Debug Ready"
+PRINT "========================================"
+PRINT ""
+PRINT "Key symbols:"
+PRINT " _reset: 0xEFFFF000 (entry)"
+PRINT " _start_vector: 0xEFFE0000"
+PRINT " hal_init: (see symbol table)"
+PRINT ""
+PRINT "Commands:"
+PRINT " Go - Run to next breakpoint"
+PRINT " Step / Step.Over - Single step"
+PRINT " Var.View - Inspect variable"
+PRINT " Data.dump - Memory dump"
+PRINT ""
+PRINT "Ready - press Go to start execution"
+PRINT ""
+
+ENDDO
diff --git a/tools/scripts/nxp_t2080/t2080_flash.cmm b/tools/scripts/nxp_t2080/t2080_flash.cmm
new file mode 100644
index 0000000000..48146423ef
--- /dev/null
+++ b/tools/scripts/nxp_t2080/t2080_flash.cmm
@@ -0,0 +1,382 @@
+; ------------------------------------------------------------------------------
+; @Title: NXP T2080 NOR Flash Programming Script
+; @Description:
+;   Uses CPC_SRAM for target-based NOR FLASH programming.
+;   Programs wolfBoot, signed application image, RCW, and FMAN microcode; can also back up or restore the bank.
+; @Chip: T2080
+; Based on demo scripts from Lauterbach
+; ------------------------------------------------------------------------------
+;
+; NOTE: NVMRO pin (GPIO 21) determines which flash bank is visible:
+;   NVMRO = 0 (GPIO 21 LOW) = Primary flash bank
+;   NVMRO = 1 (GPIO 21 HIGH) = Secondary flash bank
+;
+; This script operates on whichever flash bank is selected by NVMRO.
+; Set NVMRO appropriately and power cycle before running this script.
+; ------------------------------------------------------------------------------
+
+; Base directory for wolfBoot build output (adjust to match your build path)
+&basedir="."
+
+; Number of FLASH banks (1 or 2); the CFI-detected size is divided by this value in PART 2
+&flashBanks=2
+
+; FLASH parameters (NOTE(review): &waitstates is not referenced anywhere else in this script)
+&waitstates=6
+
+; PART 1: Set up MMU, IFC and CPC for target-based flash programming
+; ------------------------------------------------------------------------------
+
+PRINT "========================================"
+PRINT "wolfBoot T2080 Flash Programming"
+PRINT "========================================"
+PRINT ""
+
+SYStem.RESet
+SYStem.BdmClock 15.MHz
+SYStem.CPU T2080
+SYStem.DETECT CPU
+CORE.ASSIGN 1.
+SYStem.Option.FREEZE OFF
+
+; Use RCW override to prevent flash RCW from running before we are ready.
+; This keeps the system halted for configuration.
+PRINT "Initializing with temporary RCW override..."
+SYStem.Mode.Prepare
+
+SYStem.Option.HRCWOVerRide ON
+
+; Load RCW (known-working values for T2080): 16 words at DBG:0x01000000..0x0100000F
+Data.Set DBG:0x01000000 0x0c050012
+Data.Set DBG:0x01000001 0x0e000000
+Data.Set DBG:0x01000002 0x00000000
+Data.Set DBG:0x01000003 0x00000000
+Data.Set DBG:0x01000004 0xd8150002
+Data.Set DBG:0x01000005 0x00800000
+Data.Set DBG:0x01000006 0xfc027000
+Data.Set DBG:0x01000007 0xa1000000
+Data.Set DBG:0x01000008 0x00000000
+Data.Set DBG:0x01000009 0x00000000
+Data.Set DBG:0x0100000A 0x00000000
+Data.Set DBG:0x0100000B 0x0002b000
+Data.Set DBG:0x0100000C 0x00000200
+Data.Set DBG:0x0100000D 0x0080000d
+Data.Set DBG:0x0100000E 0x00000000
+Data.Set DBG:0x0100000F 0x00000004
+
+SYStem.Up
+
+SYStem.Option.HRCWOVerRide OFF
+
+PRINT "System initialized (halted state with temporary RCW)"
+
+; Set CCSRBAR to 0x40000000 (value written at offset 0x4, then offset 0x8 is pulsed to commit)
+Data.Set ANC:IOBASE()+0x00004 %Long 0x40000000
+Data.Set ANC:IOBASE()+0x00008 %Long 0x80000000 ; commit
+Data.Set ANC:IOBASE()+0x00008 %Long 0x00000000
+
+; Set up local access window: 0xC0000000--0xFFFFFFFF
+Data.Set ANC:IOBASE()+0x00C00 %Long %BE 0x00000000
+Data.Set ANC:IOBASE()+0x00C04 %Long %BE 0xC0000000
+Data.Set ANC:IOBASE()+0x00C08 %Long %BE 0x81F0001D
+
+; Set up local access window: 0x10000000--0x1007FFFF (CPC memory complex 1)
+Data.Set ANC:IOBASE()+0x00C10 %Long %BE 0x00000000
+Data.Set ANC:IOBASE()+0x00C14 %Long %BE 0x10000000
+Data.Set ANC:IOBASE()+0x00C18 %Long %BE 0x81000012
+
+; Set up local access window: 0x10080000--0x100FFFFF (CPC memory complex 2)
+Data.Set ANC:IOBASE()+0x00C20 %Long %BE 0x00000000
+Data.Set ANC:IOBASE()+0x00C24 %Long %BE 0x10080000
+Data.Set ANC:IOBASE()+0x00C28 %Long %BE 0x81100012
+
+; Enable CoreNet Platform Cache (CPC) as SRAM
+; Block 1, memory complex 1
+Data.Set ANC:IOBASE()+0x10000 %Long %BE 0x80200000
+Data.Set ANC:IOBASE()+0x10100 %Long %BE 0x00000000
+Data.Set ANC:IOBASE()+0x10104 %Long %BE 0x10000009 ; 512 kB, start at 0x10000000
+; Block 2, memory complex 2
+Data.Set ANC:IOBASE()+0x11000 %Long %BE 0x80200000
+Data.Set ANC:IOBASE()+0x11100 %Long %BE 0x00000000
+Data.Set ANC:IOBASE()+0x11104 %Long %BE 0x10080009 ; 512 kB, start at 0x10080000
+
+; TLB entry for FLASH 0xC0000000--0xFFFFFFFF (cache-inhibited, guarded)
+MMU.TLB1.Set 0. 0xC0000A00 0xC000000A 0xC0000015 0x00000000 0x0
+
+; TLB entry for CPC-SRAM 0x10000000--0x100FFFFF
+MMU.TLB1.Set 1. 0x80000500 0x10000002 0x10000015 0x00000000 0x0
+
+; PART 2: FLASH setup (chip select and debugger)
+; ------------------------------------------------------------------------------
+
+; Initial FLASH base address (8 MB below &flashend); replaced by the computed base further down
+&flashbase=0xEF800000
+; End of flash region
+&flashend=0xEFFFFFFF
+
+; Set up initial chip select for FLASH memory
+Data.Set ANC:IOBASE.ADDRESS()+0x00124010 %Long (Data.Long(ANC:IOBASE.ADDRESS()+0x00124010)&0x0000FFBF)|&flashbase
+
+; Get port width: 2-bit field read back with >>7 and mask 0x3 (1 = BYTE, 2 = WORD, anything else stops the script)
+&portwidth=(Data.Long(ANC:IOBASE.ADDRESS()+0x00124010)>>7.)&0x00000003
+
+IF &portwidth==1
+  &flashwidth="BYTE"
+ELSE IF &portwidth==2
+  &flashwidth="WORD"
+ELSE
+(
+  PRINT %ERROR "ERROR: invalid IFC_CS0 port width"
+  STOP
+)
+
+; Get FLASH size (a result of 0 is treated as "not detected" and stops the script)
+&flashsize=FLASH.CFI.SIZE(ANC:&flashbase,&flashwidth)
+IF (&flashsize==0)
+(
+  PRINT %ERROR "ERROR: FLASH module could not be detected"
+  STOP
+)
+
+; Adjust for dual-bank devices (divides by &flashBanks set at the top of the script)
+&flashsize=&flashsize/&flashBanks
+
+; Calculate real FLASH base address so that the bank ends at &flashend
+&flashbase=(&flashend+1)-&flashsize
+
+; Set up true chip select for FLASH memory
+Data.Set ANC:IOBASE.ADDRESS()+0x00124010 %Long (Data.Long(ANC:IOBASE.ADDRESS()+0x00124010)&0x0000FFFF)|&flashbase
+
+; Declare FLASH
+FLASH.RESet
+; Flash target: code at 0x10000000, data at 0x10002000, buffer 0x4000
+FLASH.CFI &flashbase &flashwidth /TARGET 0x10000000 0x10002000 0x4000 /DualPort
+
+&sizemb=FORMAT.Decimal(0,&flashsize>>20.)
+PRINT "Configured Flash at address &flashbase, &sizemb MBytes."
+
+; ============================================================================
+; PART 3: Interactive Flash Programming Options (the first option accepted runs, then jumps to flash_complete)
+; ============================================================================
+
+PRINT ""
+PRINT "========================================"
+PRINT "Flash Programming Options"
+PRINT "========================================"
+PRINT ""
+PRINT "NOTE: NVMRO pin (GPIO 21) determines which flash bank is programmed"
+PRINT ""
+
+; ============================================================================
+; Option 1: Flash wolfBoot components (most common operation); each step unlocks, erases+programs, then re-loads with /Verify
+; ============================================================================
+DIALOG.YESNO "Flash wolfBoot components (RCW + FMAN + wolfBoot + test app)?"
+ENTRY &flashwolfboot
+IF &flashwolfboot
+(
+  PRINT ""
+  PRINT "========================================"
+  PRINT "Flashing wolfBoot Components"
+  PRINT "========================================"
+  PRINT ""
+
+  ; --------------------------------------------------------------------------
+  ; 1. RCW (Reset Configuration Word) - CRITICAL for boot
+  ; --------------------------------------------------------------------------
+  PRINT "1. Programming RCW (Reset Configuration Word)..."
+  PRINT " Source: &basedir/t2080_rcw.bin"
+  PRINT " Address: 0xE8000000 (64 KB)"
+  FLASH.UNLOCK 0xE8000000--0xE800FFFF
+  FLASH.ReProgram 0xE8000000--0xE800FFFF /Erase
+  Data.LOAD.binary &basedir/t2080_rcw.bin 0xE8000000
+  FLASH.ReProgram.off
+  Data.LOAD.binary &basedir/t2080_rcw.bin 0xE8000000 /Verify
+  PRINT " RCW programmed and verified"
+  PRINT ""
+
+  ; --------------------------------------------------------------------------
+  ; 2. FMAN Microcode - Required for networking
+  ; --------------------------------------------------------------------------
+  PRINT "2. Programming FMAN microcode..."
+  PRINT " Source: &basedir/t2080_fman.bin"
+  PRINT " Address: 0xE8020000 (128 KB)"
+  FLASH.UNLOCK 0xE8020000--0xE803FFFF
+  FLASH.ReProgram 0xE8020000--0xE803FFFF /Erase
+  Data.LOAD.binary &basedir/t2080_fman.bin 0xE8020000
+  FLASH.ReProgram.off
+  Data.LOAD.binary &basedir/t2080_fman.bin 0xE8020000 /Verify
+  PRINT " FMAN microcode programmed and verified"
+  PRINT ""
+
+  ; --------------------------------------------------------------------------
+  ; 3. wolfBoot Bootloader (top 128 KB of the bank, 0xEFFE0000--0xEFFFFFFF)
+  ; --------------------------------------------------------------------------
+  PRINT "3. Programming wolfBoot bootloader..."
+  PRINT " Source: &basedir/wolfboot.bin"
+  PRINT " Address: 0xEFFE0000 (128 KB)"
+  FLASH.UNLOCK 0xEFFE0000--0xEFFFFFFF
+  FLASH.ReProgram 0xEFFE0000--0xEFFFFFFF /Erase
+  Data.LOAD.binary &basedir/wolfboot.bin 0xEFFE0000
+  FLASH.ReProgram.off
+  Data.LOAD.binary &basedir/wolfboot.bin 0xEFFE0000 /Verify
+  PRINT " wolfBoot programmed and verified"
+  PRINT ""
+
+  ; --------------------------------------------------------------------------
+  ; 4. Test Application (Boot Partition, the 1 MB directly below wolfBoot)
+  ; --------------------------------------------------------------------------
+  PRINT "4. Programming test application..."
+  PRINT " Source: &basedir/test-app/image_v1_signed.bin"
+  PRINT " Address: 0xEFEE0000 (WOLFBOOT_PARTITION_BOOT_ADDRESS, 1 MB)"
+  FLASH.UNLOCK 0xEFEE0000--0xEFFDFFFF
+  FLASH.ReProgram 0xEFEE0000--0xEFFDFFFF /Erase
+  Data.LOAD.binary &basedir/test-app/image_v1_signed.bin 0xEFEE0000
+  FLASH.ReProgram.off
+  Data.LOAD.binary &basedir/test-app/image_v1_signed.bin 0xEFEE0000 /Verify
+  PRINT " Test application programmed and verified"
+  PRINT ""
+
+  PRINT ""
+  PRINT "========================================"
+  PRINT "wolfBoot Flash Programming Complete!"
+  PRINT "========================================"
+  PRINT ""
+  PRINT "Flash Layout:"
+  PRINT " 0xE8000000: RCW (64 KB)"
+  PRINT " 0xE8020000: FMAN Microcode (128 KB)"
+  PRINT " 0xEFEE0000: Test Application (boot partition, 1 MB)"
+  PRINT " 0xEFFDFFFF: End of boot partition"
+  PRINT " 0xEFFE0000: wolfBoot (128 KB)"
+  PRINT ""
+
+  PRINT "Verification - First bytes of each component:"
+  PRINT ""
+  PRINT "RCW at 0xE8000000 (should be AA 55 AA 55):"
+  Data.dump 0xE8000000++0x3F /Long
+  PRINT ""
+  PRINT "wolfBoot at 0xEFFE0000:"
+  Data.dump 0xEFFE0000++0x3F /Long
+  PRINT ""
+  PRINT "Test app at 0xEFEE0000:"
+  Data.dump 0xEFEE0000++0x3F /Long
+  PRINT ""
+
+  GOTO flash_complete
+)
+ELSE
+(
+  PRINT ""
+  PRINT "Skipping wolfBoot component flashing"
+  PRINT ""
+)
+
+; ============================================================================
+; Option 2: Backup current flash bank (offered only if Option 1 was declined; saves the fixed range 0xE8000000--0xEFFFFFFF)
+; ============================================================================
+PRINT ""
+PRINT "========================================"
+PRINT "Optional: Backup Current Flash Bank"
+PRINT "========================================"
+PRINT ""
+
+DIALOG.YESNO "Backup current flash bank to &basedir/t2080_backup.bin?"
+ENTRY &dobackup
+IF &dobackup
+(
+  PRINT ""
+  PRINT "========================================"
+  PRINT "Backing Up Flash"
+  PRINT "========================================"
+  PRINT ""
+  PRINT "Saving to: &basedir/t2080_backup.bin"
+  PRINT "Saving flash (128 MB) - this will take several minutes..."
+  PRINT ""
+
+  Data.SAVE.Binary &basedir/t2080_backup.bin 0xE8000000--0xEFFFFFFF
+
+  PRINT "Flash backup saved to: &basedir/t2080_backup.bin"
+  PRINT ""
+
+  PRINT "First 64 bytes at 0xE8000000 (RCW area):"
+  Data.dump 0xE8000000++0x3F /Long
+  PRINT ""
+
+  PRINT "========================================"
+  PRINT "Backup Complete!"
+  PRINT "========================================"
+  PRINT ""
+  PRINT "Verify with: hexdump -C t2080_backup.bin | head -n 8"
+  PRINT "Valid flash should start with 'AA 55 AA 55' (RCW preamble)"
+  PRINT ""
+
+  GOTO flash_complete
+)
+ELSE
+(
+  PRINT ""
+  PRINT "Skipping flash backup"
+  PRINT ""
+)
+
+; ============================================================================
+; Option 3: Restore full backup (t2080_backup.bin); offered only if Options 1 and 2 were declined
+; ============================================================================
+DIALOG.YESNO "Restore full backup from &basedir/t2080_backup.bin (128 MB)?"
+ENTRY &restorebackup
+IF &restorebackup
+(
+  PRINT ""
+  PRINT "========================================"
+  PRINT "Restoring Full Flash Backup"
+  PRINT "========================================"
+  PRINT ""
+  PRINT "Source: &basedir/t2080_backup.bin"
+  PRINT "Target: 0xE8000000--0xEFFFFFFF (128 MB)"
+  PRINT "This will take several minutes..."
+  PRINT ""
+
+  PRINT "Unlocking flash..."
+  FLASH.UNLOCK 0xE8000000--0xEFFFFFFF
+
+  FLASH.ReProgram 0xE8000000--0xEFFFFFFF /Erase
+  Data.LOAD.binary &basedir/t2080_backup.bin 0xE8000000
+  FLASH.ReProgram.off
+
+  PRINT ""
+  PRINT "Verifying restored backup..."
+  Data.LOAD.binary &basedir/t2080_backup.bin 0xE8000000 /Verify
+
+  PRINT ""
+  PRINT "Full backup restored and verified"
+  PRINT ""
+
+  PRINT "RCW at 0xE8000000 (should be AA 55 AA 55):"
+  Data.dump 0xE8000000++0x3F /Long
+  PRINT ""
+
+  GOTO flash_complete
+)
+ELSE
+(
+  PRINT ""
+  PRINT "No flash operation selected"
+  PRINT ""
+)
+
+flash_complete:
+
+PRINT ""
+PRINT "========================================"
+PRINT "Script Complete!"
+PRINT "========================================"
+PRINT ""
+PRINT "Next steps:"
+PRINT " 1. Disconnect Lauterbach"
+PRINT " 2. Power cycle the board"
+PRINT " 3. Monitor UART for boot output"
+PRINT ""
+PRINT "Expected UART output after wolfBoot flash:"
+PRINT " wolfBoot init"
+PRINT ""
+
+ENDDO