Table of Contents

  1. ARM Toolchain Setup
  2. Toolchain File Structure
  3. Linker Scripts Integration
  4. Startup Code Configuration
  5. Memory Map Configuration
  6. Binary Output Formats
  7. Flash Programming
  8. STM32 HAL Integration
  9. CMSIS Configuration
  10. Debugging with GDB + OpenOCD
  11. Size Optimization Flags
Back to CMake Mastery Series

Embedded ARM (Bare Metal)

June 4, 2026 Wasil Zafar 14 min read

A complete platform guide for building bare-metal ARM firmware with CMake — toolchain files for arm-none-eabi-gcc, linker script integration, memory maps, binary output formats, flash programming with OpenOCD and J-Link, and STM32 HAL/CMSIS configuration.

ARM Toolchain Setup

Bare-metal ARM development uses the arm-none-eabi toolchain — a cross-compiler that produces code for ARM Cortex-M and Cortex-R processors without an operating system. Unlike hosted compilers, this toolchain links against newlib (or newlib-nano) rather than glibc, producing standalone firmware binaries.

# Ubuntu/Debian: compiler, binutils, newlib, GDB and OpenOCD from apt
sudo apt install \
    gcc-arm-none-eabi \
    binutils-arm-none-eabi \
    libnewlib-arm-none-eabi \
    gdb-multiarch \
    openocd

# macOS: toolchain cask plus the OpenOCD formula from Homebrew
brew install --cask gcc-arm-embedded
brew install open-ocd

# Windows: toolchain package from Chocolatey
choco install gcc-arm-embedded

# Sanity check: each tool should print its version
arm-none-eabi-gcc --version
arm-none-eabi-objcopy --version
openocd --version
Toolchain Versioning: Pin the toolchain version in your project documentation. ARM GCC releases (e.g., 13.2.rel1) can introduce code generation differences. Use the official ARM GNU Toolchain downloads for reproducible builds.

Toolchain File Structure

CMake requires a toolchain file to cross-compile for ARM. This file sets CMAKE_SYSTEM_NAME to Generic (indicating no OS), specifies the compiler paths, and makes CMake's compiler check build a static library instead of linking an executable (the link step would fail without a linker script).

# arm-none-eabi-toolchain.cmake
# Toolchain file for ARM Cortex-M bare-metal development with arm-none-eabi-gcc.
# Select it at configure time with -DCMAKE_TOOLCHAIN_FILE=<path to this file>.

# "Generic" tells CMake the target runs without an operating system
set(CMAKE_SYSTEM_NAME Generic)
set(CMAKE_SYSTEM_PROCESSOR arm)

# Toolchain prefix — adjust path if not in system PATH
set(TOOLCHAIN_PREFIX arm-none-eabi-)

# Find the toolchain executables (the gcc driver also assembles .s files)
find_program(CMAKE_C_COMPILER ${TOOLCHAIN_PREFIX}gcc)
find_program(CMAKE_CXX_COMPILER ${TOOLCHAIN_PREFIX}g++)
find_program(CMAKE_ASM_COMPILER ${TOOLCHAIN_PREFIX}gcc)
find_program(CMAKE_AR ${TOOLCHAIN_PREFIX}ar)
find_program(CMAKE_OBJCOPY ${TOOLCHAIN_PREFIX}objcopy)
find_program(CMAKE_OBJDUMP ${TOOLCHAIN_PREFIX}objdump)
find_program(CMAKE_SIZE ${TOOLCHAIN_PREFIX}size)

# Make CMake's compiler checks build a static library instead of linking an
# executable: the checks still run, but no link step (and therefore no linker
# script or runtime) is needed for them to succeed.
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

# CPU-specific flags (Cortex-M4 with FPU example)
set(CPU_FLAGS "-mcpu=cortex-m4 -mthumb -mfpu=fpv4-sp-d16 -mfloat-abi=hard")

set(CMAKE_C_FLAGS_INIT "${CPU_FLAGS}")
set(CMAKE_CXX_FLAGS_INIT "${CPU_FLAGS}")
set(CMAKE_ASM_FLAGS_INIT "${CPU_FLAGS}")

# Optimization defaults per build type.
# C and C++ get the same values; without the CXX entries, C++ sources would
# fall back to CMake's built-in per-config defaults instead of -Os/-Og.
set(CMAKE_C_FLAGS_DEBUG_INIT "-Og -g3 -DDEBUG")
set(CMAKE_C_FLAGS_RELEASE_INIT "-Os -DNDEBUG")
set(CMAKE_C_FLAGS_MINSIZEREL_INIT "-Os -DNDEBUG -flto")
set(CMAKE_C_FLAGS_RELWITHDEBINFO_INIT "-Os -g -DNDEBUG")

set(CMAKE_CXX_FLAGS_DEBUG_INIT "-Og -g3 -DDEBUG")
set(CMAKE_CXX_FLAGS_RELEASE_INIT "-Os -DNDEBUG")
set(CMAKE_CXX_FLAGS_MINSIZEREL_INIT "-Os -DNDEBUG -flto")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT "-Os -g -DNDEBUG")

# Disable shared libraries (not supported on bare metal)
set(BUILD_SHARED_LIBS OFF)
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")

# Search behaviour for find_*(): programs are host tools, so never look for
# them under CMAKE_FIND_ROOT_PATH; libraries, headers and packages belong to
# the target, so look for them only there.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
# Generate a Ninja build tree in ./build using the ARM toolchain file
cmake -S . -B build -G Ninja \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_TOOLCHAIN_FILE=cmake/arm-none-eabi-toolchain.cmake

# Compile and link everything in the build tree
cmake --build build
Critical: Always set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY in bare-metal toolchain files. Without this, CMake's compiler test attempts to link an executable, which fails without a linker script — causing cryptic "compiler broken" errors.

Linker Scripts Integration

Linker scripts define the memory layout of your target MCU — specifying where code (.text), initialized data (.data), and uninitialized data (.bss) reside. CMake must pass the linker script to the linker with the -T flag, which the gcc driver forwards at link time.

cmake_minimum_required(VERSION 3.21)
project(FirmwareProject LANGUAGES C CXX ASM)

# Linker script path (relative to project root)
set(LINKER_SCRIPT ${CMAKE_SOURCE_DIR}/linker/STM32F407VGTx_FLASH.ld)

add_executable(firmware
    src/main.c
    src/system_init.c
    startup/startup_stm32f407xx.s
)

# Linker behaviour: memory layout, dead-section removal, usage report, map file.
# Arguments that embed a path are quoted so each stays a single argument.
target_link_options(firmware PRIVATE
    "-T${LINKER_SCRIPT}"
    -Wl,--gc-sections
    -Wl,--print-memory-usage
    "-Wl,-Map=${CMAKE_BINARY_DIR}/firmware.map"
    --specs=nano.specs
    --specs=nosys.specs
)

# Libraries go through target_link_libraries so they are placed after the
# object files on the link line. As link *options* they come before the
# objects, where a static archive cannot resolve symbols the objects need.
target_link_libraries(firmware PRIVATE c m nosys)

# Re-link when the linker script changes, and name the output firmware.elf
set_target_properties(firmware PROPERTIES
    LINK_DEPENDS ${LINKER_SCRIPT}
    SUFFIX ".elf"
)
LINK_DEPENDS: Setting LINK_DEPENDS on the target ensures CMake re-links the firmware when the linker script is modified. Without this property, changes to memory layout won't trigger a rebuild.

Startup Code Configuration

ARM Cortex-M devices require startup code that initializes the vector table, copies .data from Flash to RAM, zeros .bss, and calls main(). CMake must compile .s assembly files with the ASM compiler.

cmake_minimum_required(VERSION 3.21)
# Listing ASM here already enables the assembler, so no separate
# enable_language(ASM) call is needed.
project(CortexM4App LANGUAGES C CXX ASM)

# Startup file — vendor-provided or custom
set(STARTUP_FILE ${CMAKE_SOURCE_DIR}/startup/startup_stm32f407xx.s)

# Firmware image: startup assembly plus the C sources
add_executable(app
    ${STARTUP_FILE}
    src/main.c
    src/system_stm32f4xx.c
    src/syscalls.c
)

# Include paths for project, CMSIS and device headers
target_include_directories(app PRIVATE
    ${CMAKE_SOURCE_DIR}/include
    ${CMAKE_SOURCE_DIR}/CMSIS/Include
    ${CMAKE_SOURCE_DIR}/CMSIS/Device/ST/STM32F4xx/Include
)

# Preprocessor definitions: target MCU (used by STM32 headers), HAL switch,
# and the HSE_VALUE constant
target_compile_definitions(app PRIVATE
    STM32F407xx
    USE_HAL_DRIVER
    HSE_VALUE=8000000U
)

Memory Map Configuration

The linker script defines the physical memory regions of your MCU. Here's a typical layout for an STM32F407 with 1MB Flash and 192KB RAM:

/* STM32F407VGTx_FLASH.ld — Memory regions and section placement */
MEMORY
{
    FLASH (rx)      : ORIGIN = 0x08000000, LENGTH = 1024K
    RAM (xrw)       : ORIGIN = 0x20000000, LENGTH = 128K
    CCMRAM (xrw)    : ORIGIN = 0x10000000, LENGTH = 64K
}

/* Entry point */
ENTRY(Reset_Handler)

/* Initial stack pointer: first address past the end of RAM.
   NOTE(review): vendor startup files usually reference _estack — confirm
   against the startup_*.s used in your project. */
_estack = ORIGIN(RAM) + LENGTH(RAM);

/* Stack and heap sizes (reserved by ._user_heap_stack below) */
_Min_Heap_Size = 0x200;
_Min_Stack_Size = 0x400;

SECTIONS
{
    /* Interrupt vector table; KEEP stops --gc-sections from discarding it */
    .isr_vector :
    {
        . = ALIGN(4);
        KEEP(*(.isr_vector))
        . = ALIGN(4);
    } >FLASH

    /* Code and read-only data stay in Flash */
    .text :
    {
        . = ALIGN(4);
        *(.text)
        *(.text*)
        *(.rodata)
        *(.rodata*)
        . = ALIGN(4);
        _etext = .;
    } >FLASH

    /* Flash (load) address of .data; the source address for the startup
       copy of initialized data into RAM */
    _sidata = LOADADDR(.data);

    /* Initialized data: runs from RAM, initial values stored in Flash */
    .data :
    {
        . = ALIGN(4);
        _sdata = .;
        *(.data)
        *(.data*)
        . = ALIGN(4);
        _edata = .;
    } >RAM AT> FLASH

    /* Zero-initialized data: occupies RAM only */
    .bss :
    {
        . = ALIGN(4);
        _sbss = .;
        *(.bss)
        *(.bss*)
        *(COMMON)
        . = ALIGN(4);
        _ebss = .;
    } >RAM

    /* Reserves the minimum heap and stack so the link fails if RAM is too
       small. 'end'/'_end' mark where the heap starts; a newlib-style _sbrk
       is expected to use them — confirm against your syscalls implementation. */
    ._user_heap_stack :
    {
        . = ALIGN(8);
        PROVIDE(end = .);
        PROVIDE(_end = .);
        . = . + _Min_Heap_Size;
        . = . + _Min_Stack_Size;
        . = ALIGN(8);
    } >RAM
}
Embedded Memory Optimization Scenario

On a Cortex-M4 with 128KB RAM, placing lookup tables in Flash (.rodata) instead of RAM saves precious SRAM for stack and heap. Use const qualifiers on all read-only data, and verify placement with arm-none-eabi-size --format=SysV firmware.elf to audit section allocation.

Memory Layout Flash vs RAM Cortex-M4

Binary Output Formats

Flash programmers require firmware in specific binary formats — Intel HEX for most tools, raw BIN for mass storage bootloaders, and ELF for debuggers. CMake's add_custom_command automates conversion from the ELF output.

cmake_minimum_required(VERSION 3.21)
project(FirmwareBuild LANGUAGES C ASM)

add_executable(firmware src/main.c startup/startup.s)

# ... (linker script and compile options as above) ...

# The POST_BUILD steps below run after every link of `firmware`.
# $<TARGET_FILE:firmware> expands to the full path of the linked ELF, so the
# commands keep working whatever the target's output name or directory is.
# BYPRODUCTS registers the generated files with the build system.

# Generate HEX file from ELF
add_custom_command(TARGET firmware POST_BUILD
    COMMAND ${CMAKE_OBJCOPY} -O ihex
        $<TARGET_FILE:firmware> ${CMAKE_BINARY_DIR}/firmware.hex
    BYPRODUCTS ${CMAKE_BINARY_DIR}/firmware.hex
    COMMENT "Generating Intel HEX: firmware.hex"
    VERBATIM
)

# Generate raw BIN file from ELF
add_custom_command(TARGET firmware POST_BUILD
    COMMAND ${CMAKE_OBJCOPY} -O binary
        $<TARGET_FILE:firmware> ${CMAKE_BINARY_DIR}/firmware.bin
    BYPRODUCTS ${CMAKE_BINARY_DIR}/firmware.bin
    COMMENT "Generating raw binary: firmware.bin"
    VERBATIM
)

# Print section sizes after build
add_custom_command(TARGET firmware POST_BUILD
    COMMAND ${CMAKE_SIZE} --format=berkeley $<TARGET_FILE:firmware>
    COMMENT "Firmware size summary:"
    VERBATIM
)

# Generate disassembly for inspection (objdump writes to stdout, so the
# listing is captured with a shell redirect)
add_custom_command(TARGET firmware POST_BUILD
    COMMAND ${CMAKE_OBJDUMP} -d -S $<TARGET_FILE:firmware>
        > ${CMAKE_BINARY_DIR}/firmware.lst
    BYPRODUCTS ${CMAKE_BINARY_DIR}/firmware.lst
    COMMENT "Generating disassembly listing"
    VERBATIM
)

Flash Programming

CMake custom targets provide one-command flash programming. Both OpenOCD and J-Link Commander integrate seamlessly.

# OpenOCD flash target.
# $<TARGET_FILE:firmware> resolves to the ELF actually produced by the
# `firmware` target, so this does not depend on a particular output suffix
# or output directory.
add_custom_target(flash
    COMMAND openocd
        -f interface/stlink-v2.cfg
        -f target/stm32f4x.cfg
        -c "program $<TARGET_FILE:firmware> verify reset exit"
    DEPENDS firmware
    COMMENT "Flashing firmware via OpenOCD (ST-Link V2)"
    VERBATIM
)

# J-Link flash target (using J-Link Commander).
# The command script is generated from a template at configure time; @ONLY
# restricts substitution to @VAR@ references.
set(JLINK_DEVICE "STM32F407VG")
configure_file(
    ${CMAKE_SOURCE_DIR}/tools/flash.jlink.in
    ${CMAKE_BINARY_DIR}/flash.jlink
    @ONLY
)

add_custom_target(flash-jlink
    COMMAND JLinkExe -device ${JLINK_DEVICE}
        -if SWD -speed 4000 -autoconnect 1
        -CommandFile ${CMAKE_BINARY_DIR}/flash.jlink
    DEPENDS firmware
    COMMENT "Flashing firmware via J-Link"
    VERBATIM
)

# Erase target (useful for factory reset).
# Erases 0x100000 bytes starting at 0x08000000.
add_custom_target(erase
    COMMAND openocd
        -f interface/stlink-v2.cfg
        -f target/stm32f4x.cfg
        -c "init; reset halt; flash erase_address 0x08000000 0x100000; exit"
    COMMENT "Erasing entire Flash memory"
    VERBATIM
)
# flash.jlink.in — J-Link Commander script template; configure_file() replaces @CMAKE_BINARY_DIR@
r
h
loadfile @CMAKE_BINARY_DIR@/firmware.hex
verifybin @CMAKE_BINARY_DIR@/firmware.bin, 0x08000000
r
g
exit
# Usage: configure, build, and flash in one sequence
# 1. Configure using the "arm-release" preset
#    (assumes the preset places its build tree in ./build — confirm in CMakePresets.json)
cmake --preset arm-release
# 2. Build the firmware target
cmake --build build --target firmware
# 3. Run the flash target to program the device
cmake --build build --target flash

STM32 HAL Integration

The STM32 Hardware Abstraction Layer (HAL) is distributed as source files generated by STM32CubeMX. CMake can manage these sources cleanly as a static library.

cmake_minimum_required(VERSION 3.21)
project(STM32HALProject LANGUAGES C ASM)

# Directory holding the HAL driver implementation files (from STM32CubeMX)
set(HAL_SRC_DIR ${CMAKE_SOURCE_DIR}/Drivers/STM32F4xx_HAL_Driver/Src)

# Hand-picked HAL modules: list only the peripherals the project uses
set(HAL_SOURCES
    ${HAL_SRC_DIR}/stm32f4xx_hal.c
    ${HAL_SRC_DIR}/stm32f4xx_hal_cortex.c
    ${HAL_SRC_DIR}/stm32f4xx_hal_gpio.c
    ${HAL_SRC_DIR}/stm32f4xx_hal_rcc.c
    ${HAL_SRC_DIR}/stm32f4xx_hal_uart.c
    ${HAL_SRC_DIR}/stm32f4xx_hal_dma.c
    ${HAL_SRC_DIR}/stm32f4xx_hal_tim.c
    ${HAL_SRC_DIR}/stm32f4xx_hal_spi.c
    ${HAL_SRC_DIR}/stm32f4xx_hal_i2c.c
)

# The HAL is compiled once into a static library
add_library(stm32_hal STATIC)
target_sources(stm32_hal PRIVATE ${HAL_SOURCES})

# Vendor code: silence every warning for this library only
target_compile_options(stm32_hal PRIVATE -w)

# PUBLIC so that anything linking stm32_hal inherits the same definitions
target_compile_definitions(stm32_hal PUBLIC
    STM32F407xx
    USE_HAL_DRIVER
)

# PUBLIC header search paths, likewise inherited by consumers
target_include_directories(stm32_hal PUBLIC
    ${CMAKE_SOURCE_DIR}/Drivers/STM32F4xx_HAL_Driver/Inc
    ${CMAKE_SOURCE_DIR}/Drivers/CMSIS/Include
    ${CMAKE_SOURCE_DIR}/Drivers/CMSIS/Device/ST/STM32F4xx/Include
    ${CMAKE_SOURCE_DIR}/Core/Inc  # stm32f4xx_hal_conf.h lives here
)

# Application image; linking stm32_hal brings its PUBLIC usage requirements
add_executable(app)
target_sources(app PRIVATE
    Core/Src/main.c
    Core/Src/stm32f4xx_it.c
    Core/Src/system_stm32f4xx.c
    Core/Startup/startup_stm32f407vgtx.s
)
target_link_libraries(app PRIVATE stm32_hal)
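HAL Bloat Warning: Avoid globbing all HAL sources with file(GLOB ...). Every unused HAL module lengthens the build, and can add 1–8KB to your binary whenever its code ends up linked — for example when the sources are compiled directly into the executable without -ffunction-sections/-fdata-sections. Explicitly list only the peripheral drivers your project actually uses — the linker's --gc-sections helps, but it can only discard sections that nothing references, so it is not a substitute for a curated source list.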

CMSIS Configuration

CMSIS (Cortex Microcontroller Software Interface Standard) provides a vendor-independent hardware abstraction layer. It includes the core register definitions, startup code templates, and DSP libraries.

cmake_minimum_required(VERSION 3.21)
project(CMSISProject LANGUAGES C ASM)

# CMSIS-Core: an INTERFACE target that only carries include paths
# (required for all Cortex-M projects)
add_library(cmsis_core INTERFACE)
target_include_directories(cmsis_core
    INTERFACE
        ${CMAKE_SOURCE_DIR}/CMSIS/Core/Include
        ${CMAKE_SOURCE_DIR}/CMSIS/Device/ST/STM32F4xx/Include
)

# CMSIS-DSP (optional — for signal processing), built as a static library
add_library(cmsis_dsp STATIC)
target_sources(cmsis_dsp
    PRIVATE
        CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctions.c
        CMSIS/DSP/Source/FastMathFunctions/FastMathFunctions.c
        CMSIS/DSP/Source/FilteringFunctions/FilteringFunctions.c
        CMSIS/DSP/Source/TransformFunctions/TransformFunctions.c
        CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c
)

# PUBLIC: consumers of cmsis_dsp inherit the core headers, the DSP headers
# and the two definitions below
target_link_libraries(cmsis_dsp PUBLIC cmsis_core)
target_include_directories(cmsis_dsp
    PUBLIC
        ${CMAKE_SOURCE_DIR}/CMSIS/DSP/Include
)
target_compile_definitions(cmsis_dsp
    PUBLIC
        ARM_MATH_CM4
        __FPU_PRESENT=1
)

# Application using CMSIS-DSP
add_executable(dsp_app)
target_sources(dsp_app PRIVATE src/main.c src/signal_processing.c)
target_link_libraries(dsp_app PRIVATE cmsis_core cmsis_dsp)

Debugging with GDB + OpenOCD

Remote debugging with GDB connects to the target via OpenOCD's GDB server. CMake can create convenient debug targets that launch both the server and client.

# Start OpenOCD GDB server.
# USES_TERMINAL gives the command direct access to the terminal.
add_custom_target(debug-server
    COMMAND openocd
        -f interface/stlink-v2.cfg
        -f target/stm32f4x.cfg
    COMMENT "Starting OpenOCD GDB server on :3333"
    USES_TERMINAL
    VERBATIM
)

# Launch GDB with firmware symbols.
# $<TARGET_FILE:firmware> is the path of the ELF built by the `firmware`
# target; GDB reads its symbols from that file and `load` downloads it.
# VERBATIM keeps each quoted -ex command intact as one argument.
add_custom_target(debug
    COMMAND gdb-multiarch
        -ex "target remote :3333"
        -ex "monitor reset halt"
        -ex "load"
        -ex "break main"
        -ex "continue"
        $<TARGET_FILE:firmware>
    DEPENDS firmware
    COMMENT "Starting GDB debug session"
    USES_TERMINAL
    VERBATIM
)
# Terminal 1: Start the OpenOCD server (runs the debug-server target)
cmake --build build --target debug-server

# Terminal 2: Connect GDB (runs the debug target)
cmake --build build --target debug

# Or manually with arm-none-eabi-gdb: connect to port 3333, reset and halt
# the target, then download build/firmware.elf
arm-none-eabi-gdb build/firmware.elf \
    -ex "target remote :3333" \
    -ex "monitor reset halt" \
    -ex "load"
Embedded Semihosting Debug Output

For printf debugging without UART, enable ARM semihosting — the debugger intercepts the target's semihosting requests (a BKPT 0xAB instruction on Cortex-M) and routes output to your terminal. Add --specs=rdimon.specs to linker flags (in place of --specs=nosys.specs) and call initialise_monitor_handles() before any I/O. OpenOCD enables this with monitor arm semihosting enable.

Semihosting Printf Debug OpenOCD

Size Optimization Flags

Embedded targets have strict Flash and RAM constraints. CMake's build types combined with GCC-specific flags minimize firmware footprint.

cmake_minimum_required(VERSION 3.21)
project(SizeOptimized LANGUAGES C CXX ASM)

add_executable(firmware src/main.c startup/startup.s)

# Size-critical compiler flags
target_compile_options(firmware PRIVATE
    -Os                     # Optimize for size
    -ffunction-sections     # Each function in its own section
    -fdata-sections         # Each variable in its own section
    -fno-common             # Don't merge uninitialized globals
    -fno-unwind-tables      # No stack unwinding tables
    -fshort-enums           # Use smallest type for enums
    -Wall -Wextra
    # C++-only switches: GCC warns when -fno-rtti is passed for C sources,
    # so both are restricted to C++ translation units.
    $<$<COMPILE_LANGUAGE:CXX>:-fno-exceptions>  # No C++ exceptions (saves ~20KB)
    $<$<COMPILE_LANGUAGE:CXX>:-fno-rtti>        # No runtime type info (saves ~5KB)
)

# Size-critical linker flags
target_link_options(firmware PRIVATE
    -Wl,--gc-sections       # Remove unused sections
    -Wl,--print-memory-usage
    --specs=nano.specs      # Use newlib-nano (smaller libc)
    --specs=nosys.specs     # No OS syscalls
)

# Libraries belong in target_link_libraries so they follow the object files
# on the link line; as link options they would precede them.
target_link_libraries(firmware PRIVATE c m nosys)

# LTO for Release and MinSizeRel (cross-module dead code elimination).
# $<CONFIG:...> is evaluated per configuration at generate time, so this also
# works with multi-config generators, where CMAKE_BUILD_TYPE is empty.
target_compile_options(firmware PRIVATE $<$<CONFIG:Release,MinSizeRel>:-flto>)
target_link_options(firmware PRIVATE $<$<CONFIG:Release,MinSizeRel>:-flto>)

# Print detailed size after build; $<TARGET_FILE:firmware> is the linked ELF
add_custom_command(TARGET firmware POST_BUILD
    COMMAND ${CMAKE_SIZE} --format=sysv $<TARGET_FILE:firmware>
    COMMENT "Detailed section sizes:"
    VERBATIM
)
# Typical size output for a minimal blinky project:
#    text    data     bss     dec     hex filename
#    3284      12    1568    4864    1300 firmware.elf

# With full HAL + RTOS:
#    text    data     bss     dec     hex filename
#   42680    1124    8192   51996    CB1C firmware.elf
nano.specs vs nosys.specs: nano.specs uses newlib-nano — a minimal C library without float printf support (saves ~30KB). Add -u _printf_float to linker flags only if you need printf with %f. nosys.specs provides stub syscalls (no filesystem, no process management).