What is Reproducibility?
A reproducible build guarantees that given the same source code, build instructions, and environment specification, any machine produces a bit-for-bit identical binary. This isn't just academic — it's essential for security auditing, debugging production issues, and regulatory compliance. The CMake Presets documentation provides the foundation for codifying build configurations.
Sources of Non-Determinism
Common reasons builds differ across machines or over time:
- Timestamps: `__DATE__` and `__TIME__` macros, file modification times embedded in binaries
- Absolute paths: `__FILE__` macro, debug info containing build directory paths
- Randomization: Hash table iteration order, UUID generation during build
- Compiler version drift: Patch updates changing optimization strategies
- Dependency updates: Unpinned dependencies pulling new versions
- Filesystem ordering: Source file enumeration varying between systems
CMake Presets for Team-Wide Configuration
CMake Presets (CMakePresets.json) codify configure, build, and test settings in a version-controlled file that every team member and CI system uses identically:
{
"version": 6,
"cmakeMinimumRequired": { "major": 3, "minor": 25, "patch": 0 },
"configurePresets": [
{
"name": "base",
"hidden": true,
"generator": "Ninja",
"binaryDir": "${sourceDir}/build/${presetName}",
"cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_EXPORT_NO_PACKAGE_REGISTRY": "ON",
"FETCHCONTENT_FULLY_DISCONNECTED": "OFF"
}
},
{
"name": "release",
"inherits": "base",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"CMAKE_INTERPROCEDURAL_OPTIMIZATION": "ON"
}
},
{
"name": "debug",
"inherits": "base",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug",
"CMAKE_CXX_FLAGS": "-fsanitize=address,undefined"
}
},
{
"name": "ci-linux",
"inherits": "release",
"toolchainFile": "${sourceDir}/cmake/toolchains/gcc-13-linux.cmake",
"environment": {
"CCACHE_DIR": "/tmp/ccache",
"SOURCE_DATE_EPOCH": "0"
}
}
],
"buildPresets": [
{
"name": "release",
"configurePreset": "release",
"jobs": 0
}
],
"testPresets": [
{
"name": "release",
"configurePreset": "release",
"output": { "verbosity": "verbose" },
"execution": { "jobs": 8, "timeout": 300 }
}
]
}
Preset Inheritance
# Use presets from the command line.
# The "release" build and test presets both reference the "release" configure
# preset, so all three commands operate on the same tree (build/release).
# Configuring a different preset (e.g. ci-linux) and then building with
# --preset release would target a build directory that was never configured.
cmake --preset release
cmake --build --preset release
ctest --preset release
# List available presets
cmake --list-presets
# User-specific overrides go in CMakeUserPresets.json (gitignored)
# This allows personal paths/settings without affecting the team
flowchart TD
A[base preset - hidden] --> B[release]
A --> C[debug]
A --> D[coverage]
B --> E[ci-linux]
B --> F[ci-windows]
B --> G[ci-macos]
E --> H[ci-linux-gcc13]
E --> I[ci-linux-clang18]
Docker Build Environments
Multi-Stage Dockerfile
A multi-stage Dockerfile pins every tool version and produces a minimal build image:
# Dockerfile for reproducible C++ builds
#
# Stages:
#   build-base - toolchain and build tools only (also usable as a dev container)
#   builder    - copies the source tree and compiles it with the ci-linux preset
#   runtime    - minimal image containing only the built executable
FROM ubuntu:24.04 AS build-base

# Pin package versions. ca-certificates is listed explicitly because
# --no-install-recommends would otherwise leave it out, and the HTTPS
# downloads performed by git/FetchContent need a CA bundle to verify
# certificates. It does not influence the compiled output, so it is not
# version-pinned.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        gcc-13=13.2.0-* \
        g++-13=13.2.0-* \
        cmake=3.28.* \
        ninja-build=1.11.* \
        ccache=4.9.* \
        git=1:2.43.* \
    && rm -rf /var/lib/apt/lists/*

# Set GCC 13 as default
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 100 \
    && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 100

# Configure ccache
ENV CCACHE_DIR=/cache/ccache
ENV CCACHE_MAXSIZE=2G
ENV CMAKE_C_COMPILER_LAUNCHER=ccache
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache

WORKDIR /src

# Build stage
FROM build-base AS builder

# The ci-linux configure preset sets SOURCE_DATE_EPOCH only for the commands
# that run through that preset. The build step below addresses the build tree
# directly, so export the same value for it here.
ENV SOURCE_DATE_EPOCH=0

COPY . /src

# Configure with ci-linux (binaryDir = build/ci-linux) and build that same
# tree. The "release" build preset refers to the "release" configure preset,
# i.e. build/release, which is never configured in this image.
RUN cmake --preset ci-linux && cmake --build build/ci-linux

# Runtime stage (minimal)
FROM ubuntu:24.04 AS runtime
COPY --from=builder /src/build/ci-linux/bin/myapp /usr/local/bin/
ENTRYPOINT ["myapp"]
# Build with Docker (reproducible across any host)
docker build --target builder -t myproject:build .
# Extract artifacts through a temporary container, then remove it so that
# repeated runs do not accumulate stopped containers
cid=$(docker create myproject:build)
docker cp "$cid":/src/build/ci-linux/bin/myapp ./myapp
docker rm "$cid"
# Run tests inside container. Point ctest at build/ci-linux, the tree the
# image configured; the "release" test preset would look in build/release.
# -V, -j 8 and --timeout 300 mirror the settings of that test preset.
docker run --rm myproject:build ctest --test-dir build/ci-linux -V -j 8 --timeout 300
Dev Containers
# .devcontainer/devcontainer.json
{
  "name": "MyProject Dev",
  "build": {
    "dockerfile": "../Dockerfile",
    "target": "build-base"
  },
  "customizations": {
    "vscode": {
      "extensions": [
        "ms-vscode.cmake-tools",
        "ms-vscode.cpptools"
      ],
      "settings": {
        "cmake.configureSettings": {
          "CMAKE_EXPORT_COMPILE_COMMANDS": "ON"
        }
      }
    }
  },
  "mounts": [
    "source=myproject-ccache,target=/cache/ccache,type=volume"
  ]
}
Objective: Verify bit-for-bit reproducibility using Docker.
Build your project inside Docker twice (with --no-cache both times), extract the binaries, and compare their SHA-256 hashes. If they differ, use diffoscope to identify what changed (timestamps? paths? randomization?) and fix each source of non-determinism until hashes match.
Toolchain Pinning
Toolchain files lock the compiler, sysroot, and system library versions. Combined with Docker, they guarantee identical compilation regardless of the host system:
# cmake/toolchains/gcc-13-linux.cmake
#
# Toolchain file selecting GCC 13 by absolute path for x86_64 Linux builds.
# Referenced by the ci-linux configure preset through "toolchainFile".
#
# NOTE(review): setting CMAKE_SYSTEM_NAME manually makes CMake treat the build
# as a cross build (CMAKE_CROSSCOMPILING becomes true) — confirm that is
# acceptable for native container builds, otherwise drop the two lines below.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR x86_64)

# Pin compiler paths (inside Docker container)
set(CMAKE_C_COMPILER /usr/bin/gcc-13)
set(CMAKE_CXX_COMPILER /usr/bin/g++-13)
set(CMAKE_AR /usr/bin/gcc-ar-13)
set(CMAKE_RANLIB /usr/bin/gcc-ranlib-13)

# Sysroot for cross-compilation reproducibility
# set(CMAKE_SYSROOT /opt/sysroots/x86_64-linux-gnu)

# CMAKE_C_COMPILER_FORCED / CMAKE_CXX_COMPILER_FORCED are deliberately NOT set.
# They do not make CMake re-detect the compiler; they make it skip the
# "compiler works" check together with ABI and compile-feature detection,
# which breaks target_compile_features() and leaves values such as
# CMAKE_SIZEOF_VOID_P unset. To force a fresh detection, configure into a
# clean build directory (or use `cmake --fresh`, CMake 3.24+).

# Reproducibility flags: rewrite the absolute source path recorded in debug
# info and __FILE__ to "." so the output does not depend on the checkout
# location.
set(CMAKE_C_FLAGS_INIT "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
set(CMAKE_CXX_FLAGS_INIT "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
# Use the toolchain file
cmake --preset ci-linux
# The preset references toolchainFile, which points to this file
# Verify the compiler version matches expectations:
# configure into a scratch tree (build-verify) with the toolchain file passed
# explicitly, fold stderr into stdout, and keep only the identification line.
cmake -S . -B build-verify \
-DCMAKE_TOOLCHAIN_FILE=cmake/toolchains/gcc-13-linux.cmake \
2>&1 | grep "C compiler identification"
# Should output: "The C compiler identification is GNU 13.2.0"
# (the exact patch version depends on the gcc-13 package installed in the image)
Dependency Pinning
FetchContent with Exact Commits
# CMakeLists.txt — pin every dependency to exact commit SHA
include(FetchContent)

# GIT_SHALLOW is intentionally not used below: a shallow clone is only
# guaranteed to work when GIT_TAG names a branch or tag, and every git
# dependency here is pinned to a commit hash.

# Pin googletest to exact commit (not a tag that could move).
# No FIND_PACKAGE_ARGS here: it would let find_package() satisfy the
# dependency with whatever GTest is installed on the machine, bypassing the
# pinned commit.
FetchContent_Declare(googletest
  GIT_REPOSITORY https://github.com/google/googletest.git
  GIT_TAG        f8d7d77c06936315286eb55f8de22cd23c188571 # v1.14.0
)

# Pin fmt to exact commit.
# NOTE(review): confirm this SHA really is the 10.2.1 tag, e.g. with
#   git ls-remote --tags https://github.com/fmtlib/fmt.git 10.2.1
FetchContent_Declare(fmt
  GIT_REPOSITORY https://github.com/fmtlib/fmt.git
  GIT_TAG        0c9fce2ffefecfdce794e1859584e25877b7b592 # 10.2.1
)

# Pin spdlog with exact URL hash: the download is rejected if the archive
# content ever changes.
# NOTE(review): recompute this value with `sha256sum` on the downloaded
# tarball before relying on it; a wrong hash makes the configure step fail.
FetchContent_Declare(spdlog
  URL      https://github.com/gabime/spdlog/archive/refs/tags/v1.13.0.tar.gz
  URL_HASH SHA256=1f7a8a5925c43be3f1e73b5b0fc08c39e12aef5c5c3b1e7a7b16c0e4c3e8b5a1
)

FetchContent_MakeAvailable(googletest fmt spdlog)
vcpkg Manifest Mode
# vcpkg.json — version-locked dependency manifest
{
"name": "myproject",
"version": "1.0.0",
"dependencies": [
{ "name": "boost-asio", "version>=": "1.84.0" },
{ "name": "openssl", "version>=": "3.2.0" },
{ "name": "nlohmann-json", "version>=": "3.11.3" },
{ "name": "spdlog", "version>=": "1.13.0" }
],
"builtin-baseline": "a34c873a9717a888f58dc05268dea15592c2f0ff",
"overrides": [
{ "name": "openssl", "version": "3.2.1" }
]
}
# CMakePresets.json entry for vcpkg
{
"name": "vcpkg-release",
"inherits": "base",
"toolchainFile": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release",
"VCPKG_MANIFEST_MODE": "ON",
"VCPKG_OVERLAY_TRIPLETS": "${sourceDir}/cmake/triplets"
}
}
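Set CMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON in reproducible builds so that export(PACKAGE) never writes this project into the per-user package registry. This variable only controls writing: to stop find_package() from picking up packages that other projects registered in the user's home directory, which would vary between machines, also set CMAKE_FIND_USE_PACKAGE_REGISTRY=OFF.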
Deterministic Compiler Flags
Even with pinned toolchains and dependencies, the binary can differ due to embedded metadata. These flags eliminate common sources of non-determinism:
# cmake/ReproducibleBuild.cmake — include in your project
#
# Adds directory-scoped compile/link options that remove common sources of
# non-determinism (absolute paths, timestamps, build IDs) and establishes a
# SOURCE_DATE_EPOCH value for the configure step.

# Strip absolute paths from debug info and __FILE__ macro
add_compile_options(
  "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=."
  "-ffile-prefix-map=${CMAKE_BINARY_DIR}=./build"
)

# GCC only: do not record the compiler command line in object files, and
# use a SHA-1 build ID for the linked binary.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  add_compile_options(-fno-record-gcc-switches)
  add_link_options(-Wl,--build-id=sha1)
endif()

# Replace __DATE__ and __TIME__ with fixed strings.
# NOTE(review): the generator-expression condition was lost in the original
# text; restricting the definitions to C and C++ sources is a reconstruction.
add_compile_definitions(
  "$<$<COMPILE_LANGUAGE:C,CXX>:__DATE__=\"redacted\">"
  "$<$<COMPILE_LANGUAGE:C,CXX>:__TIME__=\"redacted\">"
)
# Redefining a built-in macro triggers -Wbuiltin-macro-redefined on GCC and
# Clang; silence it so warning-as-error builds keep working.
add_compile_options(
  "$<$<COMPILE_LANG_AND_ID:C,GNU,Clang,AppleClang>:-Wno-builtin-macro-redefined>"
  "$<$<COMPILE_LANG_AND_ID:CXX,GNU,Clang,AppleClang>:-Wno-builtin-macro-redefined>"
)

# SOURCE_DATE_EPOCH for tools that respect it (ar, tar, gzip, zip, etc.).
# set(ENV{...}) only affects this CMake process and its configure-time child
# processes. It is NOT inherited by the later build step, so the variable
# must also be exported in the environment that runs the build.
if(DEFINED ENV{SOURCE_DATE_EPOCH})
  message(STATUS "SOURCE_DATE_EPOCH=$ENV{SOURCE_DATE_EPOCH} (from environment)")
else()
  # Use Git commit timestamp as epoch
  execute_process(
    COMMAND git log -1 --format=%ct
    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
    RESULT_VARIABLE GIT_EPOCH_RESULT
    OUTPUT_VARIABLE GIT_EPOCH
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
  )
  # git missing, not a repository, or unexpected output: fall back to a fixed
  # value instead of passing an empty string (which would unset the variable).
  if(NOT GIT_EPOCH_RESULT EQUAL 0 OR NOT GIT_EPOCH MATCHES "^[0-9]+$")
    message(WARNING "Could not read the commit timestamp from git; using SOURCE_DATE_EPOCH=0")
    set(GIT_EPOCH 0)
  endif()
  set(ENV{SOURCE_DATE_EPOCH} "${GIT_EPOCH}")
  message(WARNING
    "SOURCE_DATE_EPOCH was not set in the environment. It has been set to "
    "${GIT_EPOCH} for the configure step only; export "
    "SOURCE_DATE_EPOCH=${GIT_EPOCH} before running the build.")
endif()
# Verify determinism: build twice, compare hashes.
# Export SOURCE_DATE_EPOCH so the build step sees it too, not just configure.
export SOURCE_DATE_EPOCH=0
cmake --preset ci-linux
# Build the tree that was just configured (build/ci-linux); the "release"
# build preset refers to build/release instead.
cmake --build build/ci-linux
sha256sum build/ci-linux/bin/myapp > hash1.txt
# Clean and rebuild
rm -rf build/ci-linux
cmake --preset ci-linux
cmake --build build/ci-linux
sha256sum build/ci-linux/bin/myapp > hash2.txt
# Should be identical
diff hash1.txt hash2.txt && echo "Reproducible!" || echo "NOT reproducible"
Objective: Identify and eliminate all sources of non-determinism in a build.
Start with a project that produces different binaries on each build. Use diffoscope to compare two builds and identify differences (timestamps, paths, random bytes). Apply fixes one by one: -ffile-prefix-map, SOURCE_DATE_EPOCH, -fno-record-gcc-switches, fixed --build-id. Verify each fix reduces differences until builds are identical.
CI/CD Pipeline Integration
# .github/workflows/reproducible.yml
# Configures, builds and tests inside a pinned container image, then publishes
# SHA-256 checksums of the produced binaries as a workflow artifact.
name: Reproducible Build
on: [push, pull_request]
jobs:
  build:
    runs-on: ubuntu-24.04
    container:
      image: ghcr.io/myorg/build-env:gcc13-v2
    env:
      SOURCE_DATE_EPOCH: 0
      CCACHE_DIR: /tmp/ccache
    steps:
      - uses: actions/checkout@v4
      - name: Restore ccache
        uses: actions/cache@v4
        with:
          path: /tmp/ccache
          key: ccache-${{ runner.os }}-${{ hashFiles('CMakeLists.txt', 'cmake/**') }}
          restore-keys: ccache-${{ runner.os }}-
      - name: Configure
        run: cmake --preset ci-linux
      # Build and test the tree configured above (build/ci-linux). The
      # "release" build/test presets point at build/release, which this job
      # never configures.
      - name: Build
        run: cmake --build build/ci-linux
      - name: Test
        run: ctest --test-dir build/ci-linux -V -j 8 --timeout 300
      # Records checksums only; comparing them against a previous or second
      # build is left to a later step or job.
      - name: Verify reproducibility
        run: |
          sha256sum build/ci-linux/bin/* > checksums.txt
          cat checksums.txt
      - name: Upload checksums
        uses: actions/upload-artifact@v4
        with:
          name: build-checksums
          path: checksums.txt
flowchart TD
A[Git Push] --> B[Pull Pinned Docker Image]
B --> C[Restore ccache from Cache]
C --> D[cmake --preset ci-linux]
D --> E[cmake --build --preset release]
E --> F[ctest --preset release]
F --> G{Tests Pass?}
G -->|Yes| H[Generate SHA-256 Checksums]
G -->|No| I[Fail Build]
H --> J[Compare with Previous Build Checksums]
J -->|Match| K[Deploy / Release]
J -->|Differ| L[Flag for Investigation]
Caching Strategies
Compiler caches accelerate builds without affecting reproducibility — they cache based on preprocessed source + flags, so identical inputs always produce identical outputs:
# CMakeLists.txt — enable ccache/sccache transparently
#
# Picks sccache when available, otherwise ccache, and installs it as the C and
# C++ compiler launcher. A launcher that was already chosen explicitly (-D on
# the command line, a preset cache variable, or the CMAKE_<LANG>_COMPILER_LAUNCHER
# environment variables) is left untouched.
if(DEFINED CMAKE_C_COMPILER_LAUNCHER
   OR DEFINED CMAKE_CXX_COMPILER_LAUNCHER
   OR DEFINED ENV{CMAKE_C_COMPILER_LAUNCHER}
   OR DEFINED ENV{CMAKE_CXX_COMPILER_LAUNCHER})
  message(STATUS "Compiler launcher already configured; not overriding it")
else()
  find_program(CCACHE_PROGRAM ccache)
  find_program(SCCACHE_PROGRAM sccache)
  if(SCCACHE_PROGRAM)
    set(CMAKE_C_COMPILER_LAUNCHER "${SCCACHE_PROGRAM}")
    set(CMAKE_CXX_COMPILER_LAUNCHER "${SCCACHE_PROGRAM}")
    message(STATUS "Using sccache: ${SCCACHE_PROGRAM}")
  elseif(CCACHE_PROGRAM)
    set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
    set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
    message(STATUS "Using ccache: ${CCACHE_PROGRAM}")
  endif()
endif()
# ccache configuration for reproducible builds
# ~/.config/ccache/ccache.conf (or CCACHE_* env vars)
# Comments must be on their own line: text after a value, including a
# trailing "# ...", is read as part of the value.
max_size = 5G
compression = true
compression_level = 6
# Critical for reproducibility:
# Don't include CWD in hash
hash_dir = false
# Ignore __DATE__/__TIME__ changes
sloppiness = time_macros
# sccache for distributed/cloud caching
# Export the cache location settings, then start the sccache server so that
# it inherits them.
# NOTE(review): the variable names suggest an S3 bucket, its region and an
# object-key prefix — confirm them against the sccache version in use.
export SCCACHE_BUCKET=my-build-cache
export SCCACHE_REGION=us-east-1
export SCCACHE_S3_KEY_PREFIX=sccache/myproject/
sccache --start-server
Objective: Measure the speedup from ccache in a CI-like scenario.
Configure ccache with statistics enabled (CCACHE_STATS=true). Do a cold build (empty cache), record the time. Then rebuild from scratch — the cache should produce near-instant compilation. Measure hit rates with ccache -s and compare cold vs warm build times. Also test that cached builds produce identical binaries to uncached builds.
Conclusion & Next Steps
Reproducible builds are the foundation of trustworthy software delivery. By combining CMake Presets (configuration), Docker (environment), toolchain files (compiler), FetchContent/vcpkg (dependencies), and deterministic flags (binary content), you create a build pipeline where any engineer or CI runner produces bit-for-bit identical artifacts from the same commit.
Next in the Series
In Part 30: Build Performance Optimization, we'll explore techniques to minimize build times — from precompiled headers and unity builds to link-time optimization, module partitioning, and distributed compilation with distcc/icecream.