CMake's Built-in Find Modules
CMake ships FindBLAS.cmake and FindLAPACK.cmake modules that detect any BLAS/LAPACK implementation installed on the system. These are the standard entry points:
# Basic BLAS and LAPACK discovery
cmake_minimum_required(VERSION 3.20)
project(LinAlgApp LANGUAGES C CXX Fortran)

find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

add_executable(solver solver.cpp)

# Link the imported targets that FindBLAS/FindLAPACK define since CMake 3.18
# instead of the raw *_LIBRARIES variables. The targets also carry
# BLAS_LINKER_FLAGS / LAPACK_LINKER_FLAGS, which the variable-only form
# silently drops. LAPACK is listed first because it depends on BLAS.
target_link_libraries(solver PRIVATE LAPACK::LAPACK BLAS::BLAS)

# Report what was found
message(STATUS "BLAS libraries: ${BLAS_LIBRARIES}")
message(STATUS "BLAS linker flags: ${BLAS_LINKER_FLAGS}")
message(STATUS "LAPACK libraries: ${LAPACK_LIBRARIES}")
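FindBLAS and FindLAPACK work by compiling and linking a small test program. They set BLAS_FOUND, BLAS_LIBRARIES, and BLAS_LINKER_FLAGS (plus the LAPACK_* equivalents). Since CMake 3.18 they also define the imported targets BLAS::BLAS and LAPACK::LAPACK; on older CMake versions no imported targets are created, so you must either pass the variables to target_link_libraries() directly or create the targets yourself.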
# Create imported targets for cleaner usage.
# FindBLAS/FindLAPACK define BLAS::BLAS and LAPACK::LAPACK themselves since
# CMake 3.18, so the if(NOT TARGET ...) guards below only take effect on
# older CMake versions.
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

if(NOT TARGET BLAS::BLAS)
  add_library(BLAS::BLAS INTERFACE IMPORTED)
  set_target_properties(BLAS::BLAS PROPERTIES
    INTERFACE_LINK_LIBRARIES "${BLAS_LIBRARIES}"
    INTERFACE_LINK_OPTIONS "${BLAS_LINKER_FLAGS}"
  )
endif()

if(NOT TARGET LAPACK::LAPACK)
  add_library(LAPACK::LAPACK INTERFACE IMPORTED)
  # Mirror the BLAS target: forward the LAPACK linker flags as well.
  set_target_properties(LAPACK::LAPACK PROPERTIES
    INTERFACE_LINK_OPTIONS "${LAPACK_LINKER_FLAGS}"
  )
  # LAPACK calls into BLAS, so consumers of LAPACK::LAPACK must also get
  # BLAS::BLAS (and its linker flags) transitively.
  target_link_libraries(LAPACK::LAPACK INTERFACE ${LAPACK_LIBRARIES} BLAS::BLAS)
endif()

add_executable(solver solver.cpp)
target_link_libraries(solver PRIVATE LAPACK::LAPACK BLAS::BLAS)
BLA_VENDOR Selection
The BLA_VENDOR variable controls which BLAS/LAPACK implementation CMake searches for. Set it before calling find_package():
#[[
Select a specific BLAS vendor.

Common BLA_VENDOR values:
  All              - Search all vendors (default)
  OpenBLAS         - OpenBLAS (recommended for Linux)
  Intel10_64lp     - Intel MKL 10+ 64-bit, LP64 interface
  Intel10_64ilp    - Intel MKL 10+ 64-bit, ILP64 interface
  Intel10_64lp_seq - Intel MKL sequential (no threading)
  Apple            - Apple Accelerate framework
  FlexiBLAS        - FlexiBLAS wrapper
  FLAME            - libFLAME
  ATLAS            - ATLAS auto-tuned BLAS
  Generic          - Generic reference BLAS
]]

# The vendor has to be chosen before the find modules run.
set(BLA_VENDOR OpenBLAS)  # Force OpenBLAS

find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)
# Platform-adaptive vendor selection
#
# A platform default is chosen only when the user has not already selected a
# vendor (for example with -DBLA_VENDOR=... or a preset). If the
# auto-selected vendor is not installed, the search falls back to every
# vendor instead of aborting. A vendor requested explicitly by the user is
# still treated as a hard requirement.
set(linalg_vendor_auto_selected FALSE)
if(NOT DEFINED BLA_VENDOR)
  set(linalg_vendor_auto_selected TRUE)
  if(APPLE)
    set(BLA_VENDOR Apple)         # Use Accelerate framework
  elseif(WIN32)
    set(BLA_VENDOR Intel10_64lp)  # MKL on Windows
  else()
    set(BLA_VENDOR OpenBLAS)      # OpenBLAS on Linux
  endif()
endif()

if(linalg_vendor_auto_selected)
  find_package(BLAS QUIET)
  if(NOT BLAS_FOUND)
    message(STATUS "BLAS vendor '${BLA_VENDOR}' not found, searching all vendors")
    set(BLA_VENDOR All)
  endif()
endif()

find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

message(STATUS "Selected BLAS vendor: ${BLA_VENDOR}")
message(STATUS "BLAS libs: ${BLAS_LIBRARIES}")
Setting BLA_VENDOR after find_package(BLAS) has no effect. The variable must be defined in the cache or set before the find call. Also, if the selected vendor isn't installed, the find will fail — provide a fallback.
Linking Strategies
BLAS/LAPACK linking varies significantly between vendors. Understanding the differences prevents obscure linker errors:
# Static vs shared BLAS
# Both switches are read by FindBLAS, so they are set before the find call.
set(BLA_VENDOR OpenBLAS)
set(BLA_STATIC ON)  # Prefer static libraries

find_package(BLAS REQUIRED)

# 64-bit integer interface (ILP64)
# BLA_SIZEOF_INTEGER is honoured by CMake 3.22+.
set(BLA_SIZEOF_INTEGER 8)

find_package(BLAS REQUIRED)
# Combined example with proper error handling
#
# First attempt: OpenBLAS, searched quietly so that a miss is not fatal.
set(BLA_VENDOR OpenBLAS)
find_package(BLAS QUIET)

# Second attempt: widen the search to every vendor; this one must succeed.
if(NOT BLAS_FOUND)
  message(STATUS "OpenBLAS not found, falling back to generic BLAS")
  set(BLA_VENDOR "All")
  find_package(BLAS REQUIRED)
endif()

# LAPACK is located with whichever BLA_VENDOR value is in effect here.
find_package(LAPACK REQUIRED)
# OpenBLAS with threading control
find_package(BLAS REQUIRED)
# LAPACK must be located as well: without this call LAPACK_LIBRARIES and
# LAPACK::LAPACK are undefined in this snippet.
find_package(LAPACK REQUIRED)

add_executable(matrix_app matrix.cpp)
# Imported targets (CMake 3.18+) carry the libraries and the linker flags.
target_link_libraries(matrix_app PRIVATE LAPACK::LAPACK BLAS::BLAS)

# Control OpenBLAS thread count at runtime
# Environment: OPENBLAS_NUM_THREADS=4
# Or programmatically via openblas_set_num_threads(4)
Intel MKL with CMake
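Intel MKL (Math Kernel Library) provides the fastest BLAS/LAPACK on Intel hardware. CMake does not ship a dedicated FindMKL module; instead, FindBLAS and FindLAPACK detect MKL through the Intel10_* BLA_VENDOR values, and oneMKL ships its own CMake config files (MKLConfig.cmake):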
# Method 1: MKL via BLA_VENDOR (simplest)
set(BLA_VENDOR Intel10_64lp)
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

# Method 2: MKL's own CMake config (most control)
#
# MKL_ROOT is only given a default when nothing else has defined it. A plain
# set() here would shadow a value supplied through the cache (-DMKL_ROOT=...
# or a CMake preset). Preference order: existing MKL_ROOT, then the MKLROOT
# environment variable exported by Intel's setvars.sh, then the default
# oneAPI install location.
if(NOT DEFINED MKL_ROOT)
  if(DEFINED ENV{MKLROOT})
    set(MKL_ROOT "$ENV{MKLROOT}")
  else()
    set(MKL_ROOT "/opt/intel/oneapi/mkl/latest")
  endif()
endif()

find_package(MKL CONFIG REQUIRED)

add_executable(mkl_app mkl_example.cpp)
target_link_libraries(mkl_app PRIVATE MKL::MKL)
# Method 3: MKL with specific threading
#
# oneMKL's MKLConfig.cmake reads its options when find_package(MKL) runs, so
# they must be set before that call. Values are only defaulted here, leaving
# room for -DMKL_THREADING=... on the command line.
#   MKL_THREADING: sequential | intel_thread | gnu_thread | tbb_thread
#   MKL_INTERFACE: lp64 (32-bit integers) | ilp64 (64-bit integers)
if(NOT DEFINED MKL_THREADING)
  set(MKL_THREADING sequential)
endif()
if(NOT DEFINED MKL_INTERFACE)
  set(MKL_INTERFACE lp64)
endif()

find_package(MKL CONFIG REQUIRED)

# mkl_app is the executable target that consumes MKL; it must already exist.
# Compile options and include directories are forwarded explicitly, and
# LINK_ONLY restricts the last line to the link dependency alone.
target_compile_options(mkl_app PRIVATE $<TARGET_PROPERTY:MKL::MKL,INTERFACE_COMPILE_OPTIONS>)
target_include_directories(mkl_app PRIVATE $<TARGET_PROPERTY:MKL::MKL,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(mkl_app PRIVATE $<LINK_ONLY:MKL::MKL>)
# MKL with an explicit search hint
# CONFIG restricts the search to a package configuration file, and MKL_ROOT is
# passed as an additional location to look in. No components are requested by
# this call.
find_package(MKL CONFIG REQUIRED HINTS ${MKL_ROOT})
# Available MKL targets:
# MKL::MKL — Full MKL (auto-selects threading)
# MKL::mkl_intel_lp64 — LP64 interface (32-bit int)
# MKL::mkl_intel_ilp64 — ILP64 interface (64-bit int)
# MKL::mkl_sequential — Sequential (no threading)
# MKL::mkl_intel_thread — Intel OpenMP threading
# MKL::mkl_gnu_thread — GNU OpenMP threading
# MKL::mkl_tbb_thread — TBB threading
# CMake Preset for MKL
# (fragment of a configure preset; the JSON below is illustrative and is not
# evaluated by CMake from this file)
# {
# "name": "mkl-linux",
# "cacheVariables": {
# "MKL_ROOT": "$env{MKLROOT}",
# "BLA_VENDOR": "Intel10_64lp"
# }
# }
Before running CMake, run source /opt/intel/oneapi/setvars.sh in your shell. This sets MKLROOT, LD_LIBRARY_PATH, and other variables that MKL's CMake config needs.
Performance Benchmarking
Different BLAS implementations can have 2-10× performance differences depending on matrix size and hardware:
// benchmark_blas.cpp — Compare BLAS performance
//
// Times one double-precision matrix multiply (dgemm) per matrix size and
// prints the elapsed time and the achieved GFLOPS.
#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <vector>

// Fortran BLAS dgemm symbol (this is the Fortran entry point, not CBLAS):
// lowercase name with a trailing underscore, every argument passed by
// pointer. The `int` parameters assume an LP64 BLAS build.
extern "C" void dgemm_(const char* transa, const char* transb,
                       const int* m, const int* n, const int* k,
                       const double* alpha, const double* A, const int* lda,
                       const double* B, const int* ldb,
                       const double* beta, double* C, const int* ldc);

int main() {
    const int sizes[] = {128, 256, 512, 1024, 2048, 4096};

    for (const int N : sizes) {
        // Compute the element count in size_t so that larger sizes added to
        // the table above cannot overflow an int multiplication.
        const std::size_t count = static_cast<std::size_t>(N) * static_cast<std::size_t>(N);
        std::vector<double> A(count), B(count), C(count, 0.0);

        // Initialize with random values
        for (std::size_t i = 0; i < count; ++i) {
            A[i] = static_cast<double>(std::rand()) / RAND_MAX;
            B[i] = static_cast<double>(std::rand()) / RAND_MAX;
        }

        const double alpha = 1.0, beta = 0.0;
        const char trans = 'N';

        // steady_clock is monotonic; high_resolution_clock may alias the
        // system clock and jump if the wall time is adjusted mid-run.
        const auto start = std::chrono::steady_clock::now();
        dgemm_(&trans, &trans, &N, &N, &N,
               &alpha, A.data(), &N, B.data(), &N,
               &beta, C.data(), &N);
        const auto end = std::chrono::steady_clock::now();

        const double ms = std::chrono::duration<double, std::milli>(end - start).count();
        // A dense N x N x N multiply performs 2*N^3 floating-point operations.
        const double gflops = (2.0 * N * N * N) / (ms * 1e6);

        std::cout << "N=" << N << ": " << ms << " ms ("
                  << gflops << " GFLOPS)\n";
    }
    return 0;
}
# Benchmark build configuration
cmake_minimum_required(VERSION 3.20)
project(BLASBench LANGUAGES CXX Fortran)

# Host-specific tuning is on by default (a benchmark wants it) but can be
# switched off, e.g. when the binary will run on a different machine.
option(BLASBENCH_NATIVE_OPTIMIZATION
  "Compile the benchmark with -O3 -march=native (GCC/Clang-style compilers only)" ON)

# The benchmark only calls dgemm, so BLAS alone is required; LAPACK is not
# needed to build or link it.
find_package(BLAS REQUIRED)

add_executable(bench benchmark_blas.cpp)
target_link_libraries(bench PRIVATE BLAS::BLAS)

# -O3 / -march=native are GCC/Clang spellings and are rejected or ignored by
# MSVC, so they are restricted to compilers known to accept them.
if(BLASBENCH_NATIVE_OPTIMIZATION)
  target_compile_options(bench PRIVATE
    "$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang,IntelLLVM>:-O3;-march=native>"
  )
endif()
LAPACKE & CBLAS C Interfaces
LAPACKE and CBLAS provide C-friendly wrappers around the Fortran LAPACK and BLAS APIs, eliminating the need for Fortran name mangling:
# Finding CBLAS and LAPACKE
cmake_minimum_required(VERSION 3.20)
project(CInterface LANGUAGES C CXX)

find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

# CBLAS header location.
# REQUIRED (CMake 3.18+) stops configuration with a clear message when the
# header is missing, instead of passing CBLAS_INCLUDE_DIR-NOTFOUND on to the
# target. The extra directories are listed under PATHS so they are consulted
# after the user's CMAKE_PREFIX_PATH and the standard system locations.
find_path(CBLAS_INCLUDE_DIR cblas.h
  PATHS /usr/include /usr/local/include /opt/OpenBLAS/include
  PATH_SUFFIXES openblas
  REQUIRED
)

# LAPACKE header location
find_path(LAPACKE_INCLUDE_DIR lapacke.h
  PATHS /usr/include /usr/local/include
  PATH_SUFFIXES lapacke
  REQUIRED
)

# Reference (Netlib) installations ship the C wrappers as separate libcblas /
# liblapacke libraries, which FindBLAS/FindLAPACK do not report. Vendors such
# as OpenBLAS and MKL bundle the wrappers into the main library, so these two
# lookups are optional and only linked when present.
find_library(CBLAS_LIBRARY NAMES cblas)
find_library(LAPACKE_LIBRARY NAMES lapacke)

add_executable(c_linalg c_linalg.c)
target_include_directories(c_linalg PRIVATE
  "${CBLAS_INCLUDE_DIR}"
  "${LAPACKE_INCLUDE_DIR}"
)

# Wrappers go first on the link line: they depend on LAPACK/BLAS.
if(LAPACKE_LIBRARY)
  target_link_libraries(c_linalg PRIVATE "${LAPACKE_LIBRARY}")
endif()
if(CBLAS_LIBRARY)
  target_link_libraries(c_linalg PRIVATE "${CBLAS_LIBRARY}")
endif()
target_link_libraries(c_linalg PRIVATE LAPACK::LAPACK BLAS::BLAS)
// c_linalg.c — Using CBLAS and LAPACKE C interfaces
//
// Solves a 3x3 linear system with LAPACKE_dgesv, then computes a
// matrix-vector product with cblas_dgemv. Exits non-zero if the solve fails.
#include <cblas.h>
#include <lapacke.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    // lapack_int follows the library's integer width (32-bit for LP64,
    // 64-bit for ILP64), so the call stays correct for either build.
    const lapack_int N = 3;
    int status = EXIT_SUCCESS;

    // Matrix A, stored row-major (each line below is one row) to match the
    // LAPACK_ROW_MAJOR layout flag passed to LAPACKE_dgesv. The routine
    // overwrites A with its LU factors.
    double A[] = {
         6.80, -2.11,  5.66,
        -6.05, -3.30,  5.36,
        -0.45,  2.58, -2.70
    };

    // Right-hand side vector b; overwritten with the solution x on success.
    double b[] = {4.02, 6.19, -8.22};
    lapack_int ipiv[3];

    // Solve Ax = b using LU factorization (LAPACKE).
    // In row-major layout the leading dimension of b is the number of
    // right-hand sides, hence ldb = 1.
    lapack_int info = LAPACKE_dgesv(LAPACK_ROW_MAJOR, N, 1, A, N, ipiv, b, 1);
    if (info == 0) {
        printf("Solution: [%.4f, %.4f, %.4f]\n", b[0], b[1], b[2]);
    } else {
        // Cast because lapack_int may be wider than int.
        printf("LAPACKE_dgesv failed with info = %lld\n", (long long)info);
        status = EXIT_FAILURE;
    }

    // CBLAS matrix-vector multiply: y = alpha*M*x + beta*y
    double M[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    double x[] = {1, 1, 1};
    double y[] = {0, 0, 0};
    cblas_dgemv(CblasRowMajor, CblasNoTrans, 3, 3,
                1.0, M, 3, x, 1, 0.0, y, 1);
    // The product uses M, not A (A holds LU factors by now), so label it M.
    printf("M*[1,1,1] = [%.0f, %.0f, %.0f]\n", y[0], y[1], y[2]);

    return status;
}
Fortran Interoperability
BLAS and LAPACK are originally Fortran libraries. Calling them from C/C++ requires handling name mangling and column-major layout:
# Enable Fortran for direct BLAS/LAPACK usage
cmake_minimum_required(VERSION 3.20)

# Fortran is deliberately NOT listed here: project(... Fortran) aborts
# configuration when no Fortran compiler exists, which would make the
# no-Fortran fallback below unreachable. It is probed and enabled instead.
project(FortranInterop LANGUAGES C CXX)

include(CheckLanguage)
check_language(Fortran)
if(CMAKE_Fortran_COMPILER)
  enable_language(Fortran)
endif()

# Fortran is enabled first (when available) so the find modules can use it
# during detection.
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

add_executable(interop_app interop.cpp)
target_link_libraries(interop_app PRIVATE LAPACK::LAPACK BLAS::BLAS)

if(CMAKE_Fortran_COMPILER)
  # Check Fortran name mangling convention. VERIFY(CXX) also confirms that
  # Fortran and C++ objects link together, which is what interop.cpp needs.
  include(FortranCInterface)
  FortranCInterface_VERIFY(CXX)
  FortranCInterface_HEADER(fc_mangle.h MACRO_NAMESPACE "FC_")

  target_sources(interop_app PRIVATE custom_blas.f90)
  # fc_mangle.h is generated in the current binary directory, which differs
  # from CMAKE_BINARY_DIR when this project is added as a subdirectory.
  target_include_directories(interop_app PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
else()
  # If Fortran is not available, use manual mangling
  # Most Linux systems use lowercase + underscore: dgemm_
  # NOTE(review): custom_blas.f90 cannot be compiled in this branch; confirm
  # that interop.cpp does not depend on routines defined there.
  message(WARNING "No Fortran compiler found: assuming lowercase + underscore "
                  "BLAS symbol names and skipping custom_blas.f90")
  target_compile_definitions(interop_app PRIVATE
    BLAS_SYMBOL_SUFFIX=_
    BLAS_SYMBOL_PREFIX=
  )
endif()
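Fortran BLAS routines expect column-major matrices. To operate on row-major C/C++ arrays without copying them, pass the 'T' (transpose) flag to BLAS routines.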
- Set BLA_VENDOR before find_package(BLAS) for deterministic builds
- Use platform-adaptive vendor selection (Apple Accelerate, MKL, OpenBLAS)
- Create imported targets from BLAS/LAPACK variables for cleaner CMake
- Prefer CBLAS/LAPACKE C interfaces over raw Fortran calls in C++ code
- Source Intel's setvars.sh before CMake when using MKL
- Add Fortran to project LANGUAGES for reliable BLAS detection
- Benchmark your specific workload — optimal vendor depends on matrix size