Master C and assembly integration: call C library functions from assembly, write assembly functions callable from C, use GCC inline assembly, and understand ABI compliance for seamless interop.
; printf_call.asm - call the C library's printf from assembly
; Syntax: NASM (Intel) | ABI: System V AMD64

; Declare external C functions
extern printf
extern malloc
extern free

section .data
fmt     db "Value: %d", 10, 0           ; "Value: %d\n", NUL-terminated

section .text
global main

;-----------------------------------------------------------------------
; int main(void)
; Out:   eax = 0 (process exit status)
; Stack: `push rbp` brings RSP to a 16-byte boundary before the call
;-----------------------------------------------------------------------
main:
        push    rbp
        mov     rbp, rsp

        ; Call printf(fmt, 42)
        lea     rdi, [rel fmt]          ; First arg: format string (RIP-relative, so it
                                        ; also links as PIE and assembles for macho64)
        mov     esi, 42                 ; Second arg: int value (32-bit write zero-extends)
        xor     eax, eax                ; AL = 0 (no vector args to this variadic call)
        call    printf

        xor     eax, eax                ; return 0
        pop     rbp
        ret
printf_call.asm — Linux
nasm -f elf64 printf_call.asm -o printf_call.o
gcc -no-pie printf_call.o -o printf_call
./printf_call
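macOS (prefix every C symbol with an underscore: global _main / _main:, extern _printf / call _printf; load data addresses RIP-relative, e.g. lea rdi, [rel fmt], because macho64 rejects 32-bit absolute addresses)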
nasm -f macho64 printf_call.asm -o printf_call.o
gcc printf_call.o -o printf_call
Windows (use RCX, RDX instead of RDI, RSI for args, and reserve 32 bytes of shadow space on the stack before each call)
nasm -f win64 printf_call.asm -o printf_call.obj
cl /nologo printf_call.obj /Fe:printf_call.exe legacy_stdio_definitions.lib
# Compile and link with libc
nasm -f elf64 program.asm -o program.o
gcc -no-pie program.o -o program
; asm_add.asm - Assembly function callable from C
section .text
global asm_add                          ; Export symbol

;-----------------------------------------------------------------------
; int asm_add(int a, int b)
; ABI:   System V AMD64
; In:    edi = a, esi = b   (int is 32 bits; the upper halves of RDI/RSI
;                            are not guaranteed to be meaningful)
; Out:   eax = a + b        (wraps on 32-bit overflow)
; Clobb: flags
;-----------------------------------------------------------------------
asm_add:
        mov     eax, edi                ; eax = a
        add     eax, esi                ; eax = a + b
        ret
// main.c - C driver that calls the assembly routine asm_add
#include <stdio.h>

/* Implemented in asm_add.asm; the symbol is resolved at link time. */
extern int asm_add(int a, int b);

int main(void) {
    /* Prints "Result: 30" */
    printf("Result: %d\n", asm_add(10, 20));
    return 0;
}
asm_add.asm + main.c — Linux
nasm -f elf64 asm_add.asm -o asm_add.o
gcc -no-pie main.c asm_add.o -o asm_add_demo
./asm_add_demo
macOS (prefix function with _ in asm: global _asm_add / _asm_add:)
nasm -f macho64 asm_add.asm -o asm_add.o
gcc main.c asm_add.o -o asm_add_demo
Windows (uses Win64 calling convention: RCX, RDX instead of RDI, RSI)
nasm -f win64 asm_add.asm -o asm_add.obj
cl /nologo main.c asm_add.obj /Fe:asm_add_demo.exe
When your assembly function is called from C, you must follow the ABI rules or risk corrupting the caller's state. Think of it like a hotel room—you can rearrange furniture, but put everything back before checkout.
These registers must have the same value when your function returns:
| Register | Must Preserve? | Common Use |
|---|---|---|
RBX | Yes | General callee-saved |
RBP | Yes | Frame pointer |
R12-R15 | Yes | General callee-saved |
RSP | Yes | Stack pointer (critical!) |
RAX, RCX, RDX | No | Return value / scratch |
RDI, RSI | No | Arguments / scratch |
R8-R11 | No | Arguments / scratch |
; Proper callee-saved register handling
; ABI: System V AMD64
global my_function

;-----------------------------------------------------------------------
; my_function
; In:    rdi, rsi = first two integer arguments
; Saves and restores: rbx, r12, r13, rbp
; Stack: 4 pushes (32 bytes) + 8 bytes padding = 40 bytes, so RSP is
;        16-byte aligned for any call made inside the body
;-----------------------------------------------------------------------
my_function:
        ; Save callee-saved registers we'll use
        push    rbx
        push    r12
        push    r13
        push    rbp
        sub     rsp, 8                  ; RSP is 8 mod 16 on entry and an even number of
                                        ; pushes keeps it there; pad so calls are aligned

        ; Now we can freely use RBX, R12, R13, RBP
        mov     rbx, rdi
        mov     r12, rsi

        ; ... do work ...

        add     rsp, 8                  ; Drop the alignment padding
        ; Restore in reverse order
        pop     rbp
        pop     r13
        pop     r12
        pop     rbx
        ret
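The System V ABI requires RSP to be 16-byte aligned immediately before every call instruction. Since call pushes an 8-byte return address, RSP is misaligned by 8 bytes upon function entry. If you push an odd number of 8-byte values (or subtract 8 from RSP), you realign it.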
; Stack alignment examples
; BAD: Calling printf with misaligned stack
; (intentionally wrong -- do not copy this version)
my_func:
; RSP is 8-byte misaligned after call instruction:
; the caller's `call` pushed an 8-byte return address and nothing
; in this function compensates for it before calling out.
mov rdi, format_str ; First arg: format string (format_str is defined elsewhere)
; NOTE(review): AL is also left unset before this variadic call.
call printf ; CRASH! Stack not 16-byte aligned
ret
; GOOD: Align stack before calling
; ABI: System V AMD64. Stack use: 8 bytes of padding only.
my_func:
        sub     rsp, 8                  ; Realign to 16 bytes (RSP is 8 mod 16 on entry)
        lea     rdi, [rel format_str]   ; First arg: format string (RIP-relative)
        xor     eax, eax                ; AL = 0: printf is variadic, no vector args passed
        call    printf
        add     rsp, 8                  ; Release the padding
        ret
; ALTERNATIVE: Use push for alignment
; ABI: System V AMD64. One 8-byte push both saves RBX and realigns RSP.
my_func:
        push    rbx                     ; Align + save callee-saved register
        lea     rdi, [rel format_str]   ; First arg: format string (RIP-relative)
        xor     eax, eax                ; AL = 0: printf is variadic, no vector args passed
        call    printf
        pop     rbx                     ; Restore RBX and undo the alignment push
        ret
// Basic inline assembly
int main() {
    int a = 10, b = 20;
    int result = a;                 // accumulator starts out holding a
    __asm__ (
        "addl %1, %0"               // AT&T order (source, destination): result += b
        : "+r" (result)             // %0: read-write register operand
        : "r" (b)                   // %1: input register operand
    );
    printf("Result: %d\n", result);
    return 0;
}
Understanding how C types map to assembly sizes and registers is essential for correct interop. Here's the complete mapping for x86-64 Linux (System V ABI):
| C Type | Size | NASM Equivalent | Register | Access |
|---|---|---|---|---|
char | 1 byte | db | AL, DIL | byte [addr] |
short | 2 bytes | dw | AX, DI | word [addr] |
int | 4 bytes | dd | EAX, EDI | dword [addr] |
long | 8 bytes | dq | RAX, RDI | qword [addr] |
long long | 8 bytes | dq | RAX, RDI | qword [addr] |
void* | 8 bytes | dq | RAX, RDI | qword [addr] |
| C Type | Size | Register | Instructions |
|---|---|---|---|
float | 4 bytes | XMM0-XMM7 | movss, addss, mulss |
double | 8 bytes | XMM0-XMM7 | movsd, addsd, mulsd |
long double | 16 bytes* | ST(0) (x87) | fld, fstp |
*long double is 80-bit internally, padded to 16 bytes for alignment
;-----------------------------------------------------------------------
; float add_floats(float a, float b)
; In:    xmm0 = a, xmm1 = b
; Out:   xmm0 = a + b
;-----------------------------------------------------------------------
global add_floats
add_floats:
        addss   xmm0, xmm1              ; scalar single-precision add: xmm0 += xmm1
        ret
;-----------------------------------------------------------------------
; double multiply(double x, double y)
; In:    xmm0 = x, xmm1 = y
; Out:   xmm0 = x * y
;-----------------------------------------------------------------------
global multiply
multiply:
        mulsd   xmm0, xmm1              ; scalar double-precision multiply: xmm0 *= xmm1
        ret
// C function with mixed types
// Integer and floating-point arguments are assigned from two separate
// register sequences, each filled in declaration order, so the int and
// FP parameters below do not compete for the same slots.
void mixed(int a, double b, int c, float d);
// a → EDI (integer slot 1)
// b → XMM0 (float slot 1)
// c → ESI (integer slot 2)
// d → XMM1 (float slot 2)
; Assembly calling mixed(10, 3.14, 20, 2.5)
section .data
double_val      dq 3.14                 ; argument b, stored as a 64-bit double
float_val       dd 2.5                  ; argument d, stored as a 32-bit float

section .text
        ; Integer-class arguments
        mov     edi, 10                 ; a
        mov     esi, 20                 ; c
        ; Floating-point arguments
        movsd   xmm0, [rel double_val]  ; b
        movss   xmm1, [rel float_val]   ; d
        call    mixed
C structures are laid out in memory with specific alignment rules. Understanding padding and offsets is crucial for accessing struct members from assembly.
// C struct definition
// Offsets and sizes are for x86-64 Linux (System V ABI), where int is
// 4 bytes and long is 8 bytes. Each member starts at a multiple of its
// own alignment; the compiler inserts the padding noted below.
typedef struct {
char a; // Offset 0, 1 byte
// 3 bytes padding (so b starts on a 4-byte boundary)
int b; // Offset 4, 4 bytes (aligned to 4)
short c; // Offset 8, 2 bytes
// 6 bytes padding (so d starts on an 8-byte boundary)
long d; // Offset 16, 8 bytes (aligned to 8)
} Example; // Total: 24 bytes
// To check: printf("Size: %zu\n", sizeof(Example));
Memory Layout:
┌────────┬────────┬────────┬────────┐
│ a │ pad[3] │ b │ (4B) │ Bytes 0-7
└────────┴────────┴────────┴────────┘
┌────────┬──────────────────────────┐
│ c │ padding │ Bytes 8-15
└────────┴──────────────────────────┘
┌───────────────────────────────────┐
│ d │ Bytes 16-23
└───────────────────────────────────┘
;-----------------------------------------------------------------------
; void process_struct(Example* ptr)
; ABI:   System V AMD64
; In:    rdi = pointer to Example struct
; Out:   none; ptr->a = 'X', ptr->b += 100
; Clobb: eax, ecx, rdx, r8d, flags   (volatile registers only)
;-----------------------------------------------------------------------
global process_struct
process_struct:
        ; Define offsets (match C struct layout)
        %define OFFSET_A 0
        %define OFFSET_B 4
        %define OFFSET_C 8
        %define OFFSET_D 16

        ; Read members
        movzx   eax, byte [rdi + OFFSET_A]      ; Load char a (zero-extend)
        mov     r8d, [rdi + OFFSET_B]           ; Load int b into a scratch register
                                                ; (not EBX: RBX is callee-saved)
        movsx   ecx, word [rdi + OFFSET_C]      ; Load short c (sign-extend)
        mov     rdx, [rdi + OFFSET_D]           ; Load long d

        ; Modify members
        mov     byte [rdi + OFFSET_A], 'X'      ; Set a = 'X'
        add     dword [rdi + OFFSET_B], 100     ; b += 100
        ret
; Define struct in NASM
; Mirrors the C `Example` layout; alignb inserts the same padding the
; C compiler does, so the offsets are a=0, b=4, c=8, d=16 (size 24).
struc Example
    .a:     resb 1
            alignb 4                    ; pad to int alignment
    .b:     resd 1
    .c:     resw 1
            alignb 8                    ; pad to long alignment
    .d:     resq 1
endstruc
; Now use struct offsets
; void process_struct(Example* ptr)    (System V AMD64)
; In:    rdi = pointer to an Example
; Clobb: eax, r8d, rcx   (volatile registers only)
process_struct:
        movzx   eax, byte [rdi + Example.a]     ; Load a (zero-extended; avoids a partial write to AL)
        mov     r8d, [rdi + Example.b]          ; Load b into a scratch register
                                                ; (not EBX: RBX is callee-saved)
        mov     rcx, [rdi + Example.d]          ; Load d
        ret
;-----------------------------------------------------------------------
; int sum_array(int* arr, int len)
; ABI:   System V AMD64
; In:    rdi = array pointer, esi = length
; Out:   eax = arr[0] + ... + arr[len-1]   (0 when len <= 0; wraps on overflow)
; Clobb: rdi, esi, flags
;-----------------------------------------------------------------------
global sum_array
sum_array:
        xor     eax, eax                ; sum = 0
        test    esi, esi
        jle     .done                   ; len <= 0: nothing to add. A negative len must not
                                        ; enter the loop, where it would count down ~2^32 times
.loop:
        add     eax, [rdi]              ; sum += *arr
        add     rdi, 4                  ; Move to next int (4 bytes)
        dec     esi                     ; esi = elements remaining
        jnz     .loop
.done:
        ret                             ; Return sum in EAX
Write an assembly function that takes a pointer to this C struct and returns the sum of all numeric members:
// Two 4-byte ints fill the first 8 bytes exactly, so z lands on an
// 8-byte boundary with no padding inserted.
typedef struct {
int x; // Offset 0, 4 bytes
int y; // Offset 4, 4 bytes
double z; // Offset 8, 8 bytes (aligned to 8)
} Point; // Size: 16 bytes
// Prototype: double point_sum(Point* p);
Hint: Return float/double values in XMM0. Convert integers with cvtsi2sd.
Use pahole or offsetof() to verify struct layouts. Different compilers and platforms may pad differently!