Experiments

Ten guided labs. Each has C and Rust versions, clear steps, expected output. Do them. Reading about memory is not the same as seeing it.


Experiment 1: Print the Memory Layout

Verify the address space from Chapter 6.

// layout.c
// Prints one representative address from each region of the process image,
// then dumps this process's own memory map so the two can be compared.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
int global_init = 42;   // initialized global   (printed under "Data")
int global_uninit;      // uninitialized global (printed under "BSS")
int main(void) {
    int stack_var = 99;
    int *heap_var = malloc(sizeof(int));
    if (heap_var == NULL) { perror("malloc"); return 1; }
    printf("Code  (main):     %p\n", (void *)main);
    printf("Data  (init):     %p\n", (void *)&global_init);
    printf("BSS   (uninit):   %p\n", (void *)&global_uninit);
    printf("Heap  (malloc):   %p\n", (void *)heap_var);
    printf("Stack (local):    %p\n", (void *)&stack_var);

    // system() runs the command in a child shell, so "/proc/self" there would
    // resolve to the child `cat`, not to this program. Name our PID explicitly
    // so the map shown matches the addresses printed above.
    char cmd[64];
    snprintf(cmd, sizeof cmd, "cat /proc/%ld/maps", (long)getpid());
    fflush(stdout);     // keep our lines ahead of cat's when stdout is redirected
    system(cmd);

    free(heap_var);     // released only after the map was printed
    return 0;
}
static GLOBAL: i32 = 42;

/// Prints the address of this function, of the `GLOBAL` static, of a boxed
/// value and of a local variable, one per line, using the `{:p}` formatter.
fn main() {
    let on_stack: i32 = 99;
    let boxed = Box::new(0i32);

    let code_ptr = main as *const ();
    let data_ref: &i32 = &GLOBAL;
    let heap_ref: &i32 = &boxed; // deref coercion: points at the boxed i32 itself
    let stack_ref: &i32 = &on_stack;

    println!("Code:  {:p}", code_ptr);
    println!("Data:  {:p}", data_ref);
    println!("Heap:  {:p}", heap_ref);
    println!("Stack: {:p}", stack_ref);
}

Verify: Code < Data < BSS < Heap < ... gap ... < Stack.


Experiment 2: Stack Buffer Overflow

// overflow.c — compile: gcc -fno-stack-protector -g -o overflow overflow.c
// Deliberately broken demo: do not "fix" the sizes below, the mismatch is the
// whole point of the experiment.
#include <stdio.h>
#include <string.h>
// Writes 32 bytes of 'A' (0x41) into an 8-byte stack array. This is undefined
// behaviour; the compile line above disables the stack protector so the
// out-of-bounds write is not intercepted by a canary check.
void vulnerable(void) {
    char buffer[8];
    memset(buffer, 'A', 32);  // 24 bytes past the end of buffer
    printf("After overflow\n");
}
// Calls vulnerable() once and returns 0 if control ever comes back here.
int main(void) { vulnerable(); return 0; }
$ ./overflow
Segmentation fault
$ gdb ./overflow -ex run -ex bt
#0  0x4141414141414141 in ?? ()    <-- return addr overwritten

Rust equivalent panics cleanly at the boundary:

/// Attempts to store `b'A'` at indices 0 through 31 of an 8-element array.
/// Indexing is bounds-checked, so the store at index 8 panics.
fn main() {
    let mut buffer = [0u8; 8];
    (0..32).for_each(|idx| {
        buffer[idx] = b'A'; // panics when idx == 8
    });
}

Experiment 3: Fork and Copy-on-Write

// cow.c
// Forks after writing to a heap page; the child then writes to the same
// virtual address. Parent and child print the same pointer but end up with
// different values.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
int main(void) {
    int *data = malloc(4096);
    if (data == NULL) { perror("malloc"); return 1; }
    *data = 42;
    printf("Before fork: %p = %d\n", (void*)data, *data);
    // Flush before fork(): any bytes still sitting in the stdio buffer would be
    // copied into the child and could be printed twice when stdout is a pipe/file.
    fflush(stdout);
    pid_t pid = fork();
    if (pid < 0) { perror("fork"); free(data); return 1; }
    if (pid == 0) {
        printf("Child before write: %p = %d\n", (void*)data, *data);
        *data = 99;  // triggers copy-on-write
        printf("Child after write:  %p = %d\n", (void*)data, *data);
        // _exit() does not flush stdio buffers, so do it by hand or the
        // child's lines vanish when stdout is not a terminal.
        fflush(stdout);
        free(data); _exit(0);
    }
    wait(NULL);
    printf("Parent after child:  %p = %d\n", (void*)data, *data);
    free(data);
    return 0;
}

Same virtual address in both, different values. Different physical pages after CoW.

What do you think happens?

Both print the same pointer. How can the values differ? (Different page tables, different physical frames.)


Experiment 4: mmap a File

// mmap_file.c
// Creates test.txt, maps it MAP_SHARED, overwrites the first five bytes
// through the mapping, and shows that the file on disk changed.
#include <stdio.h>
#include <stdlib.h>     // system()
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <string.h>
int main(void) {
    static const char msg[] = "Hello, mmap!\n";
    const size_t len = sizeof msg - 1;   // 13 bytes, without the trailing NUL

    int fd = open("test.txt", O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) { perror("open"); return 1; }
    if (write(fd, msg, len) != (ssize_t)len) { perror("write"); close(fd); return 1; }

    char *m = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (m == MAP_FAILED) { perror("mmap"); close(fd); return 1; }
    close(fd);  // the mapping remains valid after the descriptor is closed

    // The file holds no NUL terminator, so bound the print to the mapped length.
    printf("Via mmap: %.*s", (int)len, m);
    fflush(stdout);  // keep this line ahead of cat's output below

    memcpy(m, "ZZZZZ", 5);   // overwrites "Hello" only; ", mmap!\n" is untouched
    if (msync(m, len, MS_SYNC) != 0) perror("msync");
    munmap(m, len);
    system("cat test.txt");  // prints "ZZZZZ, mmap!\n"
    return 0;
}

The file and the memory are the same thing. Writing to the pointer writes to disk.


Experiment 5: A 50-Line Bump Allocator

// bump.c — simplest possible malloc
#include <stdio.h>
#define HEAP_SIZE 1024
// Backing storage. Explicitly 8-byte aligned so that handing out 8-byte-rounded
// offsets really yields 8-byte-aligned pointers (a plain char array only
// guarantees alignment 1).
static _Alignas(8) char heap[HEAP_SIZE];
static size_t offset = 0;   // first free byte in heap; only ever grows

// Returns a pointer to `size` bytes carved from the static heap, or NULL when
// the request does not fit. Each request consumes `size` rounded up to a
// multiple of 8. There is no free(): memory is never handed back.
void *bump_alloc(size_t size) {
    size_t avail = HEAP_SIZE - offset;
    // Compare against the remaining space BEFORE rounding: `size + 7` wraps to
    // a tiny number for sizes near SIZE_MAX and would otherwise pass the check.
    if (size > avail) return NULL;
    size_t aligned = (size + 7) & ~(size_t)7;
    if (aligned > avail) return NULL;
    void *ptr = &heap[offset];
    offset += aligned;
    return ptr;
}

// Allocates two ints from the bump allocator, stores 42 and 99 in them, and
// prints their values, addresses, and how much of the heap has been consumed.
int main(void) {
    int *a = bump_alloc(sizeof(int));
    int *b = bump_alloc(sizeof(int));
    // bump_alloc returns NULL when the heap is exhausted; never write through
    // an unchecked result.
    if (a == NULL || b == NULL) {
        fprintf(stderr, "bump_alloc: out of memory\n");
        return 1;
    }
    *a = 42;
    *b = 99;
    printf("a=%d at %p, b=%d at %p\n", *a, (void*)a, *b, (void*)b);
    printf("Used: %zu / %d bytes\n", offset, HEAP_SIZE);
    return 0;
}
/// A fixed-capacity bump allocator over an inline 1024-byte buffer.
///
/// `offset` is the index of the first unused byte of `heap`. Memory is never
/// returned to the allocator.
// repr(C) keeps `heap` at offset 0 and align(8) aligns the struct, so the
// 8-byte-rounded offsets handed out below are 8-byte-aligned addresses.
#[repr(C, align(8))]
struct Bump { heap: [u8; 1024], offset: usize }

impl Bump {
    /// Creates an empty allocator with a zero-filled heap.
    fn new() -> Self { Bump { heap: [0; 1024], offset: 0 } }

    /// Hands out `size` bytes from the heap, consuming `size` rounded up to a
    /// multiple of 8.
    ///
    /// Returns `None` when the rounded request does not fit in the remaining
    /// space. Requests so large that the rounding itself would overflow are
    /// also rejected with `None` instead of panicking or wrapping.
    fn alloc(&mut self, size: usize) -> Option<&mut [u8]> {
        let aligned = size.checked_add(7)? & !7;
        let end = self.offset.checked_add(aligned)?;
        if end > self.heap.len() {
            return None;
        }
        let start = self.offset;
        self.offset = end;
        // Only the requested bytes are exposed; the padding stays hidden.
        Some(&mut self.heap[start..start + size])
    }
}
/// Allocates four bytes from a fresh `Bump`, stores the native-endian bytes of
/// `42i32` in them, and prints the bytes together with the allocator's usage.
fn main() {
    let mut a = Bump::new();
    let x = a.alloc(4).expect("4 bytes fit in a fresh 1024-byte heap");
    x.copy_from_slice(&42i32.to_ne_bytes());
    // `x` mutably borrows `a`, so `a.offset` cannot be read while `x` is still
    // in use. Copy the bytes out first; that is the last use of `x`.
    let bytes = x.to_vec();
    println!("Allocated: {:?}, used: {}/1024", bytes, a.offset);
}

Rust returns Option — no null pointers, no forgetting to check.


Experiment 6: Trigger All 6 Segfault Types

// segfaults.c — compile: gcc -g -fno-stack-protector -o segfaults segfaults.c
// Six deliberately invalid memory accesses, selected by a digit on the command
// line. Every function below is intentionally wrong; do not "fix" them.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// 1: stores 42 through a NULL pointer.
void null_deref(void)    { int *p = NULL; *p = 42; }
// 2: calls itself unconditionally, with no base case.
void stack_blow(void)    { stack_blow(); }
// 3: stores 42 into a 4-byte allocation after it has been freed.
// NOTE(review): a freed small block often stays mapped, so this may corrupt
// the heap silently instead of faulting — confirm on the target system.
void use_after_free(void){ int *p = malloc(4); free(p); *p = 42; }
// 4: overwrites the first byte of a string literal.
void write_rodata(void)  { char *s = "hello"; s[0] = 'H'; }
// 5: calls a one-byte local array as if it were a function
// (0xc3 is the x86 `ret` opcode).
void exec_stack(void)    { char c[]={0xc3}; ((void(*)(void))c)(); }
// 6: stores 42 at the fixed address 0xDEADBEEF (presumably not mapped).
void unmapped(void)      { *(int*)0xDEADBEEF = 42; }

// Expects exactly one argument and dispatches on its first character.
// There is no default case: any character other than '1'..'6' does nothing.
int main(int argc, char **argv) {
    if (argc != 2) { printf("Usage: %s <1-6>\n", argv[0]); return 1; }
    switch(argv[1][0]) {
        case '1': null_deref(); break;    case '2': stack_blow(); break;
        case '3': use_after_free(); break; case '4': write_rodata(); break;
        case '5': exec_stack(); break;     case '6': unmapped(); break;
    }
}

Debug each with GDB: gdb ./segfaults -ex "run 1" -ex bt -ex "info registers rip".

For each case, note: which address faulted, what bt shows, which permission was violated (R/W/X).


Experiment 7: Compare ELF — C vs Rust "Hello World"

$ echo '#include <stdio.h>
int main(){ puts("hello"); }' > hello.c && gcc -o hello_c hello.c
$ echo 'fn main(){ println!("hello"); }' > hello.rs && rustc -o hello_rust hello.rs
$ ls -la hello_c hello_rust
$ size hello_c hello_rust
$ readelf -S hello_c | wc -l
$ readelf -S hello_rust | wc -l

Rust binary: 1-4 MB. C binary: ~16 KB. The difference: panic handling, unwinding tables, println! formatting. Try rustc -O then strip:

$ rustc -O -o hello_opt hello.rs && strip hello_opt
$ ls -la hello_c hello_rust hello_opt

Fun Fact

Most of a Rust binary's size is not your code. It is the standard library support for panics and formatting. Set panic = "abort" in Cargo.toml and the binary shrinks dramatically.


Experiment 8: Manual Linking

// math.c
/* Returns the sum of a and b. */
int add(int a, int b) {
    int sum = a + b;
    return sum;
}
/* Returns the product of a and b. */
int mul(int a, int b) {
    int product = a * b;
    return product;
}
// main.c
#include <stdio.h>
/* Defined in math.c; resolved by the linker, not the compiler. */
extern int add(int, int);
extern int mul(int, int);
/* Prints the results of add(3,4) and mul(3,4) on one line. */
int main(void) {
    const int lhs = 3, rhs = 4;
    int sum = add(lhs, rhs);
    int product = mul(lhs, rhs);
    printf("3+4=%d, 3*4=%d\n", sum, product);
    return 0;
}
$ gcc -c math.c && gcc -c main.c
$ nm math.o        # T add, T mul  (defined)
$ nm main.o        # U add, U mul  (undefined — need linking)
$ gcc -o prog main.o math.o
$ nm prog | grep -E 'add|mul|main'

What do you think happens?

Link main.o without math.o. The linker error shows exactly how symbol resolution works.


Experiment 9: Cache Performance

// cache.c
// Times two passes over the same 16M-element int array: one in index order,
// one following a pseudo-random chain of indices stored in the array itself.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define N (16*1024*1024)
int main(void) {
    int *a = malloc(N * sizeof(int));
    if (a == NULL) { perror("malloc"); return 1; }
    for (int i = 0; i < N; i++) a[i] = i;
    clock_t t;
    // volatile keeps the compiler from deleting the loops; 64-bit because the
    // sum of 0..N-1 (about 1.4e14) does not fit in an int, and signed overflow
    // is undefined behaviour.
    volatile long long sum = 0;
    t = clock();
    for (int i = 0; i < N; i++) sum += a[i];
    printf("Sequential: %.1f ms\n", 1000.0*(clock()-t)/CLOCKS_PER_SEC);

    // Sattolo's shuffle (j drawn from [0, i-1], never i itself): the result is
    // a permutation consisting of ONE cycle of length N. A plain Fisher-Yates
    // shuffle may leave index 0 on a short cycle, in which case the chase
    // below would keep revisiting a handful of cached elements.
    // NOTE: where RAND_MAX is smaller than N the chain is still a single
    // cycle, but the jumps are less scattered.
    srand(42);
    for (int i = N-1; i > 0; i--) {
        int j = rand() % i;
        int tmp = a[i]; a[i] = a[j]; a[j] = tmp;
    }
    sum = 0; t = clock();
    int idx = 0;
    // Every element is an index in [0, N), so it can be followed directly.
    for (int i = 0; i < N; i++) { sum += a[idx]; idx = a[idx]; }
    printf("Random:     %.1f ms\n", 1000.0*(clock()-t)/CLOCKS_PER_SEC);
    free(a);
    return 0;
}
$ gcc -O2 -o cache cache.c && ./cache
Sequential:  7.0 ms
Random:     85.0 ms     <-- 10x+ slower, same data, same operations

The difference is cache misses. Use perf stat -e cache-misses,cache-references ./cache to see the numbers.


Experiment 10: Handwritten ELF (No Compiler)

A valid executable in 165 bytes (64-byte ELF header + 56-byte program header + 45 bytes of code and data). Just write(1, "Hi\n", 3) and exit(0).

#!/usr/bin/env python3
# tiny_elf.py
# Writes a minimal x86-64 Linux ELF executable named 'tiny' by packing the ELF
# header, one program header and the machine code by hand.
import struct, os
code = bytes([
    0x48,0xc7,0xc0,0x01,0x00,0x00,0x00,  # mov rax, 1 (write)
    0x48,0xc7,0xc7,0x01,0x00,0x00,0x00,  # mov rdi, 1 (stdout)
    # RIP-relative: the displacement counts from the END of this instruction
    # (byte 21). 21 bytes of code follow before the message at byte 42.
    0x48,0x8d,0x35,0x15,0x00,0x00,0x00,  # lea rsi, [rip+21]
    0x48,0xc7,0xc2,0x03,0x00,0x00,0x00,  # mov rdx, 3
    0x0f,0x05,                             # syscall
    0x48,0xc7,0xc0,0x3c,0x00,0x00,0x00,  # mov rax, 60 (exit)
    0x48,0x31,0xff,                        # xor rdi, rdi
    0x0f,0x05,                             # syscall
    0x48,0x69,0x0a,                        # "Hi\n"
])
# Guard against editing the instructions without updating the displacement.
assert code[21 + code[17]:] == b"Hi\n", "lea displacement does not point at the message"
# LOAD: virtual address the file is mapped at; EH/PH: ELF64 header sizes.
LOAD = 0x400000; EH = 64; PH = 56
# Code starts right after the two headers; the whole file is one segment.
ENTRY = LOAD + EH + PH; FSIZE = EH + PH + len(code)
# e_ident: magic, ELFCLASS64, little-endian, version 1, ABI 0, then 7 pad bytes.
ehdr = struct.pack('<4sBBBBBxxxxxxx', b'\x7fELF', 2, 1, 1, 0, 0)
# e_type=ET_EXEC, e_machine=x86-64, e_version, e_entry, e_phoff, e_shoff,
# e_flags, e_ehsize, e_phentsize, e_phnum, e_shentsize, e_shnum, e_shstrndx.
ehdr += struct.pack('<HHIQQQIHHHHHH', 2,0x3E,1,ENTRY,EH,0,0,EH,PH,1,0,0,0)
# PT_LOAD, flags R+X, offset 0, vaddr, paddr, filesz, memsz, align.
phdr = struct.pack('<IIQQQQQQ', 1, 5, 0, LOAD, LOAD, FSIZE, FSIZE, 0x1000)
with open('tiny','wb') as f: f.write(ehdr + phdr + code)
os.chmod('tiny', 0o755)
print(f"Created 'tiny' ({FSIZE} bytes). Run: ./tiny")
$ python3 tiny_elf.py && ./tiny
Created 'tiny' (165 bytes). Run: ./tiny
Hi
$ file tiny
tiny: ELF 64-bit LSB executable, x86-64, statically linked, no section header

No compiler, no libc, no linker. Pure bytes that the kernel understands.


Task

Complete at least 5 of these 10 experiments. For each:

  1. Run the code exactly as written.
  2. Modify one thing and predict the result before running.
  3. Write down what surprised you.

The goal is not to memorize. It is to build intuition. When you have seen the stack grow downward, you never forget which way it grows.