Environment and Configuration

Every Unix process inherits a block of key-value strings from its parent. This environment block controls program behavior without code changes. Understanding how it works -- and how to combine it with command-line arguments and configuration files -- is essential for writing well-behaved Unix tools.

The Environment Block

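The kernel places the environment on the new process's initial stack, alongside the command-line argument strings. Each entry is a NUL-terminated KEY=VALUE string, and the array of pointers to those strings ends with a NULL pointer.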

/* print_env.c */
#include <stdio.h>

extern char **environ;  /* Global pointer to environment array */

/*
 * Print every entry of the process environment, one per line.
 * The environ array is walked by index until its NULL terminator is reached.
 * Always returns 0.
 */
int main(void)
{
    size_t idx = 0;

    while (environ[idx] != NULL) {
        printf("%s\n", environ[idx]);
        idx++;
    }

    return 0;
}
$ gcc -o print_env print_env.c
$ ./print_env | head -5
SHELL=/bin/bash
HOME=/home/user
PATH=/usr/local/bin:/usr/bin:/bin
LANG=en_US.UTF-8
TERM=xterm-256color

The layout in memory:

Stack (high address)
+----------------------------+
| environment strings        |
| "HOME=/home/user\0"        |
| "PATH=/usr/bin:/bin\0"     |
| ...                        |
+----------------------------+
| environ[0] -> "HOME=..."   |
| environ[1] -> "PATH=..."   |
| environ[N] -> NULL         |
+----------------------------+
| argv strings               |
| argv[0] -> "./print_env"   |
| argv[1] -> NULL            |
+----------------------------+
| argc = 1                   |
+----------------------------+
        (stack grows down)

Reading and Writing the Environment

/* env_ops.c */
#include <stdio.h>
#include <stdlib.h>

/*
 * Look up `name` in the environment for display purposes.
 * Returns the variable's value, or the literal "(null)" when it is not set,
 * so the result is always safe to hand to printf("%s").
 */
static const char *env_or_null(const char *name)
{
    const char *value = getenv(name);

    return value ? value : "(null)";
}

/*
 * Demonstrate reading, setting, and removing an environment variable.
 * Returns 0 on success, or 1 if setenv()/unsetenv() reports a failure.
 */
int main(void)
{
    /* Read: getenv() returns NULL when the variable is not set */
    const char *home = getenv("HOME");
    if (home)
        printf("HOME = %s\n", home);
    else
        printf("HOME not set\n");

    /* Write -- adds or overwrites */
    if (setenv("MY_APP_DEBUG", "1", 1) != 0) {  /* 1 = overwrite if exists */
        perror("setenv");
        return 1;
    }
    printf("MY_APP_DEBUG = %s\n", env_or_null("MY_APP_DEBUG"));

    /* Write without overwrite */
    if (setenv("MY_APP_DEBUG", "2", 0) != 0) {  /* 0 = do not overwrite */
        perror("setenv");
        return 1;
    }
    printf("MY_APP_DEBUG = %s\n", env_or_null("MY_APP_DEBUG"));  /* Still "1" */

    /* Remove */
    if (unsetenv("MY_APP_DEBUG") != 0) {
        perror("unsetenv");
        return 1;
    }
    printf("After unsetenv: %s\n", env_or_null("MY_APP_DEBUG"));

    return 0;
}

Caution: putenv() inserts a pointer to your string directly into the environment. If that string is on the stack, it becomes a dangling pointer when the function returns. Prefer setenv(), which copies the string.

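Caution: The environment functions are not thread-safe. If any thread calls setenv(), unsetenv(), or putenv() while another thread is reading or modifying the environment -- even just through getenv() -- the result is undefined behavior unless the program provides its own synchronization.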

PATH Resolution and exec

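When you call execlp() or execvp() (the "p" variants), the C library -- not the kernel -- searches the directories listed in PATH for the binary and then calls execve() with the path it found. A name that contains a slash is used as-is and skips the search.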

/* path_search.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * List the components of PATH in search order.
 *
 * The value is scanned in place with strcspn() instead of being copied and
 * split with strtok(): no allocation can fail, and empty components
 * (leading, trailing, or doubled colons) are reported instead of being
 * silently skipped.
 *
 * Returns 0 on success, 1 if PATH is not set.
 */
int main(void)
{
    const char *path = getenv("PATH");
    if (!path) {
        printf("PATH not set\n");
        return 1;
    }

    printf("PATH directories:\n");

    /* An entirely empty PATH has no components to list. */
    if (*path == '\0')
        return 0;

    int index = 0;
    const char *entry = path;
    for (;;) {
        size_t len = strcspn(entry, ":");  /* length up to ':' or end */

        printf("  [%d] ", index++);
        if (len == 0)
            printf("(empty -- treated as the current directory)");
        else
            fwrite(entry, 1, len, stdout);
        putchar('\n');

        if (entry[len] == '\0')
            break;
        entry += len + 1;  /* step over the ':' separator */
    }

    return 0;
}

The search order matters. If /usr/local/bin appears before /usr/bin, a binary in /usr/local/bin shadows the system version.

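Caution: A PATH that includes . (current directory) or an empty component is a security risk. An empty component -- a leading colon (:/usr/bin), a trailing colon (/usr/bin:), or a doubled colon (/usr/bin::/bin) -- is treated as the current directory. An attacker can place a malicious binary named after a common command in a directory you are likely to cd into, and it will run instead of the real command.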

Command-Line Parsing: getopt

getopt() is the traditional Unix way to parse command-line options.

/* getopt_demo.c */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Parse `text` as a base-10 int.
 *
 * Unlike atoi(), this rejects empty input, trailing garbage ("5x"), and
 * values that do not fit in an int. The value is read as a long long and
 * accepted only if it survives a round trip through int; converting an
 * out-of-range value to int is implementation-defined, never undefined.
 *
 * Returns 0 and stores the result in *out on success; returns -1 and
 * leaves *out untouched on failure.
 */
static int parse_int(const char *text, int *out)
{
    char *end;
    long long wide = strtoll(text, &end, 10);

    if (end == text || *end != '\0')
        return -1;
    if ((long long)(int)wide != wide)
        return -1;

    *out = (int)wide;
    return 0;
}

/*
 * Parse -v, -c COUNT and -o OUTPUT with getopt(), print the resulting
 * settings, then list the remaining non-option arguments.
 * Returns 0 on success, 1 on an unknown option, a missing option argument,
 * or a COUNT that is not a valid integer.
 */
int main(int argc, char *argv[])
{
    int verbose = 0;
    int count = 1;
    const char *output = NULL;
    int opt;

    while ((opt = getopt(argc, argv, "vc:o:")) != -1) {
        switch (opt) {
        case 'v':
            verbose = 1;
            break;
        case 'c':
            if (parse_int(optarg, &count) != 0) {
                fprintf(stderr, "%s: invalid count: %s\n", argv[0], optarg);
                return 1;
            }
            break;
        case 'o':
            output = optarg;
            break;
        default:
            fprintf(stderr, "Usage: %s [-v] [-c count] [-o output] [files...]\n",
                    argv[0]);
            return 1;
        }
    }

    printf("verbose=%d count=%d output=%s\n",
           verbose, count, output ? output : "(none)");

    /* Remaining arguments (non-option) */
    for (int i = optind; i < argc; i++)
        printf("arg: %s\n", argv[i]);

    return 0;
}
$ ./getopt_demo -v -c 5 -o result.txt file1.txt file2.txt
verbose=1 count=5 output=result.txt
arg: file1.txt
arg: file2.txt

The option string "vc:o:" means: -v takes no argument, -c and -o each require one (indicated by the colon).

Long Options: getopt_long

For modern tools, long options like --verbose are expected.

/* getopt_long_demo.c */
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>

/*
 * Parse `text` as a base-10 int.
 *
 * Unlike atoi(), this rejects empty input, trailing garbage ("10x"), and
 * values that do not fit in an int. The value is read as a long long and
 * accepted only if it survives a round trip through int; converting an
 * out-of-range value to int is implementation-defined, never undefined.
 *
 * Returns 0 and stores the result in *out on success; returns -1 and
 * leaves *out untouched on failure.
 */
static int parse_int(const char *text, int *out)
{
    char *end;
    long long wide = strtoll(text, &end, 10);

    if (end == text || *end != '\0')
        return -1;
    if ((long long)(int)wide != wide)
        return -1;

    *out = (int)wide;
    return 0;
}

/*
 * Parse short and long options with getopt_long() and print the resulting
 * settings. Each long option maps onto the same character as its short form,
 * so one switch handles both spellings.
 * Returns 0 on success or after --help, 1 on an unrecognized option or a
 * count that is not a valid integer.
 */
int main(int argc, char *argv[])
{
    int verbose = 0;
    int count = 1;
    const char *output = NULL;

    static struct option long_options[] = {
        {"verbose", no_argument,       NULL, 'v'},
        {"count",   required_argument, NULL, 'c'},
        {"output",  required_argument, NULL, 'o'},
        {"help",    no_argument,       NULL, 'h'},
        {NULL,      0,                 NULL,  0 }
    };

    int opt;
    while ((opt = getopt_long(argc, argv, "vc:o:h", long_options, NULL)) != -1) {
        switch (opt) {
        case 'v':
            verbose = 1;
            break;
        case 'c':
            if (parse_int(optarg, &count) != 0) {
                fprintf(stderr, "%s: invalid count: %s\n", argv[0], optarg);
                return 1;
            }
            break;
        case 'o':
            output = optarg;
            break;
        case 'h':
            printf("Usage: %s [--verbose] [--count N] [--output FILE]\n",
                   argv[0]);
            return 0;
        default:
            return 1;
        }
    }

    printf("verbose=%d count=%d output=%s\n",
           verbose, count, output ? output : "(none)");

    return 0;
}
$ ./getopt_long_demo --verbose --count 10 --output data.csv
verbose=1 count=10 output=data.csv

Rust: std::env

Rust's standard library provides safe environment access.

// env_demo.rs
use std::env;

fn main() {
    // Read: any error from env::var is reported as "not set".
    if let Ok(home) = env::var("HOME") {
        println!("HOME = {}", home);
    } else {
        println!("HOME not set");
    }

    // Set, then read the value straight back.
    env::set_var("MY_APP_DEBUG", "1");
    let debug = env::var("MY_APP_DEBUG").unwrap();
    println!("MY_APP_DEBUG = {}", debug);

    // Remove
    env::remove_var("MY_APP_DEBUG");

    // Iterate all
    println!("\nAll environment variables:");
    env::vars().for_each(|(name, val)| println!("  {}={}", name, val));

    // PATH directories, split by the standard library.
    match env::var_os("PATH") {
        Some(raw) => {
            println!("\nPATH directories:");
            for entry in env::split_paths(&raw) {
                println!("  {}", entry.display());
            }
        }
        None => {}
    }
}

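Rust Note: env::set_var() and env::remove_var() are declared unsafe starting with the Rust 2024 edition, because they are only sound when no other thread is reading or writing the environment. In that edition the calls above must be wrapped in an unsafe block; earlier editions still accept them as safe functions, but the hazard is the same thread-safety issue that plagues C's setenv(). Prefer reading environment at startup and storing values in your own data structures.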

Rust: Command-Line Parsing with clap

The clap crate is the standard Rust approach to argument parsing.

// clap_demo.rs
// Cargo.toml:
//   [dependencies]
//   clap = { version = "4", features = ["derive"] }

use clap::Parser;

// Command-line arguments for the demo, declared through clap's derive API.
// NOTE(review): per the surrounding text, --help is generated from these
// annotations, so treat the `///` comments as user-facing text.
/// A well-behaved Unix tool
#[derive(Parser, Debug)]
#[command(name = "mytool", version, about = "Does useful things")]
struct Args {
    // Boolean switch; the sample run passes it as `--verbose`.
    /// Enable verbose output
    #[arg(short, long)]
    verbose: bool,

    // Declared with default_value_t = 1; the sample run passes `--count 5`.
    /// Number of iterations
    #[arg(short, long, default_value_t = 1)]
    count: u32,

    // Optional; the sample run passes `-o result.txt` and main prints Some("result.txt").
    /// Output file path
    #[arg(short, long)]
    output: Option<String>,

    // No #[arg] attribute; in the sample run the trailing file names end up here.
    /// Input files
    files: Vec<String>,
}

fn main() {
    let args = Args::parse();

    println!("verbose={} count={} output={:?}",
             args.verbose, args.count, args.output);

    for f in &args.files {
        println!("file: {}", f);
    }
}
$ cargo run -- --verbose --count 5 -o result.txt input1.dat input2.dat
verbose=true count=5 output=Some("result.txt")
file: input1.dat
file: input2.dat

The --help flag auto-generates usage text from the struct annotations.

Rust Note: clap with derive macros generates the help text, validation, and parsing code at compile time. The C equivalent requires writing all of this by hand or using a library like argp.

Configuration File Patterns

A well-behaved Unix tool checks configuration in this order (later overrides earlier):

1. Compiled-in defaults
2. System config:  /etc/myapp/config
3. User config:    ~/.config/myapp/config  (XDG_CONFIG_HOME)
4. Environment:    MYAPP_DEBUG=1
5. Command-line:   --debug

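A minimal config file parser in C. To stay short it reads /etc/myapp.conf and ~/.myapp.conf rather than the XDG paths listed above, and it applies only the environment layer (MYAPP_PORT), not command-line options: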

/* config_parse.c */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define MAX_LINE 256

/* Settings for myapp; filled in by config_defaults() and config_load(). */
struct config {
    int   port;          /* set by the "port" key */
    int   verbose;       /* set by the "verbose" key */
    char  logfile[256];  /* set by the "logfile" key; always NUL-terminated */
};

/*
 * Reset *cfg to the compiled-in defaults.
 * The compound literal initializes every member, so no byte of the struct
 * (including the unused tail of logfile) is left indeterminate.
 */
static void config_defaults(struct config *cfg)
{
    *cfg = (struct config){
        .port    = 8080,
        .verbose = 0,
        .logfile = "/var/log/myapp.log",
    };
}

/* True for the blank characters allowed around keys and values. */
static int is_blank(char c)
{
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/*
 * Strip leading and trailing blanks from `s` in place.
 * Returns a pointer to the first non-blank character inside `s`.
 */
static char *trim(char *s)
{
    while (is_blank(*s))
        s++;

    char *end = s + strlen(s);
    while (end > s && is_blank(end[-1]))
        end--;
    *end = '\0';

    return s;
}

/*
 * Parse `text` as a base-10 int, rejecting empty input, trailing garbage,
 * and values that do not fit in an int (checked by a round trip through
 * int; that conversion is implementation-defined, never undefined).
 * Returns 0 and stores the value in *out, or -1 leaving *out untouched.
 */
static int parse_int(const char *text, int *out)
{
    char *end;
    long long wide = strtoll(text, &end, 10);

    if (end == text || *end != '\0')
        return -1;
    if ((long long)(int)wide != wide)
        return -1;

    *out = (int)wide;
    return 0;
}

/*
 * Apply the key=value settings found in the file at `path` to *cfg.
 *
 * Blank lines and lines whose first non-blank character is '#' are skipped.
 * Blanks around the key and the value are ignored, so "port = 8080" and
 * CRLF line endings work. Unknown keys are ignored. A malformed integer or
 * an over-long line is reported on stderr and leaves the previous value
 * in place.
 *
 * Returns 0 when the file was read to the end, -1 if it could not be opened
 * or a read error occurred. Settings applied before a read error are kept.
 */
static int config_load(struct config *cfg, const char *path)
{
    FILE *f = fopen(path, "r");
    if (!f)
        return -1;

    char line[MAX_LINE];
    int lineno = 0;
    while (fgets(line, sizeof(line), f)) {
        lineno++;

        /*
         * No newline in the buffer means the line may not have fit. Drain
         * the rest so the tail is never parsed as a line of its own.
         */
        if (!strchr(line, '\n')) {
            int c;
            int overflowed = 0;
            while ((c = fgetc(f)) != EOF && c != '\n')
                overflowed = 1;
            if (overflowed) {
                fprintf(stderr, "%s:%d: line too long, ignored\n", path, lineno);
                continue;
            }
        }

        /* Skip comments and empty lines */
        char *text = trim(line);
        if (*text == '\0' || *text == '#')
            continue;

        char *eq = strchr(text, '=');
        if (!eq)
            continue;
        *eq = '\0';

        const char *key = trim(text);
        const char *value = trim(eq + 1);
        if (*key == '\0' || *value == '\0')
            continue;

        if (strcmp(key, "port") == 0) {
            if (parse_int(value, &cfg->port) != 0)
                fprintf(stderr, "%s:%d: invalid integer for port: %s\n",
                        path, lineno, value);
        } else if (strcmp(key, "verbose") == 0) {
            if (parse_int(value, &cfg->verbose) != 0)
                fprintf(stderr, "%s:%d: invalid integer for verbose: %s\n",
                        path, lineno, value);
        } else if (strcmp(key, "logfile") == 0) {
            /* snprintf always NUL-terminates, unlike strncpy */
            snprintf(cfg->logfile, sizeof(cfg->logfile), "%s", value);
        }
    }

    int rc = ferror(f) ? -1 : 0;
    fclose(f);
    return rc;
}

/*
 * Build the effective configuration and print it.
 *
 * Layers are applied from lowest to highest precedence: compiled-in
 * defaults, /etc/myapp.conf, $HOME/.myapp.conf, then the MYAPP_PORT
 * environment variable. A config file that cannot be read is skipped.
 * An invalid MYAPP_PORT or an over-long HOME is reported on stderr and
 * ignored. Always returns 0.
 */
int main(int argc, char *argv[])
{
    const char *prog = (argc > 0 && argv[0]) ? argv[0] : "config_parse";

    struct config cfg;
    config_defaults(&cfg);

    /* Try system config, then user config */
    config_load(&cfg, "/etc/myapp.conf");

    char user_conf[512];
    const char *home = getenv("HOME");
    if (home) {
        int n = snprintf(user_conf, sizeof(user_conf), "%s/.myapp.conf", home);
        /* Never open a truncated path: it would name a different file. */
        if (n >= 0 && (size_t)n < sizeof(user_conf))
            config_load(&cfg, user_conf);
        else
            fprintf(stderr, "%s: HOME is too long, skipping user config\n", prog);
    }

    /* Environment overrides */
    const char *env_port = getenv("MYAPP_PORT");
    if (env_port) {
        /*
         * Accept only a complete base-10 number that fits in an int; the
         * round trip through int rejects out-of-range values.
         */
        char *end;
        long long wide = strtoll(env_port, &end, 10);
        if (end != env_port && *end == '\0' && (long long)(int)wide == wide)
            cfg.port = (int)wide;
        else
            fprintf(stderr, "%s: ignoring invalid MYAPP_PORT: %s\n",
                    prog, env_port);
    }

    printf("port=%d verbose=%d logfile=%s\n",
           cfg.port, cfg.verbose, cfg.logfile);

    return 0;
}

The /etc Convention

System-wide configuration lives under /etc. Per-application patterns:

Path                  Purpose
/etc/myapp.conf       Single config file
/etc/myapp/           Config directory
/etc/myapp/conf.d/    Drop-in overrides (processed alphabetically)
/etc/default/myapp    Default environment for init scripts

Putting It Together: A Well-Behaved Unix Tool

Here is a complete C program that follows all conventions:

/* wellbehaved.c -- a well-behaved Unix tool */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
#include <errno.h>

/*
 * Program-wide settings, adjusted in main() from the environment and then
 * from the command line. Only count needs an explicit initializer: members
 * not named in the initializer of a static object start out as zero / NULL.
 */
static struct {
    int         verbose;  /* nonzero enables diagnostics on stderr */
    int         count;    /* iteration count */
    const char *output;   /* argument of -o/--output, or NULL */
} config = {
    .count = 1,
};

/*
 * Write the help text to stderr.
 * `prog` is substituted into the "Usage:" line; the rest of the text is
 * fixed and lists the options and the environment variables.
 */
static void usage(const char *prog)
{
    static const char details[] =
        "\n"
        "Options:\n"
        "  -v, --verbose       Enable verbose output\n"
        "  -c, --count=N       Number of iterations (default: 1)\n"
        "  -o, --output=FILE   Output file\n"
        "  -h, --help          Show this help\n"
        "\n"
        "Environment:\n"
        "  WELLBEHAVED_VERBOSE  Set to 1 for verbose mode\n"
        "  WELLBEHAVED_COUNT    Default iteration count\n";

    fprintf(stderr, "Usage: %s [OPTIONS] [FILE...]\n", prog);
    fputs(details, stderr);
}

/*
 * Parse `text` as a base-10 int, rejecting empty input, trailing garbage,
 * and values that do not fit in an int (checked by a round trip through
 * int; that conversion is implementation-defined, never undefined).
 * Returns 0 and stores the value in *out, or -1 leaving *out untouched.
 */
static int parse_int(const char *text, int *out)
{
    char *end;
    long long wide = strtoll(text, &end, 10);

    if (end == text || *end != '\0')
        return -1;
    if ((long long)(int)wide != wide)
        return -1;

    *out = (int)wide;
    return 0;
}

/*
 * Entry point: apply the environment, then the command line, then process
 * stdin or each file argument.
 *
 * Returns 0 when everything succeeded, and 1 on a usage error or when at
 * least one file could not be opened. Processing continues past a bad
 * file, but the failure is still reflected in the exit status.
 */
int main(int argc, char *argv[])
{
    int status = 0;

    /* 1. Environment */
    const char *env_v = getenv("WELLBEHAVED_VERBOSE");
    if (env_v && strcmp(env_v, "1") == 0)
        config.verbose = 1;

    /* A malformed count keeps the default rather than becoming 0. */
    const char *env_c = getenv("WELLBEHAVED_COUNT");
    if (env_c && parse_int(env_c, &config.count) != 0)
        fprintf(stderr, "%s: ignoring invalid WELLBEHAVED_COUNT: %s\n",
                argv[0], env_c);

    /* 2. Command line (overrides environment) */
    static struct option long_opts[] = {
        {"verbose", no_argument,       NULL, 'v'},
        {"count",   required_argument, NULL, 'c'},
        {"output",  required_argument, NULL, 'o'},
        {"help",    no_argument,       NULL, 'h'},
        {NULL, 0, NULL, 0}
    };

    int opt;
    while ((opt = getopt_long(argc, argv, "vc:o:h", long_opts, NULL)) != -1) {
        switch (opt) {
        case 'v':
            config.verbose = 1;
            break;
        case 'c':
            if (parse_int(optarg, &config.count) != 0) {
                fprintf(stderr, "%s: invalid count: %s\n", argv[0], optarg);
                usage(argv[0]);
                return 1;
            }
            break;
        case 'o':
            config.output = optarg;
            break;
        case 'h':
            usage(argv[0]);
            return 0;
        default:
            usage(argv[0]);
            return 1;
        }
    }

    /* 3. Act on stdin if no files given (Unix filter convention) */
    if (optind >= argc) {
        if (config.verbose)
            fprintf(stderr, "Reading from stdin...\n");
        /* Process stdin here */
    }

    /* 4. Process each file argument */
    for (int i = optind; i < argc; i++) {
        if (config.verbose)
            fprintf(stderr, "Processing: %s\n", argv[i]);

        FILE *f = fopen(argv[i], "r");
        if (!f) {
            fprintf(stderr, "%s: %s: %s\n", argv[0], argv[i], strerror(errno));
            status = 1;  /* remember the failure for the exit code */
            continue;    /* Keep going -- do not abort on one bad file */
        }
        /* Process file here */
        fclose(f);
    }

    /* 5. Diagnostic output to stderr, data output to stdout */
    if (config.verbose)
        fprintf(stderr, "Done. Processed %d iteration(s).\n", config.count);

    return status;
}

Key conventions this follows:

  • Diagnostic messages go to stderr, data to stdout
  • Works as a filter (reads stdin when no files given)
  • Continues on error (does not abort for one bad file)
  • Documents environment variables in --help
  • Uses exit code 0 for success, nonzero for failure

Try It: Write the Rust equivalent of wellbehaved.c using clap and std::env. Make it read from stdin when no files are given, using std::io::stdin().

Driver Prep: Kernel modules receive configuration through module parameters (module_param() macro) and device tree entries, not environment variables. But user-space tools that load, configure, and test drivers rely heavily on environment and command-line patterns. Tools like modprobe read /etc/modprobe.d/ for configuration.

Knowledge Check

  1. What is the order of precedence when a program checks compiled-in defaults, environment variables, and command-line arguments?

  2. Why is putenv() dangerous compared to setenv()?

  3. What does a leading colon or dot in PATH mean, and why is it a security risk?

Common Pitfalls

  • Not checking getenv() return value: It returns NULL if the variable is not set. Passing NULL to strcmp() or printf("%s", ...) is undefined behavior.

  • Modifying the string returned by getenv(): The returned pointer may point into the environment block. Modifying it has undefined behavior. Copy it first.

  • Thread-unsafe environment access: setenv() and getenv() are not thread-safe -- modifying the environment while another thread reads it is a data race. Read everything you need at startup, before creating threads.

  • Hardcoding paths: Use environment variables (HOME, XDG_CONFIG_HOME) or /etc conventions. Never assume a home directory path.

  • Ignoring stdin: Unix tools that accept files should also work as filters. If no files are given, read from stdin.

  • Error messages to stdout: Diagnostic output must go to stderr so it does not corrupt piped data.