Environment and Configuration
Every Unix process inherits a block of key-value strings from its parent. This environment block controls program behavior without code changes. Understanding how it works -- and how to combine it with command-line arguments and configuration files -- is essential for writing well-behaved Unix tools.
The Environment Block
The kernel passes the environment to a new process on the stack, right after the
argument strings. Each entry is a KEY=VALUE string.
/* print_env.c */
#include <stdio.h>
/* NULL-terminated array of "KEY=VALUE" strings for this process. */
extern char **environ;

/* Print every environment entry, one per line, in array order. */
int main(void)
{
    char **entry = environ;

    while (*entry) {
        puts(*entry);   /* puts() appends the newline itself */
        entry++;
    }
    return 0;
}
$ gcc -o print_env print_env.c
$ ./print_env | head -5
SHELL=/bin/bash
HOME=/home/user
PATH=/usr/local/bin:/usr/bin:/bin
LANG=en_US.UTF-8
TERM=xterm-256color
The layout in memory:
Stack (high address)
+----------------------------+
| environment strings |
| "HOME=/home/user\0" |
| "PATH=/usr/bin:/bin\0" |
| ... |
+----------------------------+
| environ[0] -> "HOME=..." |
| environ[1] -> "PATH=..." |
| environ[N] -> NULL |
+----------------------------+
| argv strings |
| argv[0] -> "./print_env" |
| argv[1] -> NULL |
+----------------------------+
| argc = 1 |
+----------------------------+
(stack grows down)
Reading and Writing the Environment
/* env_ops.c */
#include <stdio.h>
#include <stdlib.h>
/* Map an unset variable (NULL) to a printable placeholder. */
static const char *value_or_null(const char *value)
{
    return value ? value : "(null)";
}

/*
 * Demonstrate getenv(), setenv() with and without overwrite, and unsetenv().
 *
 * Returns 0 on success, 1 if the environment could not be modified.
 * Every getenv() result is routed through value_or_null() before it reaches
 * printf(), because passing NULL to "%s" is undefined behavior.
 */
int main(void)
{
    /* Read */
    const char *home = getenv("HOME");
    if (home)
        printf("HOME = %s\n", home);
    else
        printf("HOME not set\n");

    /* Write -- adds or overwrites */
    if (setenv("MY_APP_DEBUG", "1", 1) != 0) {   /* 1 = overwrite if exists */
        perror("setenv");
        return 1;
    }
    printf("MY_APP_DEBUG = %s\n", value_or_null(getenv("MY_APP_DEBUG")));

    /* Write without overwrite */
    if (setenv("MY_APP_DEBUG", "2", 0) != 0) {   /* 0 = do not overwrite */
        perror("setenv");
        return 1;
    }
    printf("MY_APP_DEBUG = %s\n",
           value_or_null(getenv("MY_APP_DEBUG")));   /* Still "1" */

    /* Remove */
    if (unsetenv("MY_APP_DEBUG") != 0) {
        perror("unsetenv");
        return 1;
    }
    printf("After unsetenv: %s\n", value_or_null(getenv("MY_APP_DEBUG")));
    return 0;
}
Caution:
putenv() inserts a pointer to your string directly into the environment. If that string is on the stack, it becomes a dangling pointer when the function returns. Prefer setenv(), which copies the string.
Caution: None of the environment functions are thread-safe. Calling
setenv() or getenv() from multiple threads without synchronization is undefined behavior.
PATH Resolution and exec
When you call execlp() or execvp() (the "p" variants), the C library searches
the directories listed in PATH for the binary and then calls execve() with the
full path it found. The kernel itself never consults PATH.
/* path_search.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * List the components of PATH in search order.
 *
 * The string is walked in place with strcspn(), so no copy (and no
 * allocation that could fail) is needed. Unlike strtok(), this keeps
 * empty components -- a leading, trailing, or doubled colon -- and
 * labels them, since an empty component means the current directory.
 *
 * Returns 0 on success, 1 if PATH is not set.
 */
int main(void)
{
    const char *path = getenv("PATH");
    if (!path) {
        printf("PATH not set\n");
        return 1;
    }

    printf("PATH directories:\n");

    const char *cursor = path;
    for (int i = 0; ; i++) {
        size_t len = strcspn(cursor, ":");   /* length up to ':' or end */

        if (len == 0)
            printf(" [%d] (empty -- means current directory)\n", i);
        else
            printf(" [%d] %.*s\n", i, (int)len, cursor);

        if (cursor[len] == '\0')
            break;
        cursor += len + 1;                   /* step over the ':' */
    }
    return 0;
}
The search order matters. If /usr/local/bin appears before /usr/bin, a
binary in /usr/local/bin shadows the system version.
Caution: A PATH that includes
. (current directory) or an empty component (like :/usr/bin -- note the leading colon) is a security risk. An attacker can place a malicious binary in the current directory.
Command-Line Parsing: getopt
getopt() is the traditional Unix way to parse command-line options.
/* getopt_demo.c */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Parse TEXT as a whole-string base-10 integer.
 *
 * On success stores the value in *out and returns 0. Returns -1 and leaves
 * *out untouched when TEXT is empty, is not a number, has trailing
 * characters, or the value does not survive conversion to int.
 */
static int parse_count(const char *text, int *out)
{
    char *end;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0')
        return -1;
    if (value != (long)(int)value)
        return -1;
    *out = (int)value;
    return 0;
}

/*
 * Parse -v, -c COUNT and -o OUTPUT with getopt(), then list the remaining
 * non-option arguments.
 *
 * Returns 0 on success, 1 on an unknown option or an invalid count.
 */
int main(int argc, char *argv[])
{
    int verbose = 0;
    int count = 1;
    const char *output = NULL;
    int opt;

    while ((opt = getopt(argc, argv, "vc:o:")) != -1) {
        switch (opt) {
        case 'v':
            verbose = 1;
            break;
        case 'c':
            /* atoi() would silently turn "abc" into 0; reject it instead. */
            if (parse_count(optarg, &count) != 0) {
                fprintf(stderr, "%s: invalid count: %s\n", argv[0], optarg);
                return 1;
            }
            break;
        case 'o':
            output = optarg;
            break;
        default:
            fprintf(stderr, "Usage: %s [-v] [-c count] [-o output] [files...]\n",
                    argv[0]);
            return 1;
        }
    }

    printf("verbose=%d count=%d output=%s\n",
           verbose, count, output ? output : "(none)");

    /* Remaining arguments (non-option) */
    for (int i = optind; i < argc; i++)
        printf("arg: %s\n", argv[i]);
    return 0;
}
$ ./getopt_demo -v -c 5 -o result.txt file1.txt file2.txt
verbose=1 count=5 output=result.txt
arg: file1.txt
arg: file2.txt
The option string "vc:o:" means: -v takes no argument, -c and -o each
require one (indicated by the colon).
Long Options: getopt_long
For modern tools, long options like --verbose are expected.
/* getopt_long_demo.c */
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
/*
 * Parse TEXT as a whole-string base-10 integer.
 *
 * On success stores the value in *out and returns 0. Returns -1 and leaves
 * *out untouched when TEXT is empty, is not a number, has trailing
 * characters, or the value does not survive conversion to int.
 */
static int parse_count(const char *text, int *out)
{
    char *end;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0')
        return -1;
    if (value != (long)(int)value)
        return -1;
    *out = (int)value;
    return 0;
}

/*
 * Parse --verbose/-v, --count/-c, --output/-o and --help/-h with
 * getopt_long() and print the resulting settings.
 *
 * Returns 0 on success or after --help, 1 on an unknown option or an
 * invalid count.
 */
int main(int argc, char *argv[])
{
    int verbose = 0;
    int count = 1;
    const char *output = NULL;

    /* Each long option maps onto the short option letter in the last field. */
    static struct option long_options[] = {
        {"verbose", no_argument,       NULL, 'v'},
        {"count",   required_argument, NULL, 'c'},
        {"output",  required_argument, NULL, 'o'},
        {"help",    no_argument,       NULL, 'h'},
        {NULL,      0,                 NULL, 0  }
    };

    int opt;
    while ((opt = getopt_long(argc, argv, "vc:o:h", long_options, NULL)) != -1) {
        switch (opt) {
        case 'v':
            verbose = 1;
            break;
        case 'c':
            /* atoi() would silently turn "abc" into 0; reject it instead. */
            if (parse_count(optarg, &count) != 0) {
                fprintf(stderr, "%s: invalid count: %s\n", argv[0], optarg);
                return 1;
            }
            break;
        case 'o':
            output = optarg;
            break;
        case 'h':
            printf("Usage: %s [--verbose] [--count N] [--output FILE]\n",
                   argv[0]);
            return 0;
        default:
            return 1;
        }
    }

    printf("verbose=%d count=%d output=%s\n",
           verbose, count, output ? output : "(none)");
    return 0;
}
$ ./getopt_long_demo --verbose --count 10 --output data.csv
verbose=1 count=10 output=data.csv
Rust: std::env
Rust's standard library provides safe environment access.
// env_demo.rs
use std::env;

/// Read, set, remove and enumerate environment variables, then list the
/// directories in PATH.
fn main() {
    // Read
    match env::var("HOME") {
        Ok(val) => println!("HOME = {}", val),
        Err(_) => println!("HOME not set"),
    }

    // Set
    // (In the Rust 2024 edition, set_var and remove_var are `unsafe fn` and
    // must be wrapped in an `unsafe { }` block.)
    env::set_var("MY_APP_DEBUG", "1");
    println!("MY_APP_DEBUG = {}", env::var("MY_APP_DEBUG").unwrap());

    // Remove
    env::remove_var("MY_APP_DEBUG");

    // Iterate all
    // Note: env::vars() panics if an entry is not valid Unicode;
    // env::vars_os() is the lossless alternative.
    println!("\nAll environment variables:");
    for (key, value) in env::vars() {
        println!(" {}={}", key, value);
    }

    // PATH directories
    if let Some(path) = env::var_os("PATH") {
        println!("\nPATH directories:");
        for dir in env::split_paths(&path) {
            println!(" {}", dir.display());
        }
    }
}
Rust Note:
env::set_var() and env::remove_var() are unsafe functions starting with the Rust 2024 edition, because they cannot be called safely while another thread may be reading or writing the environment. The Rust team recognized the same thread-safety issue that plagues C's setenv(). Prefer reading environment at startup and storing values in your own data structures.
Rust: Command-Line Parsing with clap
The clap crate is the standard Rust approach to argument parsing.
// clap_demo.rs // Cargo.toml: // [dependencies] // clap = { version = "4", features = ["derive"] } use clap::Parser; /// A well-behaved Unix tool #[derive(Parser, Debug)] #[command(name = "mytool", version, about = "Does useful things")] struct Args { /// Enable verbose output #[arg(short, long)] verbose: bool, /// Number of iterations #[arg(short, long, default_value_t = 1)] count: u32, /// Output file path #[arg(short, long)] output: Option<String>, /// Input files files: Vec<String>, } fn main() { let args = Args::parse(); println!("verbose={} count={} output={:?}", args.verbose, args.count, args.output); for f in &args.files { println!("file: {}", f); } }
$ cargo run -- --verbose --count 5 -o result.txt input1.dat input2.dat
verbose=true count=5 output=Some("result.txt")
file: input1.dat
file: input2.dat
The --help flag auto-generates usage text from the struct annotations.
Rust Note:
clap with derive macros generates the help text, validation, and parsing code at compile time. The C equivalent requires writing all of this by hand or using a library like argp.
Configuration File Patterns
A well-behaved Unix tool checks configuration in this order (later overrides earlier):
1. Compiled-in defaults
2. System config: /etc/myapp/config
3. User config: ~/.config/myapp/config (XDG_CONFIG_HOME)
4. Environment: MYAPP_DEBUG=1
5. Command-line: --debug
A minimal config file parser in C:
/* config_parse.c */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_LINE 256
struct config {
int port;
int verbose;
char logfile[256];
};
static void config_defaults(struct config *cfg)
{
cfg->port = 8080;
cfg->verbose = 0;
strncpy(cfg->logfile, "/var/log/myapp.log", sizeof(cfg->logfile) - 1);
}
/*
 * Parse TEXT as a base-10 integer, tolerating trailing blanks or a CR.
 *
 * On success stores the value in *out and returns 0. Returns -1 and leaves
 * *out untouched when TEXT is not a number, has other trailing characters,
 * or the value does not survive conversion to int.
 */
static int config_parse_int(const char *text, int *out)
{
    char *end;
    long value = strtol(text, &end, 10);

    if (end == text)
        return -1;
    while (*end == ' ' || *end == '\t' || *end == '\r')
        end++;
    if (*end != '\0' || value != (long)(int)value)
        return -1;
    *out = (int)value;
    return 0;
}

/*
 * Overlay the settings found in the file at PATH onto *cfg.
 *
 * The file holds one "key=value" pair per line, with no whitespace around
 * the '='. Lines starting with '#' and empty lines are skipped. Recognized
 * keys are "port", "verbose" and "logfile"; unknown keys are ignored.
 * A numeric key with an invalid value keeps its previous setting and a
 * warning is written to stderr.
 *
 * Returns -1 if the file cannot be opened, 0 otherwise.
 */
static int config_load(struct config *cfg, const char *path)
{
    FILE *f = fopen(path, "r");
    if (!f)
        return -1;

    char line[MAX_LINE];
    while (fgets(line, sizeof(line), f)) {
        /*
         * A line longer than the buffer arrives in pieces. Drop the rest
         * now so its tail is never mistaken for a separate key=value line.
         */
        if (!strchr(line, '\n')) {
            int ch;
            while ((ch = fgetc(f)) != EOF && ch != '\n')
                ;
        }

        /* Skip comments and empty lines */
        if (line[0] == '#' || line[0] == '\n')
            continue;

        char key[128], value[128];
        if (sscanf(line, "%127[^=]=%127[^\n]", key, value) != 2)
            continue;

        if (strcmp(key, "port") == 0) {
            if (config_parse_int(value, &cfg->port) != 0)
                fprintf(stderr, "%s: ignoring invalid port: %s\n", path, value);
        } else if (strcmp(key, "verbose") == 0) {
            if (config_parse_int(value, &cfg->verbose) != 0)
                fprintf(stderr, "%s: ignoring invalid verbose: %s\n", path, value);
        } else if (strcmp(key, "logfile") == 0) {
            /* snprintf() always terminates, unlike strncpy(). */
            snprintf(cfg->logfile, sizeof(cfg->logfile), "%s", value);
        }
    }

    fclose(f);
    return 0;
}
/*
 * Build the effective configuration and print it.
 *
 * Sources are applied in increasing priority: compiled-in defaults,
 * /etc/myapp.conf, $HOME/.myapp.conf, then the MYAPP_PORT environment
 * variable. Always returns 0.
 */
int main(int argc, char *argv[])
{
    (void)argc;   /* only argv[0] is used, for diagnostics */

    struct config cfg;
    config_defaults(&cfg);

    /* Try system config, then user config. A missing file is not an error. */
    (void)config_load(&cfg, "/etc/myapp.conf");

    const char *home = getenv("HOME");
    if (home) {
        char user_conf[512];
        int n = snprintf(user_conf, sizeof(user_conf), "%s/.myapp.conf", home);

        /* Never open a truncated path: it would name a different file. */
        if (n >= 0 && (size_t)n < sizeof(user_conf))
            (void)config_load(&cfg, user_conf);
        else
            fprintf(stderr, "%s: HOME is too long, skipping user config\n",
                    argv[0]);
    }

    /* Environment overrides */
    const char *env_port = getenv("MYAPP_PORT");
    if (env_port) {
        char *end;
        long port = strtol(env_port, &end, 10);

        /* Accept only a complete number that fits in an int. */
        if (end != env_port && *end == '\0' && port == (long)(int)port)
            cfg.port = (int)port;
        else
            fprintf(stderr, "%s: ignoring invalid MYAPP_PORT: %s\n",
                    argv[0], env_port);
    }

    printf("port=%d verbose=%d logfile=%s\n",
           cfg.port, cfg.verbose, cfg.logfile);
    return 0;
}
The /etc Convention
System-wide configuration lives under /etc. Per-application patterns:
| Path | Purpose |
|---|---|
| /etc/myapp.conf | Single config file |
| /etc/myapp/ | Config directory |
| /etc/myapp/conf.d/ | Drop-in overrides (processed alphabetically) |
| /etc/default/myapp | Default environment for init scripts |
Putting It Together: A Well-Behaved Unix Tool
Here is a complete C program that follows all conventions:
/* wellbehaved.c -- a well-behaved Unix tool */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <getopt.h>
#include <errno.h>
/* Program-wide settings: defaults here, then environment, then command line. */
static struct {
int verbose; /* set to 1 by WELLBEHAVED_VERBOSE=1 or -v/--verbose */
int count; /* set by WELLBEHAVED_COUNT or -c/--count */
const char *output; /* set by -o/--output; NULL when not given */
} config = {
.verbose = 0,
.count = 1,
.output = NULL,
};
/*
 * Write the help text to stderr.
 * prog is the program name shown on the "Usage:" line.
 */
static void usage(const char *prog)
{
    /* Everything after the first line is fixed text with no conversions. */
    static const char details[] =
        "\n"
        "Options:\n"
        " -v, --verbose Enable verbose output\n"
        " -c, --count=N Number of iterations (default: 1)\n"
        " -o, --output=FILE Output file\n"
        " -h, --help Show this help\n"
        "\n"
        "Environment:\n"
        " WELLBEHAVED_VERBOSE Set to 1 for verbose mode\n"
        " WELLBEHAVED_COUNT Default iteration count\n";

    fprintf(stderr, "Usage: %s [OPTIONS] [FILE...]\n", prog);
    fputs(details, stderr);
}
/*
 * Parse TEXT as a whole-string base-10 integer.
 *
 * On success stores the value in *out and returns 0. Returns -1 and leaves
 * *out untouched when TEXT is empty, is not a number, has trailing
 * characters, or is out of range.
 */
static int parse_int(const char *text, int *out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE)
        return -1;
    if (value != (long)(int)value)
        return -1;
    *out = (int)value;
    return 0;
}

/*
 * Entry point: apply environment settings, then command-line options,
 * then process stdin or each named file.
 *
 * Returns 0 when everything succeeded, 1 on a usage error or when at
 * least one file could not be opened.
 */
int main(int argc, char *argv[])
{
    int status = 0;   /* becomes 1 once any file fails */

    /* 1. Environment */
    const char *env_v = getenv("WELLBEHAVED_VERBOSE");
    if (env_v && strcmp(env_v, "1") == 0)
        config.verbose = 1;

    const char *env_c = getenv("WELLBEHAVED_COUNT");
    if (env_c && parse_int(env_c, &config.count) != 0)
        fprintf(stderr, "%s: ignoring invalid WELLBEHAVED_COUNT: %s\n",
                argv[0], env_c);

    /* 2. Command line (overrides environment) */
    static struct option long_opts[] = {
        {"verbose", no_argument,       NULL, 'v'},
        {"count",   required_argument, NULL, 'c'},
        {"output",  required_argument, NULL, 'o'},
        {"help",    no_argument,       NULL, 'h'},
        {NULL,      0,                 NULL, 0}
    };

    int opt;
    while ((opt = getopt_long(argc, argv, "vc:o:h", long_opts, NULL)) != -1) {
        switch (opt) {
        case 'v':
            config.verbose = 1;
            break;
        case 'c':
            /* An explicit bad option is a usage error, not a silent 0. */
            if (parse_int(optarg, &config.count) != 0) {
                fprintf(stderr, "%s: invalid count: %s\n", argv[0], optarg);
                usage(argv[0]);
                return 1;
            }
            break;
        case 'o':
            config.output = optarg;
            break;
        case 'h':
            usage(argv[0]);
            return 0;
        default:
            usage(argv[0]);
            return 1;
        }
    }

    /* 3. Act on stdin if no files given (Unix filter convention) */
    if (optind >= argc) {
        if (config.verbose)
            fprintf(stderr, "Reading from stdin...\n");
        /* Process stdin here */
    }

    /* 4. Process each file argument */
    for (int i = optind; i < argc; i++) {
        if (config.verbose)
            fprintf(stderr, "Processing: %s\n", argv[i]);

        FILE *f = fopen(argv[i], "r");
        if (!f) {
            fprintf(stderr, "%s: %s: %s\n", argv[0], argv[i], strerror(errno));
            status = 1;   /* remember the failure for the exit code */
            continue;     /* Keep going -- do not abort on one bad file */
        }
        /* Process file here */
        fclose(f);
    }

    /* 5. Diagnostic output to stderr, data output to stdout */
    if (config.verbose)
        fprintf(stderr, "Done. Processed %d iteration(s).\n", config.count);
    return status;
}
Key conventions this follows:
- Diagnostic messages go to stderr, data to stdout
- Works as a filter (reads stdin when no files given)
- Continues on error (does not abort for one bad file)
- Documents environment variables in --help
- Uses exit code 0 for success, nonzero for failure
Try It: Write the Rust equivalent of
wellbehaved.c using clap and std::env. Make it read from stdin when no files are given, using std::io::stdin().
Driver Prep: Kernel modules receive configuration through module parameters (
module_param() macro) and device tree entries, not environment variables. But user-space tools that load, configure, and test drivers rely heavily on environment and command-line patterns. Tools like modprobe read /etc/modprobe.d/ for configuration.
Knowledge Check
- What is the order of precedence when a program checks compiled-in defaults, environment variables, and command-line arguments?
- Why is putenv() dangerous compared to setenv()?
- What does a leading colon or dot in PATH mean, and why is it a security risk?
Common Pitfalls
- Not checking getenv() return value: It returns NULL if the variable is not set. Passing NULL to strcmp() or printf("%s", ...) is undefined behavior.
- Modifying the string returned by getenv(): The returned pointer may point into the environment block. Modifying it has undefined behavior. Copy it first.
- Thread-unsafe environment access: setenv() and getenv() are not thread-safe. Read everything you need at startup.
- Hardcoding paths: Use environment variables (HOME, XDG_CONFIG_HOME) or /etc conventions. Never assume a home directory path.
- Ignoring stdin: Unix tools that accept files should also work as filters. If no files are given, read from stdin.
- Error messages to stdout: Diagnostic output must go to stderr so it does not corrupt piped data.