Files
nanoclaw/src/container-runtime.ts
Lazer Cohen 2383bde80f fix(container): scope orphan reaper by install label so peers don't kill each other
Two installs on the same host could trash each other's containers: the
reaper used `docker ps --filter name=nanoclaw-`, a substring match that
picked up every install's containers. A crash-looping peer (e.g. a legacy
v1 plist respawning ~6k times) would call cleanupOrphans on every boot and
kill the healthy install's session containers within seconds of spawn.

- Stamp `--label nanoclaw-install=<slug>` onto every spawned container.
- cleanupOrphans filters by that label; healthy peers are left alone.
- Setup preflight enumerates `com.nanoclaw*` launchd plists / nanoclaw
  user systemd units, probes state/runs, and unloads any that are
  crash-looping (state != running AND runs > 10) before installing
  this install's service.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 12:12:30 +03:00

91 lines
3.6 KiB
TypeScript

/**
* Container runtime abstraction for NanoClaw.
* All runtime-specific logic lives here so swapping runtimes means changing one file.
*/
import { execFileSync, execSync } from 'child_process';
import os from 'os';
import { CONTAINER_INSTALL_LABEL } from './config.js';
import { log } from './log.js';
/**
 * The container runtime binary name.
 *
 * Every runtime command issued from this module (`info`, `ps`, `stop`) is
 * invoked through this binary.
 */
export const CONTAINER_RUNTIME_BIN = 'docker';
/**
 * Builds the extra CLI flags a container needs in order to resolve the host
 * gateway under the name `host.docker.internal`.
 *
 * @returns On Linux, a single `--add-host` flag mapping `host.docker.internal`
 *   to `host-gateway`; on every other platform, an empty array.
 */
export function hostGatewayArgs(): string[] {
  // Linux does not provide host.docker.internal out of the box, so it has to
  // be mapped by hand; other platforms need nothing extra.
  const needsExplicitMapping = os.platform() === 'linux';
  return needsExplicitMapping ? ['--add-host=host.docker.internal:host-gateway'] : [];
}
/**
 * Builds the `-v` flag pair for bind-mounting a host path into a container
 * in read-only mode.
 *
 * @param hostPath - Path on the host to expose.
 * @param containerPath - Path at which the mount appears inside the container.
 * @returns A two-element array: `-v` followed by `<hostPath>:<containerPath>:ro`.
 */
export function readonlyMountArgs(hostPath: string, containerPath: string): string[] {
  const mountSpec = [hostPath, containerPath, 'ro'].join(':');
  return ['-v', mountSpec];
}
/**
 * Stop a container by name, passing `-t 1` to the runtime's `stop` command.
 *
 * The name is checked against a strict allowlist and then handed to the
 * runtime as a discrete argument via `execFileSync`, so no shell is spawned
 * and nothing in `name` can be interpreted as shell syntax.
 *
 * @param name - Container name; must start with an alphanumeric character and
 *   contain only alphanumerics, `_`, `.` or `-`.
 * @throws Error if `name` fails validation, or if the runtime command cannot
 *   be spawned or exits with a non-zero status.
 */
export function stopContainer(name: string): void {
  if (!/^[a-zA-Z0-9][a-zA-Z0-9_.-]*$/.test(name)) {
    throw new Error(`Invalid container name: ${name}`);
  }
  execFileSync(CONTAINER_RUNTIME_BIN, ['stop', '-t', '1', name], { stdio: 'pipe' });
}
/**
 * Verify that the container runtime is reachable.
 *
 * Probes the runtime with its `info` subcommand (10-second timeout). When the
 * probe fails, the error is logged, a boxed remediation banner is printed via
 * `console.error`, and an error is thrown. Nothing here attempts to launch
 * the runtime itself.
 *
 * @throws Error carrying the original failure as `cause` when the probe fails.
 */
export function ensureContainerRuntimeRunning(): void {
  const probeCommand = `${CONTAINER_RUNTIME_BIN} info`;
  try {
    execSync(probeCommand, { stdio: 'pipe', timeout: 10000 });
    log.debug('Container runtime already running');
  } catch (err) {
    log.error('Failed to reach container runtime', { err });

    // Printed line by line so the operator sees the fix-it steps on the console.
    const bannerLines = [
      '\n╔════════════════════════════════════════════════════════════════╗',
      '║ FATAL: Container runtime failed to start ║',
      '║ ║',
      '║ Agents cannot run without a container runtime. To fix: ║',
      '║ 1. Ensure Docker is installed and running ║',
      '║ 2. Run: docker info ║',
      '║ 3. Restart NanoClaw ║',
      '╚════════════════════════════════════════════════════════════════╝\n',
    ];
    for (const line of bannerLines) {
      console.error(line);
    }

    throw new Error('Container runtime is required but failed to start', {
      cause: err,
    });
  }
}
/**
 * Stop orphaned NanoClaw containers left over from THIS install's previous runs.
 *
 * Scoped by label `nanoclaw-install=<slug>` so a crash-looping peer install
 * cannot reap our containers, and we cannot reap theirs. The label is
 * stamped onto every container at spawn time — see container-runner.ts.
 *
 * Best-effort: this function never throws. A failure to list containers is
 * logged as a warning; a failure to stop an individual container is logged
 * and the remaining containers are still processed. Only containers whose
 * stop call succeeded are reported in the final info log.
 */
export function cleanupOrphans(): void {
  try {
    // Argument vector instead of a shell string: the label value and the Go
    // template reach the runtime verbatim, with no shell quoting involved.
    const output = execFileSync(
      CONTAINER_RUNTIME_BIN,
      ['ps', '--filter', `label=${CONTAINER_INSTALL_LABEL}`, '--format', '{{.Names}}'],
      {
        stdio: ['pipe', 'pipe', 'pipe'],
        encoding: 'utf-8',
      },
    );

    // One name per line; tolerate CRLF line endings and stray whitespace.
    const orphans = output
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean);

    const stopped: string[] = [];
    for (const name of orphans) {
      try {
        stopContainer(name);
        stopped.push(name);
      } catch (err) {
        // The container may have exited between listing and stopping; keep
        // going, but leave a trace instead of swallowing the failure.
        log.warn('Failed to stop orphaned container', { name, err });
      }
    }

    if (stopped.length > 0) {
      log.info('Stopped orphaned containers', { count: stopped.length, names: stopped });
    }
  } catch (err) {
    log.warn('Failed to clean up orphaned containers', { err });
  }
}