/** * Container Runner v2 * Spawns agent containers with session folder + agent group folder mounts. * The container runs the v2 agent-runner which polls the session DB. */ import { ChildProcess, execSync, spawn } from 'child_process'; import fs from 'fs'; import path from 'path'; import { OneCLI } from '@onecli-sh/sdk'; import { CONTAINER_IMAGE, DATA_DIR, GROUPS_DIR, IDLE_TIMEOUT, ONECLI_URL, TIMEZONE } from './config.js'; import { CONTAINER_RUNTIME_BIN, hostGatewayArgs, readonlyMountArgs, stopContainer } from './container-runtime.js'; import { getAgentGroup } from './db/agent-groups.js'; import { getMessagingGroup } from './db/messaging-groups.js'; import { initGroupFilesystem } from './group-init.js'; import { log } from './log.js'; import { validateAdditionalMounts } from './mount-security.js'; import { markContainerIdle, markContainerRunning, markContainerStopped, sessionDir, writeDestinations, writeSessionRouting, } from './session-manager.js'; import type { AgentGroup, Session } from './types.js'; const onecli = new OneCLI({ url: ONECLI_URL }); interface VolumeMount { hostPath: string; containerPath: string; readonly: boolean; } /** Active containers tracked by session ID. */ const activeContainers = new Map(); /** * In-flight wake promises, keyed by session id. Deduplicates concurrent * `wakeContainer` calls while the first spawn is still mid-setup (async * buildContainerArgs, OneCLI gateway apply, etc.) — otherwise a second * wake in that window passes the `activeContainers.has` check and spawns * a duplicate container against the same session directory, producing * racy double-replies. */ const wakePromises = new Map>(); export function getActiveContainerCount(): number { return activeContainers.size; } export function isContainerRunning(sessionId: string): boolean { return activeContainers.has(sessionId); } /** * Wake up a container for a session. If already running or mid-spawn, no-op * (the in-flight wake promise is reused). * * The container runs the v2 agent-runner which polls the session DB. */ export function wakeContainer(session: Session): Promise { if (activeContainers.has(session.id)) { log.debug('Container already running', { sessionId: session.id }); return Promise.resolve(); } const existing = wakePromises.get(session.id); if (existing) { log.debug('Container wake already in-flight — joining existing promise', { sessionId: session.id }); return existing; } const promise = spawnContainer(session).finally(() => { wakePromises.delete(session.id); }); wakePromises.set(session.id, promise); return promise; } async function spawnContainer(session: Session): Promise { const agentGroup = getAgentGroup(session.agent_group_id); if (!agentGroup) { log.error('Agent group not found', { agentGroupId: session.agent_group_id }); return; } // Refresh the destination map and default reply routing so any admin // changes take effect on wake. writeDestinations(agentGroup.id, session.id); writeSessionRouting(agentGroup.id, session.id); const mounts = buildMounts(agentGroup, session); const containerName = `nanoclaw-v2-${agentGroup.folder}-${Date.now()}`; // OneCLI agent identifier is the agent group id. The admin group uses OneCLI's // default agent (undefined), so unscoped credentials apply. Non-admin groups // use their stable ag-xxx id, which is reversible via getAgentGroup() for // approval-request routing. const agentIdentifier = agentGroup.is_admin ? undefined : agentGroup.id; const args = await buildContainerArgs(mounts, containerName, session, agentGroup, agentIdentifier); log.info('Spawning container', { sessionId: session.id, agentGroup: agentGroup.name, containerName }); const container = spawn(CONTAINER_RUNTIME_BIN, args, { stdio: ['ignore', 'pipe', 'pipe'] }); activeContainers.set(session.id, { process: container, containerName }); markContainerRunning(session.id); // Log stderr container.stderr?.on('data', (data) => { for (const line of data.toString().trim().split('\n')) { if (line) log.debug(line, { container: agentGroup.folder }); } }); // stdout is unused in v2 (all IO is via session DB) container.stdout?.on('data', () => {}); // Idle timeout: kill container after IDLE_TIMEOUT of no activity let idleTimer = setTimeout(() => killContainer(session.id, 'idle timeout'), IDLE_TIMEOUT); const resetIdle = () => { clearTimeout(idleTimer); idleTimer = setTimeout(() => killContainer(session.id, 'idle timeout'), IDLE_TIMEOUT); }; // Reset idle timer when the host detects new messages_out (called by delivery.ts) const entry = activeContainers.get(session.id); if (entry) { (entry as { resetIdle?: () => void }).resetIdle = resetIdle; } container.on('close', (code) => { clearTimeout(idleTimer); activeContainers.delete(session.id); markContainerStopped(session.id); log.info('Container exited', { sessionId: session.id, code, containerName }); }); container.on('error', (err) => { clearTimeout(idleTimer); activeContainers.delete(session.id); markContainerStopped(session.id); log.error('Container spawn error', { sessionId: session.id, err }); }); } /** Reset the idle timer for a session's container (called when messages_out are delivered). */ export function resetContainerIdleTimer(sessionId: string): void { const entry = activeContainers.get(sessionId) as { resetIdle?: () => void } | undefined; entry?.resetIdle?.(); } /** Kill a container for a session. */ export function killContainer(sessionId: string, reason: string): void { const entry = activeContainers.get(sessionId); if (!entry) return; log.info('Killing container', { sessionId, reason, containerName: entry.containerName }); try { stopContainer(entry.containerName); } catch { entry.process.kill('SIGKILL'); } } function buildMounts(agentGroup: AgentGroup, session: Session): VolumeMount[] { // Per-group filesystem state lives forever after first creation. Init is // idempotent: it only writes paths that don't already exist, so this call // is a no-op for groups that have spawned before. Pulling in upstream // built-in skill or agent-runner source updates is an explicit operation // (host-mediated tools), not something the spawn path does silently. initGroupFilesystem(agentGroup); const mounts: VolumeMount[] = []; const projectRoot = process.cwd(); const sessDir = sessionDir(agentGroup.id, session.id); const groupDir = path.resolve(GROUPS_DIR, agentGroup.folder); // Session folder at /workspace (contains inbound.db, outbound.db, outbox/, .claude/) mounts.push({ hostPath: sessDir, containerPath: '/workspace', readonly: false }); // Agent group folder at /workspace/agent mounts.push({ hostPath: groupDir, containerPath: '/workspace/agent', readonly: false }); // Global memory directory — read-only for non-admin so the @import // in each group's CLAUDE.md can resolve it without risk of being // overwritten by an agent in some other group. const globalDir = path.join(GROUPS_DIR, 'global'); if (fs.existsSync(globalDir)) { mounts.push({ hostPath: globalDir, containerPath: '/workspace/global', readonly: !agentGroup.is_admin }); } // Per-group .claude-shared at /home/node/.claude (Claude state, settings, // skills — initialized once at group creation, persistent thereafter) const claudeDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, '.claude-shared'); mounts.push({ hostPath: claudeDir, containerPath: '/home/node/.claude', readonly: false }); // Per-group agent-runner source at /app/src (initialized once at group // creation, persistent thereafter — agents can modify their runner) const groupRunnerDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, 'agent-runner-src'); mounts.push({ hostPath: groupRunnerDir, containerPath: '/app/src', readonly: false }); // Admin: mount project root read-only if (agentGroup.is_admin) { mounts.push({ hostPath: projectRoot, containerPath: '/workspace/project', readonly: true }); const envFile = path.join(projectRoot, '.env'); if (fs.existsSync(envFile)) { mounts.push({ hostPath: '/dev/null', containerPath: '/workspace/project/.env', readonly: true }); } } // Additional mounts from container config const containerConfig = agentGroup.container_config ? JSON.parse(agentGroup.container_config) : {}; if (containerConfig.additionalMounts) { const validated = validateAdditionalMounts( containerConfig.additionalMounts, agentGroup.name, !!agentGroup.is_admin, ); mounts.push(...validated); } return mounts; } async function buildContainerArgs( mounts: VolumeMount[], containerName: string, session: Session, agentGroup: AgentGroup, agentIdentifier?: string, ): Promise { const args: string[] = ['run', '--rm', '--name', containerName]; // Environment args.push('-e', `TZ=${TIMEZONE}`); args.push('-e', `AGENT_PROVIDER=${session.agent_provider || agentGroup.agent_provider || 'claude'}`); // Two-DB split: container reads inbound.db, writes outbound.db args.push('-e', 'SESSION_INBOUND_DB_PATH=/workspace/inbound.db'); args.push('-e', 'SESSION_OUTBOUND_DB_PATH=/workspace/outbound.db'); args.push('-e', 'SESSION_HEARTBEAT_PATH=/workspace/.heartbeat'); // Pass admin user ID and assistant name from messaging group/agent group if (session.messaging_group_id) { const mg = getMessagingGroup(session.messaging_group_id); if (mg?.admin_user_id) { args.push('-e', `NANOCLAW_ADMIN_USER_ID=${mg.admin_user_id}`); } } if (agentGroup.name) { args.push('-e', `NANOCLAW_ASSISTANT_NAME=${agentGroup.name}`); } args.push('-e', `NANOCLAW_AGENT_GROUP_ID=${agentGroup.id}`); args.push('-e', `NANOCLAW_AGENT_GROUP_NAME=${agentGroup.name}`); args.push('-e', `NANOCLAW_IS_ADMIN=${agentGroup.is_admin ? '1' : '0'}`); // OneCLI gateway — injects HTTPS_PROXY + certs so container API calls // are routed through the agent vault for credential injection. // Must ensureAgent first for non-admin groups, otherwise applyContainerConfig // rejects the unknown agent identifier and returns false. try { if (agentIdentifier) { await onecli.ensureAgent({ name: agentGroup.name, identifier: agentIdentifier }); } const onecliApplied = await onecli.applyContainerConfig(args, { addHostMapping: false, agent: agentIdentifier }); if (onecliApplied) { log.info('OneCLI gateway applied', { containerName }); } else { log.warn('OneCLI gateway not applied — container will have no credentials', { containerName }); } } catch (err) { log.warn('OneCLI gateway error — container will have no credentials', { containerName, err }); } // Host gateway args.push(...hostGatewayArgs()); // User mapping const hostUid = process.getuid?.(); const hostGid = process.getgid?.(); if (hostUid != null && hostUid !== 0 && hostUid !== 1000) { args.push('--user', `${hostUid}:${hostGid}`); args.push('-e', 'HOME=/home/node'); } // Volume mounts for (const mount of mounts) { if (mount.readonly) { args.push(...readonlyMountArgs(mount.hostPath, mount.containerPath)); } else { args.push('-v', `${mount.hostPath}:${mount.containerPath}`); } } // Pass additional MCP servers from container config const containerConfig = agentGroup.container_config ? JSON.parse(agentGroup.container_config) : {}; if (containerConfig.mcpServers && Object.keys(containerConfig.mcpServers).length > 0) { args.push('-e', `NANOCLAW_MCP_SERVERS=${JSON.stringify(containerConfig.mcpServers)}`); } // Override entrypoint: compile agent-runner source, run v2 entry point (no stdin) args.push('--entrypoint', 'bash'); // Use per-agent-group image if one has been built, otherwise base image const imageTag = containerConfig.imageTag || CONTAINER_IMAGE; args.push(imageTag); args.push( '-c', 'cd /app && npx tsc --outDir /tmp/dist 2>&1 >&2 && ln -sf /app/node_modules /tmp/dist/node_modules && node /tmp/dist/index.js', ); return args; } /** Build a per-agent-group Docker image with custom packages. */ export async function buildAgentGroupImage(agentGroupId: string): Promise { const agentGroup = getAgentGroup(agentGroupId); if (!agentGroup) throw new Error('Agent group not found'); const containerConfig = agentGroup.container_config ? JSON.parse(agentGroup.container_config) : {}; const packages = containerConfig.packages || { apt: [], npm: [] }; const aptPackages = (packages.apt || []) as string[]; const npmPackages = (packages.npm || []) as string[]; if (aptPackages.length === 0 && npmPackages.length === 0) { throw new Error('No packages to install. Use install_packages first.'); } let dockerfile = `FROM ${CONTAINER_IMAGE}\nUSER root\n`; if (aptPackages.length > 0) { dockerfile += `RUN apt-get update && apt-get install -y ${aptPackages.join(' ')} && rm -rf /var/lib/apt/lists/*\n`; } if (npmPackages.length > 0) { dockerfile += `RUN npm install -g ${npmPackages.join(' ')}\n`; } dockerfile += 'USER node\n'; const imageTag = `nanoclaw-agent:${agentGroupId}`; log.info('Building per-agent-group image', { agentGroupId, imageTag, apt: aptPackages, npm: npmPackages }); // Write Dockerfile to temp file and build const tmpDockerfile = path.join(DATA_DIR, `Dockerfile.${agentGroupId}`); fs.writeFileSync(tmpDockerfile, dockerfile); try { execSync(`${CONTAINER_RUNTIME_BIN} build -t ${imageTag} -f ${tmpDockerfile} .`, { cwd: DATA_DIR, stdio: 'pipe', timeout: 300_000, }); } finally { fs.unlinkSync(tmpDockerfile); } // Store the image tag in container_config containerConfig.imageTag = imageTag; const { updateAgentGroup } = await import('./db/agent-groups.js'); updateAgentGroup(agentGroupId, { container_config: JSON.stringify(containerConfig) }); log.info('Per-agent-group image built', { agentGroupId, imageTag }); }