Each group's on-disk state (CLAUDE.md, .claude-shared/, agent-runner-src/) is now initialized exactly once at group creation and owned by the group forever after. Spawn does only mounts — no copies, no settings.json overwrites, no skill clobbers, no source resyncs. Global memory composition switches from "host reads /workspace/global/CLAUDE.md at bootstrap and stuffs it into systemPrompt.append" to "group CLAUDE.md imports it via @/workspace/global/CLAUDE.md at the top." Edits to global propagate instantly through the existing read-only mount; no copy, no restart. - src/group-init.ts: new initGroupFilesystem(group, opts?) — idempotent, populates groups/<folder>/, .claude-shared/, agent-runner-src/ only when paths don't already exist. - src/container-runner.ts: buildMounts() calls init defensively at the top (catches existing groups on first spawn after this change), drops the inline settings.json write, skills cpSync loop, and agent-runner-src rm-then-copy. Just mounts now. - src/delivery.ts: create_agent flow uses initGroupFilesystem with optional instructions, replacing the inline mkdirSync + writeFileSync. - container/agent-runner/src/index.ts: drops GLOBAL_CLAUDE_MD reading. systemContext.instructions is now only the runtime-generated destinations addendum. - scripts/migrate-group-claude-md.ts: one-shot migration that prepends the @-import to existing groups' CLAUDE.md. Skips if global doesn't exist or if the @-import is already present (regex match on the @ form to avoid false positives from prose mentions of the path). - groups/main/CLAUDE.md: prepended by the migration. Existing groups need a one-time wipe of their agent-runner-src/ dir so init re-populates from current host source — done locally before this commit. Future host-side updates to container/skills/ or container/agent-runner/src/ won't auto-propagate; that's the trade-off for unconditional persistence and will be covered by host-mediated refresh tools in a follow-up. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
364 lines
14 KiB
TypeScript
364 lines
14 KiB
TypeScript
/**
|
|
* Container Runner v2
|
|
* Spawns agent containers with session folder + agent group folder mounts.
|
|
* The container runs the v2 agent-runner which polls the session DB.
|
|
*/
|
|
import { ChildProcess, execSync, spawn } from 'child_process';
|
|
import fs from 'fs';
|
|
import path from 'path';
|
|
|
|
import { OneCLI } from '@onecli-sh/sdk';
|
|
|
|
import { CONTAINER_IMAGE, DATA_DIR, GROUPS_DIR, IDLE_TIMEOUT, ONECLI_URL, TIMEZONE } from './config.js';
|
|
import { CONTAINER_RUNTIME_BIN, hostGatewayArgs, readonlyMountArgs, stopContainer } from './container-runtime.js';
|
|
import { getAgentGroup } from './db/agent-groups.js';
|
|
import { getMessagingGroup } from './db/messaging-groups.js';
|
|
import { initGroupFilesystem } from './group-init.js';
|
|
import { log } from './log.js';
|
|
import { validateAdditionalMounts } from './mount-security.js';
|
|
import {
|
|
markContainerIdle,
|
|
markContainerRunning,
|
|
markContainerStopped,
|
|
sessionDir,
|
|
writeDestinations,
|
|
writeSessionRouting,
|
|
} from './session-manager.js';
|
|
import type { AgentGroup, Session } from './types.js';
|
|
|
|
const onecli = new OneCLI({ url: ONECLI_URL });
|
|
|
|
interface VolumeMount {
|
|
hostPath: string;
|
|
containerPath: string;
|
|
readonly: boolean;
|
|
}
|
|
|
|
/** Active containers tracked by session ID. */
|
|
const activeContainers = new Map<string, { process: ChildProcess; containerName: string }>();
|
|
|
|
/**
|
|
* In-flight wake promises, keyed by session id. Deduplicates concurrent
|
|
* `wakeContainer` calls while the first spawn is still mid-setup (async
|
|
* buildContainerArgs, OneCLI gateway apply, etc.) — otherwise a second
|
|
* wake in that window passes the `activeContainers.has` check and spawns
|
|
* a duplicate container against the same session directory, producing
|
|
* racy double-replies.
|
|
*/
|
|
const wakePromises = new Map<string, Promise<void>>();
|
|
|
|
export function getActiveContainerCount(): number {
|
|
return activeContainers.size;
|
|
}
|
|
|
|
export function isContainerRunning(sessionId: string): boolean {
|
|
return activeContainers.has(sessionId);
|
|
}
|
|
|
|
/**
|
|
* Wake up a container for a session. If already running or mid-spawn, no-op
|
|
* (the in-flight wake promise is reused).
|
|
*
|
|
* The container runs the v2 agent-runner which polls the session DB.
|
|
*/
|
|
export function wakeContainer(session: Session): Promise<void> {
|
|
if (activeContainers.has(session.id)) {
|
|
log.debug('Container already running', { sessionId: session.id });
|
|
return Promise.resolve();
|
|
}
|
|
const existing = wakePromises.get(session.id);
|
|
if (existing) {
|
|
log.debug('Container wake already in-flight — joining existing promise', { sessionId: session.id });
|
|
return existing;
|
|
}
|
|
const promise = spawnContainer(session).finally(() => {
|
|
wakePromises.delete(session.id);
|
|
});
|
|
wakePromises.set(session.id, promise);
|
|
return promise;
|
|
}
|
|
|
|
async function spawnContainer(session: Session): Promise<void> {
|
|
const agentGroup = getAgentGroup(session.agent_group_id);
|
|
if (!agentGroup) {
|
|
log.error('Agent group not found', { agentGroupId: session.agent_group_id });
|
|
return;
|
|
}
|
|
|
|
// Refresh the destination map and default reply routing so any admin
|
|
// changes take effect on wake.
|
|
writeDestinations(agentGroup.id, session.id);
|
|
writeSessionRouting(agentGroup.id, session.id);
|
|
|
|
const mounts = buildMounts(agentGroup, session);
|
|
const containerName = `nanoclaw-v2-${agentGroup.folder}-${Date.now()}`;
|
|
// OneCLI agent identifier is the agent group id. The admin group uses OneCLI's
|
|
// default agent (undefined), so unscoped credentials apply. Non-admin groups
|
|
// use their stable ag-xxx id, which is reversible via getAgentGroup() for
|
|
// approval-request routing.
|
|
const agentIdentifier = agentGroup.is_admin ? undefined : agentGroup.id;
|
|
const args = await buildContainerArgs(mounts, containerName, session, agentGroup, agentIdentifier);
|
|
|
|
log.info('Spawning container', { sessionId: session.id, agentGroup: agentGroup.name, containerName });
|
|
|
|
const container = spawn(CONTAINER_RUNTIME_BIN, args, { stdio: ['ignore', 'pipe', 'pipe'] });
|
|
|
|
activeContainers.set(session.id, { process: container, containerName });
|
|
markContainerRunning(session.id);
|
|
|
|
// Log stderr
|
|
container.stderr?.on('data', (data) => {
|
|
for (const line of data.toString().trim().split('\n')) {
|
|
if (line) log.debug(line, { container: agentGroup.folder });
|
|
}
|
|
});
|
|
|
|
// stdout is unused in v2 (all IO is via session DB)
|
|
container.stdout?.on('data', () => {});
|
|
|
|
// Idle timeout: kill container after IDLE_TIMEOUT of no activity
|
|
let idleTimer = setTimeout(() => killContainer(session.id, 'idle timeout'), IDLE_TIMEOUT);
|
|
|
|
const resetIdle = () => {
|
|
clearTimeout(idleTimer);
|
|
idleTimer = setTimeout(() => killContainer(session.id, 'idle timeout'), IDLE_TIMEOUT);
|
|
};
|
|
|
|
// Reset idle timer when the host detects new messages_out (called by delivery.ts)
|
|
const entry = activeContainers.get(session.id);
|
|
if (entry) {
|
|
(entry as { resetIdle?: () => void }).resetIdle = resetIdle;
|
|
}
|
|
|
|
container.on('close', (code) => {
|
|
clearTimeout(idleTimer);
|
|
activeContainers.delete(session.id);
|
|
markContainerStopped(session.id);
|
|
log.info('Container exited', { sessionId: session.id, code, containerName });
|
|
});
|
|
|
|
container.on('error', (err) => {
|
|
clearTimeout(idleTimer);
|
|
activeContainers.delete(session.id);
|
|
markContainerStopped(session.id);
|
|
log.error('Container spawn error', { sessionId: session.id, err });
|
|
});
|
|
}
|
|
|
|
/** Reset the idle timer for a session's container (called when messages_out are delivered). */
|
|
export function resetContainerIdleTimer(sessionId: string): void {
|
|
const entry = activeContainers.get(sessionId) as { resetIdle?: () => void } | undefined;
|
|
entry?.resetIdle?.();
|
|
}
|
|
|
|
/** Kill a container for a session. */
|
|
export function killContainer(sessionId: string, reason: string): void {
|
|
const entry = activeContainers.get(sessionId);
|
|
if (!entry) return;
|
|
|
|
log.info('Killing container', { sessionId, reason, containerName: entry.containerName });
|
|
try {
|
|
stopContainer(entry.containerName);
|
|
} catch {
|
|
entry.process.kill('SIGKILL');
|
|
}
|
|
}
|
|
|
|
function buildMounts(agentGroup: AgentGroup, session: Session): VolumeMount[] {
|
|
// Per-group filesystem state lives forever after first creation. Init is
|
|
// idempotent: it only writes paths that don't already exist, so this call
|
|
// is a no-op for groups that have spawned before. Pulling in upstream
|
|
// built-in skill or agent-runner source updates is an explicit operation
|
|
// (host-mediated tools), not something the spawn path does silently.
|
|
initGroupFilesystem(agentGroup);
|
|
|
|
const mounts: VolumeMount[] = [];
|
|
const projectRoot = process.cwd();
|
|
const sessDir = sessionDir(agentGroup.id, session.id);
|
|
const groupDir = path.resolve(GROUPS_DIR, agentGroup.folder);
|
|
|
|
// Session folder at /workspace (contains inbound.db, outbound.db, outbox/, .claude/)
|
|
mounts.push({ hostPath: sessDir, containerPath: '/workspace', readonly: false });
|
|
|
|
// Agent group folder at /workspace/agent
|
|
mounts.push({ hostPath: groupDir, containerPath: '/workspace/agent', readonly: false });
|
|
|
|
// Global memory directory — read-only for non-admin so the @import
|
|
// in each group's CLAUDE.md can resolve it without risk of being
|
|
// overwritten by an agent in some other group.
|
|
const globalDir = path.join(GROUPS_DIR, 'global');
|
|
if (fs.existsSync(globalDir)) {
|
|
mounts.push({ hostPath: globalDir, containerPath: '/workspace/global', readonly: !agentGroup.is_admin });
|
|
}
|
|
|
|
// Per-group .claude-shared at /home/node/.claude (Claude state, settings,
|
|
// skills — initialized once at group creation, persistent thereafter)
|
|
const claudeDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, '.claude-shared');
|
|
mounts.push({ hostPath: claudeDir, containerPath: '/home/node/.claude', readonly: false });
|
|
|
|
// Per-group agent-runner source at /app/src (initialized once at group
|
|
// creation, persistent thereafter — agents can modify their runner)
|
|
const groupRunnerDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, 'agent-runner-src');
|
|
mounts.push({ hostPath: groupRunnerDir, containerPath: '/app/src', readonly: false });
|
|
|
|
// Admin: mount project root read-only
|
|
if (agentGroup.is_admin) {
|
|
mounts.push({ hostPath: projectRoot, containerPath: '/workspace/project', readonly: true });
|
|
const envFile = path.join(projectRoot, '.env');
|
|
if (fs.existsSync(envFile)) {
|
|
mounts.push({ hostPath: '/dev/null', containerPath: '/workspace/project/.env', readonly: true });
|
|
}
|
|
}
|
|
|
|
// Additional mounts from container config
|
|
const containerConfig = agentGroup.container_config ? JSON.parse(agentGroup.container_config) : {};
|
|
if (containerConfig.additionalMounts) {
|
|
const validated = validateAdditionalMounts(
|
|
containerConfig.additionalMounts,
|
|
agentGroup.name,
|
|
!!agentGroup.is_admin,
|
|
);
|
|
mounts.push(...validated);
|
|
}
|
|
|
|
return mounts;
|
|
}
|
|
|
|
async function buildContainerArgs(
|
|
mounts: VolumeMount[],
|
|
containerName: string,
|
|
session: Session,
|
|
agentGroup: AgentGroup,
|
|
agentIdentifier?: string,
|
|
): Promise<string[]> {
|
|
const args: string[] = ['run', '--rm', '--name', containerName];
|
|
|
|
// Environment
|
|
args.push('-e', `TZ=${TIMEZONE}`);
|
|
args.push('-e', `AGENT_PROVIDER=${session.agent_provider || agentGroup.agent_provider || 'claude'}`);
|
|
// Two-DB split: container reads inbound.db, writes outbound.db
|
|
args.push('-e', 'SESSION_INBOUND_DB_PATH=/workspace/inbound.db');
|
|
args.push('-e', 'SESSION_OUTBOUND_DB_PATH=/workspace/outbound.db');
|
|
args.push('-e', 'SESSION_HEARTBEAT_PATH=/workspace/.heartbeat');
|
|
|
|
// Pass admin user ID and assistant name from messaging group/agent group
|
|
if (session.messaging_group_id) {
|
|
const mg = getMessagingGroup(session.messaging_group_id);
|
|
if (mg?.admin_user_id) {
|
|
args.push('-e', `NANOCLAW_ADMIN_USER_ID=${mg.admin_user_id}`);
|
|
}
|
|
}
|
|
if (agentGroup.name) {
|
|
args.push('-e', `NANOCLAW_ASSISTANT_NAME=${agentGroup.name}`);
|
|
}
|
|
args.push('-e', `NANOCLAW_AGENT_GROUP_ID=${agentGroup.id}`);
|
|
args.push('-e', `NANOCLAW_AGENT_GROUP_NAME=${agentGroup.name}`);
|
|
args.push('-e', `NANOCLAW_IS_ADMIN=${agentGroup.is_admin ? '1' : '0'}`);
|
|
|
|
// OneCLI gateway — injects HTTPS_PROXY + certs so container API calls
|
|
// are routed through the agent vault for credential injection.
|
|
// Must ensureAgent first for non-admin groups, otherwise applyContainerConfig
|
|
// rejects the unknown agent identifier and returns false.
|
|
try {
|
|
if (agentIdentifier) {
|
|
await onecli.ensureAgent({ name: agentGroup.name, identifier: agentIdentifier });
|
|
}
|
|
const onecliApplied = await onecli.applyContainerConfig(args, { addHostMapping: false, agent: agentIdentifier });
|
|
if (onecliApplied) {
|
|
log.info('OneCLI gateway applied', { containerName });
|
|
} else {
|
|
log.warn('OneCLI gateway not applied — container will have no credentials', { containerName });
|
|
}
|
|
} catch (err) {
|
|
log.warn('OneCLI gateway error — container will have no credentials', { containerName, err });
|
|
}
|
|
|
|
// Host gateway
|
|
args.push(...hostGatewayArgs());
|
|
|
|
// User mapping
|
|
const hostUid = process.getuid?.();
|
|
const hostGid = process.getgid?.();
|
|
if (hostUid != null && hostUid !== 0 && hostUid !== 1000) {
|
|
args.push('--user', `${hostUid}:${hostGid}`);
|
|
args.push('-e', 'HOME=/home/node');
|
|
}
|
|
|
|
// Volume mounts
|
|
for (const mount of mounts) {
|
|
if (mount.readonly) {
|
|
args.push(...readonlyMountArgs(mount.hostPath, mount.containerPath));
|
|
} else {
|
|
args.push('-v', `${mount.hostPath}:${mount.containerPath}`);
|
|
}
|
|
}
|
|
|
|
// Pass additional MCP servers from container config
|
|
const containerConfig = agentGroup.container_config ? JSON.parse(agentGroup.container_config) : {};
|
|
if (containerConfig.mcpServers && Object.keys(containerConfig.mcpServers).length > 0) {
|
|
args.push('-e', `NANOCLAW_MCP_SERVERS=${JSON.stringify(containerConfig.mcpServers)}`);
|
|
}
|
|
|
|
// Override entrypoint: compile agent-runner source, run v2 entry point (no stdin)
|
|
args.push('--entrypoint', 'bash');
|
|
|
|
// Use per-agent-group image if one has been built, otherwise base image
|
|
const imageTag = containerConfig.imageTag || CONTAINER_IMAGE;
|
|
args.push(imageTag);
|
|
|
|
args.push(
|
|
'-c',
|
|
'cd /app && npx tsc --outDir /tmp/dist 2>&1 >&2 && ln -sf /app/node_modules /tmp/dist/node_modules && node /tmp/dist/index.js',
|
|
);
|
|
|
|
return args;
|
|
}
|
|
|
|
/** Build a per-agent-group Docker image with custom packages. */
|
|
export async function buildAgentGroupImage(agentGroupId: string): Promise<void> {
|
|
const agentGroup = getAgentGroup(agentGroupId);
|
|
if (!agentGroup) throw new Error('Agent group not found');
|
|
|
|
const containerConfig = agentGroup.container_config ? JSON.parse(agentGroup.container_config) : {};
|
|
const packages = containerConfig.packages || { apt: [], npm: [] };
|
|
const aptPackages = (packages.apt || []) as string[];
|
|
const npmPackages = (packages.npm || []) as string[];
|
|
|
|
if (aptPackages.length === 0 && npmPackages.length === 0) {
|
|
throw new Error('No packages to install. Use install_packages first.');
|
|
}
|
|
|
|
let dockerfile = `FROM ${CONTAINER_IMAGE}\nUSER root\n`;
|
|
if (aptPackages.length > 0) {
|
|
dockerfile += `RUN apt-get update && apt-get install -y ${aptPackages.join(' ')} && rm -rf /var/lib/apt/lists/*\n`;
|
|
}
|
|
if (npmPackages.length > 0) {
|
|
dockerfile += `RUN npm install -g ${npmPackages.join(' ')}\n`;
|
|
}
|
|
dockerfile += 'USER node\n';
|
|
|
|
const imageTag = `nanoclaw-agent:${agentGroupId}`;
|
|
|
|
log.info('Building per-agent-group image', { agentGroupId, imageTag, apt: aptPackages, npm: npmPackages });
|
|
|
|
// Write Dockerfile to temp file and build
|
|
const tmpDockerfile = path.join(DATA_DIR, `Dockerfile.${agentGroupId}`);
|
|
fs.writeFileSync(tmpDockerfile, dockerfile);
|
|
try {
|
|
execSync(`${CONTAINER_RUNTIME_BIN} build -t ${imageTag} -f ${tmpDockerfile} .`, {
|
|
cwd: DATA_DIR,
|
|
stdio: 'pipe',
|
|
timeout: 300_000,
|
|
});
|
|
} finally {
|
|
fs.unlinkSync(tmpDockerfile);
|
|
}
|
|
|
|
// Store the image tag in container_config
|
|
containerConfig.imageTag = imageTag;
|
|
const { updateAgentGroup } = await import('./db/agent-groups.js');
|
|
updateAgentGroup(agentGroupId, { container_config: JSON.stringify(containerConfig) });
|
|
|
|
log.info('Per-agent-group image built', { agentGroupId, imageTag });
|
|
}
|