refactor: shared source — replace per-group agent-runner copies with single RO mount

Replace the per-group agent-runner-src copy model with a single shared
read-only mount. Source and skills are now RO + shared; personality,
config, working files, and Claude state stay RW + per-group.

Key changes:
- Mount container/agent-runner/src/ RO at /app/src (all groups share one copy)
- Mount container/skills/ RO at /app/skills; per-group skill selection via
  symlinks in .claude-shared/skills/ based on container.json "skills" field
- Mount container.json as nested RO bind on top of RW group dir
- Move all NANOCLAW_* env vars to container.json (runner reads at startup)
- New runner config.ts module replaces process.env reads
- Move command gate (filtered/admin) from container to host router
- Dockerfile: remove source COPY, split CLI installs (claude-code last),
  move agent-runner deps above CLIs for better layer caching
- Add writeOutboundDirect for router denial responses
- Design doc at docs/shared-src.md

Not included (follow-up): DB migration to drop agent_provider columns,
cleanup of orphaned agent-runner-src directories.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
exe.dev user
2026-04-21 12:05:19 +00:00
committed by gavrielc
parent 596035be09
commit 8a12fa61ac
14 changed files with 715 additions and 249 deletions

70
src/command-gate.ts Normal file
View File

@@ -0,0 +1,70 @@
/**
* Host-side command gate. Classifies inbound slash commands and gates
* them before they reach the container.
*
* - Filtered commands: dropped silently (never reach the container)
* - Admin commands: checked against user_roles; denied senders get a
* "Permission denied" response written directly to messages_out
* - Normal messages: pass through unchanged
*/
import { getDb, hasTable } from './db/connection.js';
export type GateResult =
| { action: 'pass' }
| { action: 'filter' }
| { action: 'deny'; command: string };
const FILTERED_COMMANDS = new Set(['/help', '/login', '/logout', '/doctor', '/config', '/remote-control']);
const ADMIN_COMMANDS = new Set(['/clear', '/compact', '/context', '/cost', '/files']);
/**
* Classify a message and decide whether it should reach the container.
* Returns 'pass' for normal messages and authorized admin commands,
* 'filter' for silently-dropped commands, 'deny' for unauthorized
* admin commands.
*/
export function gateCommand(
content: string,
userId: string | null,
agentGroupId: string,
): GateResult {
let text: string;
try {
const parsed = JSON.parse(content);
text = (parsed.text || '').trim();
} catch {
text = content.trim();
}
if (!text.startsWith('/')) return { action: 'pass' };
const command = text.split(/\s/)[0].toLowerCase();
if (FILTERED_COMMANDS.has(command)) return { action: 'filter' };
if (ADMIN_COMMANDS.has(command)) {
if (isAdmin(userId, agentGroupId)) {
return { action: 'pass' };
}
return { action: 'deny', command };
}
// Unknown slash commands pass through (the agent/SDK handles them)
return { action: 'pass' };
}
function isAdmin(userId: string | null, agentGroupId: string): boolean {
if (!userId) return false;
if (!hasTable(getDb(), 'user_roles')) return true; // no permissions module = allow all
const db = getDb();
const row = db
.prepare(
`SELECT 1 FROM user_roles
WHERE user_id = ?
AND (role = 'owner' OR role = 'admin')
AND (agent_group_id IS NULL OR agent_group_id = ?)
LIMIT 1`,
)
.get(userId, agentGroupId);
return row != null;
}

View File

@@ -1,15 +1,8 @@
/**
* Per-group container config, stored as a plain JSON file at
* `groups/<folder>/container.json`. Replaces the former
* `agent_groups.container_config` DB column.
*
* Shape:
* {
* mcpServers: { [name]: { command, args, env } }
* packages: { apt: string[], npm: string[] }
* imageTag?: string // set by buildAgentGroupImage on rebuild
* additionalMounts?: Array<{hostPath, containerPath, readonly}>
* }
* `groups/<folder>/container.json`. Mounted read-only inside the container
* at `/workspace/agent/container.json` — the runner reads it at startup but
* cannot modify it. Config changes go through the self-mod approval flow.
*
* All fields are optional — a missing file or a partial file both resolve
* to sensible defaults. Writes are atomic-enough (write-then-rename is not
@@ -38,6 +31,18 @@ export interface ContainerConfig {
packages: { apt: string[]; npm: string[] };
imageTag?: string;
additionalMounts: AdditionalMountConfig[];
/** Which skills to enable — array of skill names or "all" (default). */
skills: string[] | 'all';
/** Agent provider name (e.g. "claude", "opencode"). Default: "claude". */
provider?: string;
/** Agent group display name (used in transcript archiving). */
groupName?: string;
/** Assistant display name (used in system prompt / responses). */
assistantName?: string;
/** Agent group ID — set by the host, read by the runner. */
agentGroupId?: string;
/** Max messages per prompt. Falls back to code default if unset. */
maxMessagesPerPrompt?: number;
}
function emptyConfig(): ContainerConfig {
@@ -45,6 +50,7 @@ function emptyConfig(): ContainerConfig {
mcpServers: {},
packages: { apt: [], npm: [] },
additionalMounts: [],
skills: 'all',
};
}
@@ -71,6 +77,12 @@ export function readContainerConfig(folder: string): ContainerConfig {
},
imageTag: raw.imageTag,
additionalMounts: raw.additionalMounts ?? [],
skills: raw.skills ?? 'all',
provider: raw.provider,
groupName: raw.groupName,
assistantName: raw.assistantName,
agentGroupId: raw.agentGroupId,
maxMessagesPerPrompt: raw.maxMessagesPerPrompt,
};
} catch (err) {
console.error(`[container-config] failed to parse ${p}: ${String(err)}`);

View File

@@ -9,7 +9,7 @@ import path from 'path';
import { OneCLI } from '@onecli-sh/sdk';
import { CONTAINER_IMAGE, DATA_DIR, GROUPS_DIR, MAX_MESSAGES_PER_PROMPT, ONECLI_URL, TIMEZONE } from './config.js';
import { CONTAINER_IMAGE, DATA_DIR, GROUPS_DIR, ONECLI_URL, TIMEZONE } from './config.js';
import { readContainerConfig, writeContainerConfig } from './container-config.js';
import { CONTAINER_RUNTIME_BIN, hostGatewayArgs, readonlyMountArgs, stopContainer } from './container-runtime.js';
import { getAgentGroup } from './db/agent-groups.js';
@@ -91,17 +91,25 @@ async function spawnContainer(session: Session): Promise<void> {
}
writeSessionRouting(agentGroup.id, session.id);
// Read container config once — threaded through provider resolution,
// buildMounts, and buildContainerArgs so we don't re-read the file.
const containerConfig = readContainerConfig(agentGroup.folder);
// Ensure container.json has the agent group identity fields the runner needs.
// Written at spawn time so the runner can read them from the RO mount.
ensureRuntimeFields(containerConfig, agentGroup);
// Resolve the effective provider + any host-side contribution it declares
// (extra mounts, env passthrough). Computed once and threaded through both
// buildMounts and buildContainerArgs so side effects (mkdir, etc.) fire once.
const { provider, contribution } = resolveProviderContribution(session, agentGroup);
const { provider, contribution } = resolveProviderContribution(session, agentGroup, containerConfig);
const mounts = buildMounts(agentGroup, session, contribution);
const mounts = buildMounts(agentGroup, session, containerConfig, contribution);
const containerName = `nanoclaw-v2-${agentGroup.folder}-${Date.now()}`;
// OneCLI agent identifier is always the agent group id — stable across
// sessions and reversible via getAgentGroup() for approval routing.
const agentIdentifier = agentGroup.id;
const args = await buildContainerArgs(mounts, containerName, agentGroup, provider, contribution, agentIdentifier);
const args = await buildContainerArgs(mounts, containerName, agentGroup, containerConfig, provider, contribution, agentIdentifier);
log.info('Spawning container', { sessionId: session.id, agentGroup: agentGroup.name, containerName });
@@ -156,8 +164,9 @@ export function killContainer(sessionId: string, reason: string): void {
function resolveProviderContribution(
session: Session,
agentGroup: AgentGroup,
containerConfig: import('./container-config.js').ContainerConfig,
): { provider: string; contribution: ProviderContainerContribution } {
const provider = (session.agent_provider || agentGroup.agent_provider || 'claude').toLowerCase();
const provider = (containerConfig.provider || 'claude').toLowerCase();
const fn = getProviderContainerConfig(provider);
const contribution = fn
? fn({
@@ -172,15 +181,20 @@ function resolveProviderContribution(
function buildMounts(
agentGroup: AgentGroup,
session: Session,
containerConfig: import('./container-config.js').ContainerConfig,
providerContribution: ProviderContainerContribution,
): VolumeMount[] {
const projectRoot = process.cwd();
// Per-group filesystem state lives forever after first creation. Init is
// idempotent: it only writes paths that don't already exist, so this call
// is a no-op for groups that have spawned before. Pulling in upstream
// built-in skill or agent-runner source updates is an explicit operation
// (host-mediated tools), not something the spawn path does silently.
// is a no-op for groups that have spawned before.
initGroupFilesystem(agentGroup);
// Sync skill symlinks based on container.json selection before mounting.
const claudeDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, '.claude-shared');
syncSkillSymlinks(claudeDir, containerConfig);
const mounts: VolumeMount[] = [];
const sessDir = sessionDir(agentGroup.id, session.id);
const groupDir = path.resolve(GROUPS_DIR, agentGroup.folder);
@@ -188,28 +202,37 @@ function buildMounts(
// Session folder at /workspace (contains inbound.db, outbound.db, outbox/, .claude/)
mounts.push({ hostPath: sessDir, containerPath: '/workspace', readonly: false });
// Agent group folder at /workspace/agent
// Agent group folder at /workspace/agent (RW for working files + CLAUDE.md)
mounts.push({ hostPath: groupDir, containerPath: '/workspace/agent', readonly: false });
// Global memory directory — always read-only. Edits to global config
// happen through the approval flow, not by handing one workspace RW.
// container.json — nested RO mount on top of RW group dir so the agent
// can read its config but cannot modify it.
const containerJsonPath = path.join(groupDir, 'container.json');
if (fs.existsSync(containerJsonPath)) {
mounts.push({ hostPath: containerJsonPath, containerPath: '/workspace/agent/container.json', readonly: true });
}
// Global memory directory — always read-only.
const globalDir = path.join(GROUPS_DIR, 'global');
if (fs.existsSync(globalDir)) {
mounts.push({ hostPath: globalDir, containerPath: '/workspace/global', readonly: true });
}
// Per-group .claude-shared at /home/node/.claude (Claude state, settings,
// skills — initialized once at group creation, persistent thereafter)
const claudeDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, '.claude-shared');
// skill symlinks)
mounts.push({ hostPath: claudeDir, containerPath: '/home/node/.claude', readonly: false });
// Per-group agent-runner source at /app/src (initialized once at group
// creation, persistent thereafter — agents can modify their runner)
const groupRunnerDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, 'agent-runner-src');
mounts.push({ hostPath: groupRunnerDir, containerPath: '/app/src', readonly: false });
// Shared agent-runner source — read-only, same code for all groups.
const agentRunnerSrc = path.join(projectRoot, 'container', 'agent-runner', 'src');
mounts.push({ hostPath: agentRunnerSrc, containerPath: '/app/src', readonly: true });
// Additional mounts from container config (groups/<folder>/container.json)
const containerConfig = readContainerConfig(agentGroup.folder);
// Shared skills — read-only, symlinks in .claude-shared/skills/ point here.
const skillsSrc = path.join(projectRoot, 'container', 'skills');
if (fs.existsSync(skillsSrc)) {
mounts.push({ hostPath: skillsSrc, containerPath: '/app/skills', readonly: true });
}
// Additional mounts from container config
if (containerConfig.additionalMounts && containerConfig.additionalMounts.length > 0) {
const validated = validateAdditionalMounts(containerConfig.additionalMounts, agentGroup.name);
mounts.push(...validated);
@@ -223,32 +246,113 @@ function buildMounts(
return mounts;
}
/**
* Sync skill symlinks in .claude-shared/skills/ to match the container.json
* selection. Each symlink points to a container path (/app/skills/<name>)
* so it's dangling on the host but valid inside the container.
*/
function syncSkillSymlinks(
claudeDir: string,
containerConfig: import('./container-config.js').ContainerConfig,
): void {
const skillsDir = path.join(claudeDir, 'skills');
if (!fs.existsSync(skillsDir)) {
fs.mkdirSync(skillsDir, { recursive: true });
}
// Determine desired skill set
const projectRoot = process.cwd();
const sharedSkillsDir = path.join(projectRoot, 'container', 'skills');
let desired: string[];
if (containerConfig.skills === 'all') {
// Recompute from shared dir — newly-added upstream skills appear automatically
desired = fs.existsSync(sharedSkillsDir)
? fs.readdirSync(sharedSkillsDir).filter((e) => {
try {
return fs.statSync(path.join(sharedSkillsDir, e)).isDirectory();
} catch {
return false;
}
})
: [];
} else {
desired = containerConfig.skills;
}
const desiredSet = new Set(desired);
// Remove symlinks not in the desired set
for (const entry of fs.readdirSync(skillsDir)) {
const entryPath = path.join(skillsDir, entry);
let isSymlink = false;
try {
isSymlink = fs.lstatSync(entryPath).isSymbolicLink();
} catch {
continue;
}
if (isSymlink && !desiredSet.has(entry)) {
fs.unlinkSync(entryPath);
}
}
// Create symlinks for desired skills (container path targets)
for (const skill of desired) {
const linkPath = path.join(skillsDir, skill);
let exists = false;
try {
fs.lstatSync(linkPath);
exists = true;
} catch {
/* missing */
}
if (!exists) {
fs.symlinkSync(`/app/skills/${skill}`, linkPath);
}
}
}
/**
* Ensure container.json has the runtime identity fields the runner needs.
* Written at spawn time so they're always current even if the DB values
* change (e.g. group rename). Only writes if values differ to avoid
* unnecessary file churn.
*/
function ensureRuntimeFields(
containerConfig: import('./container-config.js').ContainerConfig,
agentGroup: AgentGroup,
): void {
let dirty = false;
if (containerConfig.agentGroupId !== agentGroup.id) {
containerConfig.agentGroupId = agentGroup.id;
dirty = true;
}
if (containerConfig.groupName !== agentGroup.name) {
containerConfig.groupName = agentGroup.name;
dirty = true;
}
if (containerConfig.assistantName !== agentGroup.name) {
containerConfig.assistantName = agentGroup.name;
dirty = true;
}
if (dirty) {
writeContainerConfig(agentGroup.folder, containerConfig);
}
}
async function buildContainerArgs(
mounts: VolumeMount[],
containerName: string,
agentGroup: AgentGroup,
containerConfig: import('./container-config.js').ContainerConfig,
provider: string,
providerContribution: ProviderContainerContribution,
agentIdentifier?: string,
): Promise<string[]> {
const args: string[] = ['run', '--rm', '--name', containerName];
// Environment
// Environment — only vars read by code we don't own.
// Everything NanoClaw-specific is in container.json (read by runner at startup).
args.push('-e', `TZ=${TIMEZONE}`);
args.push('-e', `AGENT_PROVIDER=${provider}`);
// Two-DB split: container reads inbound.db, writes outbound.db
args.push('-e', 'SESSION_INBOUND_DB_PATH=/workspace/inbound.db');
args.push('-e', 'SESSION_OUTBOUND_DB_PATH=/workspace/outbound.db');
args.push('-e', 'SESSION_HEARTBEAT_PATH=/workspace/.heartbeat');
if (agentGroup.name) {
args.push('-e', `NANOCLAW_ASSISTANT_NAME=${agentGroup.name}`);
}
args.push('-e', `NANOCLAW_AGENT_GROUP_ID=${agentGroup.id}`);
args.push('-e', `NANOCLAW_AGENT_GROUP_NAME=${agentGroup.name}`);
// Cap on how many pending messages reach one prompt. Accumulated context
// (trigger=0 rows) rides along with wake-eligible rows up to this cap.
args.push('-e', `NANOCLAW_MAX_MESSAGES_PER_PROMPT=${MAX_MESSAGES_PER_PROMPT}`);
// Provider-contributed env vars (e.g. XDG_DATA_HOME, OPENCODE_*, NO_PROXY).
if (providerContribution.env) {
@@ -257,39 +361,8 @@ async function buildContainerArgs(
}
}
// Users allowed to run admin commands (e.g. /clear) inside this container.
// Computed at wake time: owners + global admins + admins scoped to this
// agent group. Role changes take effect on next container spawn.
//
// SQL inlined to keep core independent of the permissions module — we
// guard on the `user_roles` table directly. If the permissions module
// isn't installed, the table doesn't exist and the set stays empty; the
// formatter treats an empty admin set as permissionless mode (every
// sender is admin).
const adminUserIds = new Set<string>();
if (hasTable(getDb(), 'user_roles')) {
const db = getDb();
const owners = db
.prepare("SELECT user_id FROM user_roles WHERE role = 'owner' AND agent_group_id IS NULL")
.all() as Array<{ user_id: string }>;
const globalAdmins = db
.prepare("SELECT user_id FROM user_roles WHERE role = 'admin' AND agent_group_id IS NULL")
.all() as Array<{ user_id: string }>;
const scopedAdmins = db
.prepare("SELECT user_id FROM user_roles WHERE role = 'admin' AND agent_group_id = ?")
.all(agentGroup.id) as Array<{ user_id: string }>;
for (const r of owners) adminUserIds.add(r.user_id);
for (const r of globalAdmins) adminUserIds.add(r.user_id);
for (const r of scopedAdmins) adminUserIds.add(r.user_id);
}
if (adminUserIds.size > 0) {
args.push('-e', `NANOCLAW_ADMIN_USER_IDS=${Array.from(adminUserIds).join(',')}`);
}
// OneCLI gateway — injects HTTPS_PROXY + certs so container API calls
// are routed through the agent vault for credential injection.
// Must ensureAgent first for non-admin groups, otherwise applyContainerConfig
// rejects the unknown agent identifier and returns false.
try {
if (agentIdentifier) {
await onecli.ensureAgent({ name: agentGroup.name, identifier: agentIdentifier });
@@ -324,16 +397,7 @@ async function buildContainerArgs(
}
}
// Pass additional MCP servers from container config (groups/<folder>/container.json)
const containerConfig = readContainerConfig(agentGroup.folder);
if (containerConfig.mcpServers && Object.keys(containerConfig.mcpServers).length > 0) {
args.push('-e', `NANOCLAW_MCP_SERVERS=${JSON.stringify(containerConfig.mcpServers)}`);
}
// Override entrypoint: run v2 entry point directly via Bun (no tsc, no stdin).
// The image's ENTRYPOINT (tini → entrypoint.sh) handles the stdin-piped
// invocation path; the host-spawned sessions don't need stdin because all
// IO flows through the mounted session DBs.
args.push('--entrypoint', 'bash');
// Use per-agent-group image if one has been built, otherwise base image

View File

@@ -37,12 +37,12 @@ const DEFAULT_SETTINGS_JSON =
* an already-initialized group is a no-op.
*
* Called once per group lifetime: at creation, or defensively from
* `buildMounts()` for groups that pre-date this code path. After init, the
* host never overwrites any of these paths automatically — agents own them.
* To pull in upstream changes, use the host-mediated reset/refresh tools.
* `buildMounts()` for groups that pre-date this code path.
*
* Source code and skills are shared RO mounts — not copied per-group.
* Skill symlinks are synced at spawn time by container-runner.ts.
*/
export function initGroupFilesystem(group: AgentGroup, opts?: { instructions?: string }): void {
const projectRoot = process.cwd();
const initialized: string[] = [];
// 1. groups/<folder>/ — group memory + working dir
@@ -97,23 +97,12 @@ export function initGroupFilesystem(group: AgentGroup, opts?: { instructions?: s
initialized.push('settings.json');
}
// Skills directory — created empty here; symlinks are synced at spawn
// time by container-runner.ts based on container.json skills selection.
const skillsDst = path.join(claudeDir, 'skills');
if (!fs.existsSync(skillsDst)) {
const skillsSrc = path.join(projectRoot, 'container', 'skills');
if (fs.existsSync(skillsSrc)) {
fs.cpSync(skillsSrc, skillsDst, { recursive: true });
initialized.push('skills/');
}
}
// 3. data/v2-sessions/<id>/agent-runner-src/ — per-group source copy
const groupRunnerDir = path.join(DATA_DIR, 'v2-sessions', group.id, 'agent-runner-src');
if (!fs.existsSync(groupRunnerDir)) {
const agentRunnerSrc = path.join(projectRoot, 'container', 'agent-runner', 'src');
if (fs.existsSync(agentRunnerSrc)) {
fs.cpSync(agentRunnerSrc, groupRunnerDir, { recursive: true });
initialized.push('agent-runner-src/');
}
fs.mkdirSync(skillsDst, { recursive: true });
initialized.push('skills/');
}
if (initialized.length > 0) {

View File

@@ -18,6 +18,7 @@
* for policy refusals.
*/
import { getChannelAdapter } from './channels/channel-registry.js';
import { gateCommand } from './command-gate.js';
import { getAgentGroup } from './db/agent-groups.js';
import { recordDroppedMessage } from './db/dropped-messages.js';
import {
@@ -28,7 +29,7 @@ import {
import { findSessionForAgent } from './db/sessions.js';
import { startTypingRefresh } from './modules/typing/index.js';
import { log } from './log.js';
import { resolveSession, writeSessionMessage } from './session-manager.js';
import { resolveSession, writeSessionMessage, writeOutboundDirect } from './session-manager.js';
import { wakeContainer } from './container-runner.js';
import { getSession } from './db/sessions.js';
import type { AgentGroup, MessagingGroup, MessagingGroupAgent } from './types.js';
@@ -398,6 +399,29 @@ async function deliverToAgent(
threadId: event.threadId,
};
// Command gate: classify slash commands before they reach the container.
// Filtered commands are dropped silently. Denied admin commands get a
// permission-denied response written directly to messages_out.
if (event.message.kind === 'chat' || event.message.kind === 'chat-sdk') {
const gate = gateCommand(event.message.content, userId, agent.agent_group_id);
if (gate.action === 'filter') {
log.debug('Filtered command dropped by gate', { agentGroupId: agent.agent_group_id });
return;
}
if (gate.action === 'deny') {
writeOutboundDirect(session.agent_group_id, session.id, {
id: `deny-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
kind: 'chat',
platformId: deliveryAddr.platformId,
channelType: deliveryAddr.channelType,
threadId: deliveryAddr.threadId,
content: JSON.stringify({ text: `Permission denied: ${gate.command} requires admin access.` }),
});
log.info('Admin command denied by gate', { command: gate.command, userId, agentGroupId: agent.agent_group_id });
return;
}
}
writeSessionMessage(session.agent_group_id, session.id, {
id: messageIdForAgent(event.message.id, agent.agent_group_id),
kind: event.message.kind,

View File

@@ -279,6 +279,34 @@ export function openOutboundDb(agentGroupId: string, sessionId: string): Databas
return openOutboundDbRaw(outboundDbPath(agentGroupId, sessionId));
}
/**
* Write a message directly to a session's outbound DB so the host delivery
* loop picks it up. Used by the command gate to send denial responses
* without waking a container.
*/
export function writeOutboundDirect(
agentGroupId: string,
sessionId: string,
message: {
id: string;
kind: string;
platformId: string | null;
channelType: string | null;
threadId: string | null;
content: string;
},
): void {
const db = openOutboundDb(agentGroupId, sessionId);
try {
db.prepare(
`INSERT OR IGNORE INTO messages_out (id, seq, timestamp, kind, platform_id, channel_type, thread_id, content)
VALUES (?, (SELECT COALESCE(MAX(seq), 0) + 2 FROM messages_out), datetime('now'), ?, ?, ?, ?, ?)`,
).run(message.id, message.kind, message.platformId, message.channelType, message.threadId, message.content);
} finally {
db.close();
}
}
/**
* @deprecated Use openInboundDb / openOutboundDb instead.
*/