refactor: shared source — replace per-group agent-runner copies with single RO mount
Replace the per-group agent-runner-src copy model with a single shared read-only mount. Source and skills are now RO + shared; personality, config, working files, and Claude state stay RW + per-group.

Key changes:
- Mount container/agent-runner/src/ RO at /app/src (all groups share one copy)
- Mount container/skills/ RO at /app/skills; per-group skill selection via symlinks in .claude-shared/skills/ based on container.json "skills" field
- Mount container.json as nested RO bind on top of RW group dir
- Move all NANOCLAW_* env vars to container.json (runner reads at startup)
- New runner config.ts module replaces process.env reads
- Move command gate (filtered/admin) from container to host router
- Dockerfile: remove source COPY, split CLI installs (claude-code last), move agent-runner deps above CLIs for better layer caching
- Add writeOutboundDirect for router denial responses
- Design doc at docs/shared-src.md

Not included (follow-up): DB migration to drop agent_provider columns, cleanup of orphaned agent-runner-src directories.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,8 +3,12 @@
|
||||
# Runs Claude Agent SDK in isolated Linux VM with browser automation.
|
||||
#
|
||||
# Runtime split:
|
||||
# - agent-runner (our TypeScript code): Bun
|
||||
# - agent-runner (our TypeScript code): Bun, mounted RO at /app/src by host
|
||||
# - globally-installed Node CLIs (claude-code, agent-browser, vercel): pnpm + Node
|
||||
#
|
||||
# Source is never baked in — /app/src is provided by a shared read-only
|
||||
# bind mount at runtime (see src/container-runner.ts). Source-only changes
|
||||
# never require an image rebuild.
|
||||
|
||||
FROM node:22-slim
|
||||
|
||||
@@ -66,36 +70,39 @@ RUN curl -fsSL https://bun.sh/install | bash -s "bun-v${BUN_VERSION}" && \
|
||||
install -m 0755 /root/.bun/bin/bun /usr/local/bin/bun && \
|
||||
rm -rf /root/.bun
|
||||
|
||||
# ---- pnpm + global Node CLIs -------------------------------------------------
|
||||
ENV PNPM_HOME="/pnpm"
|
||||
ENV PATH="$PNPM_HOME:$PATH"
|
||||
RUN corepack enable
|
||||
|
||||
# agent-browser has a postinstall build script — pnpm skips these by default.
|
||||
# Allowlist it via .npmrc so the install doesn't silently produce a broken
|
||||
# package. Pinned versions so every rebuild is reproducible.
|
||||
RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
echo "only-built-dependencies[]=agent-browser" > /root/.npmrc && \
|
||||
echo "only-built-dependencies[]=@anthropic-ai/claude-code" >> /root/.npmrc && \
|
||||
pnpm install -g \
|
||||
"@anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}" \
|
||||
"agent-browser@${AGENT_BROWSER_VERSION}" \
|
||||
"vercel@${VERCEL_VERSION}"
|
||||
|
||||
# ---- agent-runner ------------------------------------------------------------
|
||||
# ---- agent-runner deps -------------------------------------------------------
|
||||
# Deps are cached independently of CLI versions. Source is NOT baked in —
|
||||
# it's provided by the shared RO mount at runtime.
|
||||
WORKDIR /app
|
||||
|
||||
# Copy manifest + lockfile first so the install layer caches independently of
|
||||
# source edits.
|
||||
COPY agent-runner/package.json agent-runner/bun.lock ./
|
||||
|
||||
RUN --mount=type=cache,target=/root/.bun/install/cache \
|
||||
bun install --frozen-lockfile
|
||||
|
||||
# Source. Bun runs TS directly — no tsc build step. The host remounts this
|
||||
# path at runtime via `src/container-runner.ts` so source edits on the host
|
||||
# take effect without rebuilding the image; the baked copy is the fallback.
|
||||
COPY agent-runner/ ./
|
||||
# ---- pnpm + global Node CLIs -------------------------------------------------
|
||||
# Most stable first, most frequently bumped last. Bumping claude-code
|
||||
# (the most common change) only invalidates one layer.
|
||||
#
|
||||
# only-built-dependencies gates pnpm's supply-chain policy:
|
||||
# - agent-browser has a postinstall build step.
|
||||
# - @anthropic-ai/claude-code's postinstall downloads the native Claude
|
||||
# binary (linux-arm64 variant on our image). Without the allowlist
|
||||
# the SDK fails at spawn time with "native binary not found".
|
||||
ENV PNPM_HOME="/pnpm"
|
||||
ENV PATH="$PNPM_HOME:$PATH"
|
||||
RUN corepack enable
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
echo "only-built-dependencies[]=agent-browser" > /root/.npmrc && \
|
||||
echo "only-built-dependencies[]=@anthropic-ai/claude-code" >> /root/.npmrc && \
|
||||
pnpm install -g "vercel@${VERCEL_VERSION}"
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
pnpm install -g "agent-browser@${AGENT_BROWSER_VERSION}"
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/pnpm \
|
||||
pnpm install -g "@anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}"
|
||||
|
||||
# ---- Entrypoint --------------------------------------------------------------
|
||||
COPY entrypoint.sh /app/entrypoint.sh
|
||||
|
||||
55
container/agent-runner/src/config.ts
Normal file
55
container/agent-runner/src/config.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Runner config — reads /workspace/agent/container.json at startup.
|
||||
*
|
||||
* This file is mounted read-only inside the container. The host writes it;
|
||||
* the runner only reads. All NanoClaw-specific configuration lives here
|
||||
* instead of environment variables.
|
||||
*/
|
||||
import fs from 'fs';
|
||||
|
||||
/** Absolute path of the per-group config file as seen from inside the container. */
const CONFIG_PATH = '/workspace/agent/container.json';

/** Runner configuration as loaded (and normalized) from container.json. */
export interface RunnerConfig {
  /** Agent provider name; `'claude'` when missing or not a non-empty string. */
  provider: string;
  /** Assistant name; empty string when not configured. */
  assistantName: string;
  /** Group name; empty string when not configured. */
  groupName: string;
  /** Agent group identifier; empty string when not configured. */
  agentGroupId: string;
  /** Cap on how many messages reach the agent in one prompt; always an integer >= 1. */
  maxMessagesPerPrompt: number;
  /** Additional MCP servers keyed by server name; empty object when none are configured. */
  mcpServers: Record<string, { command: string; args: string[]; env: Record<string, string> }>;
}

const DEFAULT_MAX_MESSAGES = 10;

/** Memoized result of the first successful `loadConfig()` call. */
let _config: RunnerConfig | null = null;

/** True for non-null, non-array objects (i.e. what a JSON `{...}` parses to). */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Returns `value` when it is a non-empty string, otherwise `fallback`. */
function stringOr(value: unknown, fallback: string): string {
  return typeof value === 'string' && value !== '' ? value : fallback;
}

/**
 * Returns `value` floored to an integer when it is a finite number >= 1,
 * otherwise `fallback`. Guards the SQL `LIMIT` the cap is bound to against
 * negative, fractional, or non-numeric input.
 */
function positiveIntOr(value: unknown, fallback: number): number {
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 1) return fallback;
  return Math.floor(value);
}

/**
 * Reads and parses container.json. Never throws: an unreadable file, invalid
 * JSON, or a top-level value that is not an object all yield `{}` (after
 * logging the reason) so that every field falls back to its default.
 */
function readRawConfig(): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
    if (isPlainObject(parsed)) return parsed;
    console.error(`[config] ${CONFIG_PATH} is not a JSON object, using defaults`);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[config] Failed to read ${CONFIG_PATH}, using defaults (${reason})`);
  }
  return {};
}

/**
 * Load config from container.json. Called once at startup; later calls
 * return the memoized result without re-reading the file.
 *
 * Falls back to a default for any field that is missing or has the wrong
 * type, so a malformed config file can never crash the runner at startup.
 *
 * @returns The normalized runner configuration.
 */
export function loadConfig(): RunnerConfig {
  if (_config) return _config;

  const raw = readRawConfig();

  _config = {
    provider: stringOr(raw.provider, 'claude'),
    assistantName: stringOr(raw.assistantName, ''),
    groupName: stringOr(raw.groupName, ''),
    agentGroupId: stringOr(raw.agentGroupId, ''),
    maxMessagesPerPrompt: positiveIntOr(raw.maxMessagesPerPrompt, DEFAULT_MAX_MESSAGES),
    // Entries are passed through untouched; only the container shape is
    // checked so that callers can safely iterate it as a name → server map.
    mcpServers: isPlainObject(raw.mcpServers) ? (raw.mcpServers as RunnerConfig['mcpServers']) : {},
  };

  return _config;
}

/**
 * Get the loaded config.
 *
 * @throws Error if `loadConfig()` has not been called yet.
 */
export function getConfig(): RunnerConfig {
  if (!_config) throw new Error('Config not loaded — call loadConfig() first');
  return _config;
}
|
||||
@@ -31,8 +31,7 @@ let _heartbeatPath: string = DEFAULT_HEARTBEAT_PATH;
|
||||
/** Inbound DB — container opens read-only (host is the sole writer). */
|
||||
export function getInboundDb(): Database {
|
||||
if (!_inbound) {
|
||||
const dbPath = process.env.SESSION_INBOUND_DB_PATH || DEFAULT_INBOUND_PATH;
|
||||
_inbound = new Database(dbPath, { readonly: true });
|
||||
_inbound = new Database(DEFAULT_INBOUND_PATH, { readonly: true });
|
||||
_inbound.exec('PRAGMA busy_timeout = 5000');
|
||||
}
|
||||
return _inbound;
|
||||
@@ -41,8 +40,7 @@ export function getInboundDb(): Database {
|
||||
/** Outbound DB — container owns this file (sole writer). */
|
||||
export function getOutboundDb(): Database {
|
||||
if (!_outbound) {
|
||||
const dbPath = process.env.SESSION_OUTBOUND_DB_PATH || DEFAULT_OUTBOUND_PATH;
|
||||
_outbound = new Database(dbPath);
|
||||
_outbound = new Database(DEFAULT_OUTBOUND_PATH);
|
||||
_outbound.exec('PRAGMA journal_mode = DELETE');
|
||||
_outbound.exec('PRAGMA busy_timeout = 5000');
|
||||
_outbound.exec('PRAGMA foreign_keys = ON');
|
||||
@@ -122,7 +120,7 @@ export function clearContainerToolInFlight(): void {
|
||||
* A file touch is cheaper and avoids cross-boundary DB write contention.
|
||||
*/
|
||||
export function touchHeartbeat(): void {
|
||||
const p = process.env.SESSION_HEARTBEAT_PATH || _heartbeatPath;
|
||||
const p = _heartbeatPath;
|
||||
const now = new Date();
|
||||
try {
|
||||
fs.utimesSync(p, now, now);
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
* The container never writes to inbound.db — all status tracking goes through
|
||||
* processing_ack. The host reads processing_ack to sync message lifecycle.
|
||||
*/
|
||||
import { getConfig } from '../config.js';
|
||||
import { getInboundDb, getOutboundDb } from './connection.js';
|
||||
|
||||
export interface MessageInRow {
|
||||
@@ -26,14 +27,16 @@ export interface MessageInRow {
|
||||
content: string;
|
||||
}
|
||||
|
||||
// Cap on how many messages reach the agent in one prompt, including any
|
||||
// accumulated-but-not-triggered context. Host controls the cap via the
|
||||
// NANOCLAW_MAX_MESSAGES_PER_PROMPT env var; default mirrors the host's
|
||||
// config.ts default of 10.
|
||||
const MAX_MESSAGES_PER_PROMPT = Math.max(
|
||||
1,
|
||||
parseInt(process.env.NANOCLAW_MAX_MESSAGES_PER_PROMPT || '10', 10) || 10,
|
||||
);
|
||||
/**
 * Upper bound on the number of messages handed to the agent in a single
 * prompt. The value comes from the loaded runner config; when the config
 * is unavailable (e.g. a test harness that never loaded it) the cap is 10.
 */
function getMaxMessagesPerPrompt(): number {
  let cap = 10;
  try {
    cap = getConfig().maxMessagesPerPrompt;
  } catch {
    // Config lookup failed — keep the built-in default.
  }
  return cap;
}
|
||||
|
||||
/**
|
||||
* Fetch pending messages that are due for processing.
|
||||
@@ -58,7 +61,7 @@ export function getPendingMessages(): MessageInRow[] {
|
||||
ORDER BY seq DESC
|
||||
LIMIT ?`,
|
||||
)
|
||||
.all(MAX_MESSAGES_PER_PROMPT) as MessageInRow[];
|
||||
.all(getMaxMessagesPerPrompt()) as MessageInRow[];
|
||||
|
||||
if (pending.length === 0) return [];
|
||||
|
||||
|
||||
@@ -55,6 +55,17 @@ export function categorizeMessage(msg: MessageInRow): CommandInfo {
|
||||
return { category: 'passthrough', command, text, senderId };
|
||||
}
|
||||
|
||||
/**
 * Narrow check for /clear — the only command the runner handles directly.
 * All other command gating (filtered, admin) is done by the host router
 * before messages reach the container.
 *
 * The match is case-insensitive and requires `/clear` to be the whole first
 * token of the trimmed text, so `/clear` and `/clear now` match but
 * `/clearance` or `/clearly ...` do not. Messages whose parsed content has
 * no string `text` field are never treated as /clear.
 *
 * @param msg Inbound message row whose `content` is parsed with `parseContent`.
 * @returns `true` when the message is a /clear command.
 */
export function isClearCommand(msg: MessageInRow): boolean {
  const content = parseContent(msg.content);
  const rawText: unknown = content?.text;
  if (typeof rawText !== 'string') return false;
  // Require end-of-text or whitespace after the command so that other
  // words merely starting with "/clear" do not reset the session.
  return /^\/clear(?:\s|$)/i.test(rawText.trim());
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
function extractSenderId(msg: MessageInRow, content: any): string | null {
|
||||
const raw: string | null = content?.senderId || content?.author?.userId || null;
|
||||
|
||||
@@ -4,14 +4,8 @@
|
||||
* Runs inside a container. All IO goes through the session DB.
|
||||
* No stdin, no stdout markers, no IPC files.
|
||||
*
|
||||
* Config:
|
||||
* - SESSION_INBOUND_DB_PATH: path to host-owned inbound DB (default: /workspace/inbound.db)
|
||||
* - SESSION_OUTBOUND_DB_PATH: path to container-owned outbound DB (default: /workspace/outbound.db)
|
||||
* - SESSION_HEARTBEAT_PATH: heartbeat file path (default: /workspace/.heartbeat)
|
||||
* - AGENT_PROVIDER: any registered provider name (default: claude). The
|
||||
* set of registered providers is whatever `providers/index.ts` imports.
|
||||
* - NANOCLAW_ASSISTANT_NAME: assistant name for transcript archiving
|
||||
* - NANOCLAW_ADMIN_USER_IDS: comma-separated user IDs allowed to run admin commands
|
||||
* Config is read from /workspace/agent/container.json (mounted RO).
|
||||
* Only TZ and OneCLI networking vars come from env.
|
||||
*
|
||||
* Mount structure:
|
||||
* /workspace/
|
||||
@@ -19,14 +13,19 @@
|
||||
* outbound.db ← container-owned session DB
|
||||
* .heartbeat ← container touches for liveness detection
|
||||
* outbox/ ← outbound files
|
||||
* agent/ ← agent group folder (CLAUDE.md, skills, working files)
|
||||
* .claude/ ← Claude SDK session data
|
||||
* agent/ ← agent group folder (CLAUDE.md, container.json, working files)
|
||||
* container.json ← per-group config (RO nested mount)
|
||||
* global/ ← shared global memory (RO)
|
||||
* /app/src/ ← shared agent-runner source (RO)
|
||||
* /app/skills/ ← shared skills (RO)
|
||||
* /home/node/.claude/ ← Claude SDK state + skill symlinks (RW)
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
import { loadConfig } from './config.js';
|
||||
import { buildSystemPromptAddendum } from './destinations.js';
|
||||
// Providers barrel — each enabled provider self-registers on import.
|
||||
// Provider skills append imports to providers/index.ts.
|
||||
@@ -41,21 +40,11 @@ function log(msg: string): void {
|
||||
const CWD = '/workspace/agent';
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const providerName = (process.env.AGENT_PROVIDER || 'claude').toLowerCase() as ProviderName;
|
||||
const assistantName = process.env.NANOCLAW_ASSISTANT_NAME;
|
||||
const adminUserIds = new Set(
|
||||
(process.env.NANOCLAW_ADMIN_USER_IDS || '')
|
||||
.split(',')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean),
|
||||
);
|
||||
const config = loadConfig();
|
||||
const providerName = config.provider.toLowerCase() as ProviderName;
|
||||
|
||||
log(`Starting v2 agent-runner (provider: ${providerName})`);
|
||||
|
||||
// Destinations addendum is the only runtime-generated context we inject.
|
||||
// Global CLAUDE.md is loaded by Claude Code from /workspace/agent/CLAUDE.md
|
||||
// (which imports /workspace/global/CLAUDE.md via @-syntax) — no need to
|
||||
// read it manually anymore.
|
||||
const instructions = buildSystemPromptAddendum();
|
||||
|
||||
// Discover additional directories mounted at /workspace/extra/*
|
||||
@@ -77,34 +66,22 @@ async function main(): Promise<void> {
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const mcpServerPath = path.join(__dirname, 'mcp-tools', 'index.ts');
|
||||
|
||||
// Build MCP servers config: nanoclaw built-in + any additional from host
|
||||
// Build MCP servers config: nanoclaw built-in + any from container.json
|
||||
const mcpServers: Record<string, { command: string; args: string[]; env: Record<string, string> }> = {
|
||||
nanoclaw: {
|
||||
command: 'bun',
|
||||
args: ['run', mcpServerPath],
|
||||
env: {
|
||||
SESSION_INBOUND_DB_PATH: process.env.SESSION_INBOUND_DB_PATH || '/workspace/inbound.db',
|
||||
SESSION_OUTBOUND_DB_PATH: process.env.SESSION_OUTBOUND_DB_PATH || '/workspace/outbound.db',
|
||||
SESSION_HEARTBEAT_PATH: process.env.SESSION_HEARTBEAT_PATH || '/workspace/.heartbeat',
|
||||
},
|
||||
env: {},
|
||||
},
|
||||
};
|
||||
|
||||
// Merge additional MCP servers from host configuration
|
||||
if (process.env.NANOCLAW_MCP_SERVERS) {
|
||||
try {
|
||||
const additional = JSON.parse(process.env.NANOCLAW_MCP_SERVERS) as Record<string, { command: string; args: string[]; env: Record<string, string> }>;
|
||||
for (const [name, config] of Object.entries(additional)) {
|
||||
mcpServers[name] = config;
|
||||
log(`Additional MCP server: ${name} (${config.command})`);
|
||||
}
|
||||
} catch (e) {
|
||||
log(`Failed to parse NANOCLAW_MCP_SERVERS: ${e}`);
|
||||
}
|
||||
for (const [name, serverConfig] of Object.entries(config.mcpServers)) {
|
||||
mcpServers[name] = serverConfig;
|
||||
log(`Additional MCP server: ${name} (${serverConfig.command})`);
|
||||
}
|
||||
|
||||
const provider = createProvider(providerName, {
|
||||
assistantName,
|
||||
assistantName: config.assistantName || undefined,
|
||||
mcpServers,
|
||||
env: { ...process.env },
|
||||
additionalDirectories: additionalDirectories.length > 0 ? additionalDirectories : undefined,
|
||||
@@ -114,7 +91,6 @@ async function main(): Promise<void> {
|
||||
provider,
|
||||
cwd: CWD,
|
||||
systemContext: { instructions },
|
||||
adminUserIds,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ import { getPendingMessages, markProcessing, markCompleted, type MessageInRow }
|
||||
import { writeMessageOut } from './db/messages-out.js';
|
||||
import { touchHeartbeat, clearStaleProcessingAcks } from './db/connection.js';
|
||||
import { getStoredSessionId, setStoredSessionId, clearStoredSessionId } from './db/session-state.js';
|
||||
import { formatMessages, extractRouting, categorizeMessage, stripInternalTags, type RoutingContext } from './formatter.js';
|
||||
import { formatMessages, extractRouting, categorizeMessage, isClearCommand, stripInternalTags, type RoutingContext } from './formatter.js';
|
||||
import type { AgentProvider, AgentQuery, ProviderEvent } from './providers/types.js';
|
||||
|
||||
const POLL_INTERVAL_MS = 1000;
|
||||
@@ -23,12 +23,6 @@ export interface PollLoopConfig {
|
||||
systemContext?: {
|
||||
instructions?: string;
|
||||
};
|
||||
/**
|
||||
* Set of user IDs allowed to run admin commands (e.g. /clear) in this
|
||||
* agent group. Host populates from owners + global admins + scoped admins
|
||||
* at container wake time, so role changes take effect on next spawn.
|
||||
*/
|
||||
adminUserIds?: Set<string>;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -90,74 +84,36 @@ export async function runPollLoop(config: PollLoopConfig): Promise<void> {
|
||||
|
||||
const routing = extractRouting(messages);
|
||||
|
||||
// Handle commands: categorize chat messages
|
||||
const adminUserIds = config.adminUserIds ?? new Set<string>();
|
||||
const normalMessages = [];
|
||||
// Command handling: the host router gates filtered and unauthorized
|
||||
// admin commands before they reach the container. The only command
|
||||
// the runner handles directly is /clear (session reset).
|
||||
const normalMessages: MessageInRow[] = [];
|
||||
const commandIds: string[] = [];
|
||||
|
||||
for (const msg of messages) {
|
||||
if (msg.kind !== 'chat' && msg.kind !== 'chat-sdk') {
|
||||
normalMessages.push(msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
const cmdInfo = categorizeMessage(msg);
|
||||
|
||||
if (cmdInfo.category === 'filtered') {
|
||||
// Silently drop — mark completed, don't process
|
||||
log(`Filtered command: ${cmdInfo.command} (msg: ${msg.id})`);
|
||||
if ((msg.kind === 'chat' || msg.kind === 'chat-sdk') && isClearCommand(msg)) {
|
||||
log('Clearing session (resetting continuation)');
|
||||
continuation = undefined;
|
||||
clearStoredSessionId();
|
||||
writeMessageOut({
|
||||
id: generateId(),
|
||||
kind: 'chat',
|
||||
platform_id: routing.platformId,
|
||||
channel_type: routing.channelType,
|
||||
thread_id: routing.threadId,
|
||||
content: JSON.stringify({ text: 'Session cleared.' }),
|
||||
});
|
||||
commandIds.push(msg.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cmdInfo.category === 'admin') {
|
||||
if (!cmdInfo.senderId || !adminUserIds.has(cmdInfo.senderId)) {
|
||||
log(`Admin command denied: ${cmdInfo.command} from ${cmdInfo.senderId} (msg: ${msg.id})`);
|
||||
writeMessageOut({
|
||||
id: generateId(),
|
||||
kind: 'chat',
|
||||
platform_id: routing.platformId,
|
||||
channel_type: routing.channelType,
|
||||
thread_id: routing.threadId,
|
||||
content: JSON.stringify({ text: `Permission denied: ${cmdInfo.command} requires admin access.` }),
|
||||
});
|
||||
commandIds.push(msg.id);
|
||||
continue;
|
||||
}
|
||||
// Handle admin commands directly
|
||||
if (cmdInfo.command === '/clear') {
|
||||
log('Clearing session (resetting continuation)');
|
||||
continuation = undefined;
|
||||
clearStoredSessionId();
|
||||
writeMessageOut({
|
||||
id: generateId(),
|
||||
kind: 'chat',
|
||||
platform_id: routing.platformId,
|
||||
channel_type: routing.channelType,
|
||||
thread_id: routing.threadId,
|
||||
content: JSON.stringify({ text: 'Session cleared.' }),
|
||||
});
|
||||
commandIds.push(msg.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Other admin commands — pass through to agent
|
||||
normalMessages.push(msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
// passthrough or none
|
||||
normalMessages.push(msg);
|
||||
}
|
||||
|
||||
// Mark handled /clear command messages as completed immediately
|
||||
if (commandIds.length > 0) {
|
||||
markCompleted(commandIds);
|
||||
}
|
||||
|
||||
// If every message was a /clear command, skip processing
|
||||
if (normalMessages.length === 0) {
|
||||
// Mark remaining processing IDs as completed
|
||||
const remainingIds = ids.filter((id) => !commandIds.includes(id));
|
||||
if (remainingIds.length > 0) markCompleted(remainingIds);
|
||||
log(`All ${messages.length} message(s) were commands, skipping query`);
|
||||
@@ -289,17 +245,14 @@ async function processQuery(query: AgentQuery, routing: RoutingContext): Promise
|
||||
const pollHandle = setInterval(() => {
|
||||
if (done) return;
|
||||
|
||||
// Skip system messages (MCP tool responses) and admin commands (need fresh query).
|
||||
// Skip system messages (MCP tool responses) and /clear (needs fresh query).
|
||||
// Also defer messages whose thread_id differs from the active turn's routing
|
||||
// — mixing threads into one streaming turn would send the reply to the wrong
|
||||
// thread because `routing` is captured at turn start. The next turn will pick
|
||||
// them up with fresh routing.
|
||||
const newMessages = getPendingMessages().filter((m) => {
|
||||
if (m.kind === 'system') return false;
|
||||
if (m.kind === 'chat' || m.kind === 'chat-sdk') {
|
||||
const cmd = categorizeMessage(m);
|
||||
if (cmd.category === 'admin') return false;
|
||||
}
|
||||
if ((m.kind === 'chat' || m.kind === 'chat-sdk') && isClearCommand(m)) return false;
|
||||
if ((m.thread_id ?? null) !== (routing.threadId ?? null)) return false;
|
||||
return true;
|
||||
});
|
||||
|
||||
276
docs/shared-src.md
Normal file
276
docs/shared-src.md
Normal file
@@ -0,0 +1,276 @@
|
||||
# Shared Source
|
||||
|
||||
Replace per-group agent-runner-src copies with a single shared read-only mount.
|
||||
|
||||
## Problem
|
||||
|
||||
Each agent group gets a full copy of `container/agent-runner/src/` at creation time. This copy is mounted RW at `/app/src` in the container. Consequences:
|
||||
|
||||
- Bug fixes and features don't propagate to existing groups
|
||||
- Owner edits to `container/agent-runner/src/` silently don't apply to existing groups
|
||||
- No tooling to diff or detect drift between groups and upstream
|
||||
- The RW mount lets agents write to their own runtime source without approval
|
||||
- Cross-cutting changes (host + container) break down when container code is per-group
|
||||
- Skills have the same copy-and-drift problem
|
||||
|
||||
## Design
|
||||
|
||||
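**Principle: RW is per-group, RO is shared.** Every mount is either read-only and shared across all groups, or read-write and scoped to one group. Source and skills become RO + shared. Personality, config, working files, and Claude state stay per-group and are writable from the host side. The one deliberate exception inside the container is `container.json`: it is per-group but mounted read-only, so the agent can read its config but not change it (see "Per-group customization surface"). This makes drift impossible by construction — no group can diverge from shared code because no group has write access to it.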
|
||||
|
||||
### Shared source mount
|
||||
|
||||
Mount `container/agent-runner/src/` into all containers at `/app/src` as **read-only**.
|
||||
|
||||
```
|
||||
container/agent-runner/src/ → /app/src (RO, shared)
|
||||
```
|
||||
|
||||
Source is never baked into the image. `/app/src/` exists only via this mount — running without it is an intentional startup failure (entrypoint `bun run /app/src/index.ts` → ENOENT). Source-only changes never trigger image rebuilds; edits to `.ts` files take effect on next container spawn.
|
||||
|
||||
Image rebuilds are only needed for:
|
||||
- Agent-runner npm dependency changes (`package.json` / `bun.lock`)
|
||||
- System packages, runtime versions, global CLI version bumps
|
||||
- Dockerfile/entrypoint changes
|
||||
|
||||
### Shared skills mount
|
||||
|
||||
Mount `container/skills/` into all containers at `/app/skills/` as **read-only**.
|
||||
|
||||
Per-group skill selection via `container.json`:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"skills": ["welcome", "agent-browser", "self-customize"]
|
||||
// or "skills": "all" (default)
|
||||
}
|
||||
```
|
||||
|
||||
At every spawn, the host syncs symlinks in the group's `.claude-shared/skills/` directory to match the selected set. For `"all"`, the set is recomputed from the shared skills dir on each spawn — newly-added upstream skills appear without intervention. Symlinks for skills no longer in the set are removed.
|
||||
|
||||
Each symlink points to a container path:
|
||||
|
||||
```
|
||||
.claude-shared/skills/welcome → /app/skills/welcome
|
||||
.claude-shared/skills/agent-browser → /app/skills/agent-browser
|
||||
```
|
||||
|
||||
Claude Code scans `/home/node/.claude/skills/`, follows the symlinks, and loads the selected skills. This is the same dangling-symlink-on-host pattern as `.claude-global.md`: host tools don't resolve the target, but the container mount makes it valid at read time.
|
||||
|
||||
### Per-group customization surface
|
||||
|
||||
What remains per-group (unchanged):
|
||||
|
||||
| Resource | Location | Mechanism |
|
||||
|----------|----------|-----------|
|
||||
| Personality / instructions | `groups/<folder>/CLAUDE.md` | Mount at `/workspace/agent` (RW, live) |
|
||||
| MCP servers | `groups/<folder>/container.json` | Read by runner at startup |
|
||||
| apt/npm packages | `groups/<folder>/container.json` | Per-group image layer |
|
||||
| Skill selection | `groups/<folder>/container.json` | Symlinks at spawn |
|
||||
| Additional mounts | `groups/<folder>/container.json` | Validated bind mounts |
|
||||
| Agent provider / model | `groups/<folder>/container.json` | Read by runner at startup |
|
||||
| Claude Code settings | `.claude-shared/settings.json` | Mount at `/home/node/.claude` (RW) |
|
||||
| Working files | `groups/<folder>/` | Mount at `/workspace/agent` (RW) |
|
||||
|
||||
`container.json` is mounted **read-only** inside the container (separate RO mount at `/workspace/agent/container.json`). The agent can read its own config but cannot modify it — config changes go through the self-mod approval flow on the host. The parent group dir (`/workspace/agent/`) stays RW for working files and CLAUDE.md.
|
||||
|
||||
### Self-modification
|
||||
|
||||
Existing config-level self-mod tools (`install_packages`, `add_mcp_server`, `request_rebuild`) mutate `container.json` and per-group images, not source. The approval flow should ask whether to apply the change to the current group or all groups — users often expect packages and MCP servers installed for one agent to be available everywhere. "All groups" writes to each group's `container.json` and rebuilds per-group images where needed.
|
||||
|
||||
Source-level self-modification (not yet implemented) uses staging: edits happen against a copy of `container/agent-runner/src/`, reviewed and swapped in on approval. Owner can also edit source directly.
|
||||
|
||||
### Providers
|
||||
|
||||
Provider install skills (`/add-opencode`, `/add-ollama-provider`) add the provider module to the shared `container/agent-runner/src/providers/` tree. This is an instance-level change — owner/admin action, affects all groups. Which provider a group uses is per-group config (`"provider": "opencode"` in `container.json`). The shared source ships all installed provider modules; groups select.
|
||||
|
||||
## Environment variables
|
||||
|
||||
Env is for things read by code we don't own: glibc, Node's http agent, CLIs we shell out to. Everything NanoClaw-specific moves out of env.
|
||||
|
||||
**Stays in env (read by non-nanoclaw code):**
|
||||
|
||||
| Var | Reader |
|
||||
|---|---|
|
||||
| `TZ` | glibc, child processes |
|
||||
| `HTTPS_PROXY`, `NO_PROXY` | Node http agent, curl, git, etc. (OneCLI-injected) |
|
||||
| `NODE_EXTRA_CA_CERTS` | Node at startup (OneCLI-injected) |
|
||||
|
||||
**Moves to `container.json` (read by runner at startup):**
|
||||
|
||||
| Var | Reason |
|
||||
|---|---|
|
||||
| `AGENT_PROVIDER` | Per-group config; runner reads before importing provider module |
|
||||
| `NANOCLAW_AGENT_GROUP_NAME` | Per-group identity |
|
||||
| `NANOCLAW_ASSISTANT_NAME` | Per-group identity |
|
||||
| `NANOCLAW_MAX_MESSAGES_PER_PROMPT` | Config constant; per-group override possible |
|
||||
|
||||
**Deleted (admin gating moves to router):**
|
||||
|
||||
`NANOCLAW_ADMIN_USER_IDS` is removed entirely — not moved to a new location. The container no longer makes authorization decisions. See **Router command gate** below.
|
||||
|
||||
**Hardcoded as conventions:**
|
||||
|
||||
| Var | Convention |
|
||||
|---|---|
|
||||
| `SESSION_INBOUND_DB_PATH` | `/workspace/inbound.db` |
|
||||
| `SESSION_OUTBOUND_DB_PATH` | `/workspace/outbound.db` |
|
||||
| `SESSION_HEARTBEAT_PATH` | `/workspace/.heartbeat` |
|
||||
| `NANOCLAW_AGENT_GROUP_ID` | Read from `/workspace/agent/container.json` at startup |
|
||||
|
||||
### Runner startup order
|
||||
|
||||
The runner can no longer assume DB paths or provider identity are handed to it in env. Revised startup:
|
||||
|
||||
1. Set up logging.
|
||||
2. Read `/workspace/agent/container.json` (nested read-only mount on top of the RW group dir; the runner only reads it).
|
||||
3. Open `/workspace/inbound.db` and `/workspace/outbound.db` (fixed paths).
|
||||
4. Read bootstrap tables from `inbound.db` (destinations).
|
||||
5. Import the provider module selected by `container.json`.
|
||||
6. Enter the poll loop.
|
||||
|
||||
### Router command gate
|
||||
|
||||
The host router gates slash commands before writing to `messages_in`. The container still handles whatever reaches it; it just stops making authorization decisions.
|
||||
|
||||
1. **Filtered commands** (`/help`, `/login`, `/logout`, `/doctor`, `/config`, `/start`, `/remote-control`) → drop silently. Never reach the container.
|
||||
2. **Admin commands** (`/clear`, `/compact`, `/context`, `/cost`, `/files`) → check sender against `user_roles` (owners + global admins + admins scoped to this agent group).
|
||||
- Denied: write "Permission denied: `<cmd>` requires admin access." directly to `messages_out` in the same thread. Do not write to `messages_in`.
|
||||
- Allowed: pass through to container unchanged.
|
||||
3. **Normal messages** → pass through unchanged.
|
||||
|
||||
Admin commands that flow through continue to be handled the same way they are today:
|
||||
- `/clear` — container's existing handler in `poll-loop.ts` resets session continuation and writes "Session cleared."
|
||||
- `/compact`, `/context`, `/cost`, `/files` — container forwards them to Claude Code's native slash-command handler.
|
||||
|
||||
Container receives only authorized messages. The runner has no admin concept, no `adminUserIds` field, no admin-gate branch — but it still recognizes `/clear` to reset session state.
|
||||
|
||||
### Scope rules
|
||||
|
||||
Each channel answers a single scope question:
|
||||
|
||||
| Channel | Scope | What it holds |
|
||||
|---|---|---|
|
||||
| Env vars | Process | Things read by code we don't own (`TZ`, `HTTPS_PROXY`) |
|
||||
| `container.json` | Per-group | Per-group config (MCP, packages, provider, model, skills, mounts) |
|
||||
| `inbound.db` / `outbound.db` | Per-session | Messages, session state, and host-projected views of cross-group state (destinations) |
|
||||
| Central DB (`data/v2.db`) | Cross-group | Users, roles, wiring, messaging groups, sessions |
|
||||
|
||||
The runner reads from env (for external-convention vars), `container.json` (for its own group's config), and `inbound.db` (for messages + projected views). It never reads the central DB directly — any cross-group state it needs is projected by the host into `inbound.db` first.
|
||||
|
||||
After this change, the spawn-time `-e` flags shrink from ~10 to ~3-5 (TZ + OneCLI networking). No `NANOCLAW_*` env var survives.
|
||||
|
||||
## Image layer strategy
|
||||
|
||||
Single Dockerfile with aggressive layer ordering: stable layers first, frequently-bumped layers last. BuildKit's layer cache handles "upstream layers unchanged" rebuilds efficiently — a separate base image isn't justified.
|
||||
|
||||
Two image tags exist at runtime:
|
||||
|
||||
```
|
||||
nanoclaw-agent:latest — shared base (rebuild: dep/CLI bumps + Dockerfile changes)
|
||||
└── nanoclaw-agent:<group> — per-group apt/npm packages (rebuild: per-group via install_packages)
|
||||
```
|
||||
|
||||
Layer order within the base:
|
||||
|
||||
```dockerfile
|
||||
FROM node:22-slim
|
||||
|
||||
# System deps (apt) — rarely change
|
||||
RUN apt-get install ...
|
||||
|
||||
# Bun — pinned version, rarely changes
|
||||
RUN ... bun
|
||||
|
||||
# Agent-runner deps — cached independently of CLI versions
|
||||
COPY agent-runner/package.json agent-runner/bun.lock /app/
|
||||
RUN cd /app && bun install --frozen-lockfile
|
||||
|
||||
# Global CLIs — most stable first, most frequently bumped last
|
||||
RUN pnpm install -g "vercel@${VERCEL_VERSION}"
|
||||
RUN pnpm install -g "agent-browser@${AGENT_BROWSER_VERSION}"
|
||||
RUN pnpm install -g "@anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}"
|
||||
```
|
||||
|
||||
Bumping claude-code (the most common change) only rebuilds one layer. Agent-runner deps and other CLIs stay cached.
|
||||
|
||||
Source is never baked into the image — always provided by the shared RO mount at runtime.
|
||||
|
||||
### Agent-triggered version bumps
|
||||
|
||||
Agents can request a claude-code version bump via a new self-mod tool (`bump_claude_code`). Same fire-and-forget pattern as `install_packages`: agent requests → owner approves → host rebuilds base image → kill all running containers. Unlike `install_packages` (per-group image), this rebuilds the shared base image and affects all groups.
|
||||
|
||||
## Changes
|
||||
|
||||
### `group-init.ts`
|
||||
|
||||
- Remove the `agent-runner-src` copy block (lines 109–117)
|
||||
- Remove the `skills/` copy block (lines 100–107)
|
||||
- Skill symlinks are no longer created at init — sync is spawn-owned (see `container-runner.ts`)
|
||||
|
||||
### `container-runner.ts` `buildMounts()`
|
||||
|
||||
- Remove per-group `agent-runner-src` mount (lines 206–209)
|
||||
- Add shared RO mount: `container/agent-runner/src/` → `/app/src`
|
||||
- Add shared RO mount: `container/skills/` → `/app/skills`
|
||||
- Sync skill symlinks in `.claude-shared/skills/` at spawn: write desired set from `container.json` (`"all"` = every skill in the shared dir, recomputed per spawn), remove symlinks not in the set
|
||||
|
||||
### `container-runner.ts` `buildContainerArgs()`
|
||||
|
||||
- Remove `-e SESSION_INBOUND_DB_PATH`, `-e SESSION_OUTBOUND_DB_PATH`, `-e SESSION_HEARTBEAT_PATH` (hardcoded conventions now)
|
||||
- Remove `-e AGENT_PROVIDER` (moves to `container.json`)
|
||||
- Remove `-e NANOCLAW_ASSISTANT_NAME`, `-e NANOCLAW_AGENT_GROUP_ID`, `-e NANOCLAW_AGENT_GROUP_NAME`
|
||||
- Remove `-e NANOCLAW_MAX_MESSAGES_PER_PROMPT`
|
||||
- Remove the `user_roles` join + `-e NANOCLAW_ADMIN_USER_IDS` block (lines 269–287) entirely. Admin gating moves to the router — no admin data passed to the container.
|
||||
- Keep: `-e TZ`, OneCLI-contributed env (`HTTPS_PROXY`, `NODE_EXTRA_CA_CERTS`, `NO_PROXY`)
|
||||
|
||||
### `router.ts` (new command gate)
|
||||
|
||||
- Classify inbound slash commands before writing to `messages_in`: filtered / admin / normal.
|
||||
- Filtered (`/help`, `/login`, `/logout`, `/doctor`, `/config`, `/start`, `/remote-control`) → drop silently.
|
||||
- Admin commands (`/clear`, `/compact`, `/context`, `/cost`, `/files`) from non-admins → write "Permission denied" directly to `messages_out`, skip `messages_in`.
|
||||
- All authorized messages (admin commands from admins, and normal messages) → pass through unchanged to `messages_in`. Container handles them as today.
|
||||
- The `ADMIN_COMMANDS` and `FILTERED_COMMANDS` lists move from `container/agent-runner/src/formatter.ts` to a host-side module.
|
||||
|
||||
### `container/agent-runner/src/` (runner)
|
||||
|
||||
- New `config.ts` module: loads `/workspace/agent/container.json` at startup, exposes a typed config singleton. All previous `process.env.NANOCLAW_*` reads go through this.
|
||||
- `db/connection.ts`: use hardcoded paths `/workspace/inbound.db` and `/workspace/outbound.db`; drop `SESSION_*_DB_PATH` lookups.
|
||||
- `formatter.ts`: remove `ADMIN_COMMANDS`, `FILTERED_COMMANDS`, and the `filtered` / admin-gate categorization. Keep enough to recognize `/clear` so `poll-loop.ts` can route it (e.g., a narrow `isClearCommand(msg)` helper).
|
||||
- `poll-loop.ts`: remove `adminUserIds` field from config type and the admin-gate branch (lines 113–126). Keep the `/clear` handler (lines 128–142) — `/clear` still flows through from the router.
|
||||
- Provider selection (`providers/index.ts` or equivalent): read provider from config singleton, not env.
|
||||
|
||||
### `container-config.ts`
|
||||
|
||||
- Add `skills` field to `ContainerConfig` (`string[] | "all"`, default `"all"`)
|
||||
- Add fields: `provider`, `groupName`, `assistantName`, `maxMessagesPerPrompt` (optional, falls back to code default)
|
||||
|
||||
### `.env` / `.env.example`
|
||||
|
||||
- Remove any `NANOCLAW_*` entries that were documented as tunables. Update `.env.example` to list only TZ and OneCLI-related vars as valid overrides.
|
||||
|
||||
### DB migration
|
||||
|
||||
- Drop `agent_groups.agent_provider` column and `sessions.agent_provider` column. Source of truth becomes `container.json.provider`.
|
||||
- One-time data migration reads existing values and writes them to each group's `container.json`. Sessions lose any per-session provider override — provider is a per-group property now.
|
||||
|
||||
### Migration
|
||||
|
||||
**This is a breaking change.** Host restart kills all running containers. No gradual rollout. Any code referencing dropped columns or removed env vars must be updated before the migration runs.
|
||||
|
||||
- Provider install skills (`/add-opencode`, `/add-ollama-provider`) write to the shared `container/agent-runner/src/providers/` tree. Per-group provider overlays are removed. Existing provider code in any per-group `agent-runner-src/providers/` must be moved to the shared tree before cutover.
|
||||
- Delete existing `data/v2-sessions/<id>/agent-runner-src/` directories on first run after cutover.
|
||||
- Existing `.claude-shared/skills/` directories get replaced with symlinks on next spawn.
|
||||
- DB migration (see above) reads `agent_provider` columns and projects into `container.json`, then drops the columns.
|
||||
|
||||
## What triggers what
|
||||
|
||||
| Change | Action needed | Scope |
|
||||
|--------|--------------|-------|
|
||||
| Agent-runner `.ts` source | Kill running containers | All groups |
|
||||
| Agent-runner npm deps | Rebuild `nanoclaw-agent` + kill all | All groups |
|
||||
| System deps, Bun, Node | Rebuild `nanoclaw-agent` + kill all | All groups |
|
||||
| Claude-code version bump | Rebuild `nanoclaw-agent` + kill all | All groups (agent-triggerable) |
|
||||
| Skill content | Kill running containers | All groups |
|
||||
| Per-group apt/npm packages | `buildAgentGroupImage()` + kill | One group |
|
||||
| Per-group config (MCP, mounts, provider, model, skills) | Kill that group's containers | One group |
|
||||
| CLAUDE.md, working files | Nothing (live via RW mount) | One group |
|
||||
70
src/command-gate.ts
Normal file
70
src/command-gate.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
/**
|
||||
* Host-side command gate. Classifies inbound slash commands and gates
|
||||
* them before they reach the container.
|
||||
*
|
||||
* - Filtered commands: dropped silently (never reach the container)
|
||||
* - Admin commands: checked against user_roles; denied senders get a
|
||||
* "Permission denied" response written directly to messages_out
|
||||
* - Normal messages: pass through unchanged
|
||||
*/
|
||||
import { getDb, hasTable } from './db/connection.js';
|
||||
|
||||
export type GateResult =
|
||||
| { action: 'pass' }
|
||||
| { action: 'filter' }
|
||||
| { action: 'deny'; command: string };
|
||||
|
||||
// Slash commands for which gateCommand returns { action: 'filter' }.
// NOTE(review): docs/shared-src.md also lists `/start` as a filtered command,
// but it is not in this set — confirm which of the two is intended.
const FILTERED_COMMANDS = new Set(['/help', '/login', '/logout', '/doctor', '/config', '/remote-control']);
|
||||
// Slash commands that only pass when isAdmin() accepts the sender; otherwise
// gateCommand returns { action: 'deny', command }.
const ADMIN_COMMANDS = new Set(['/clear', '/compact', '/context', '/cost', '/files']);
|
||||
|
||||
/**
 * Classify an inbound message and decide whether it should reach the container.
 *
 * `content` is normally a JSON-encoded payload whose `text` field carries the
 * message; when it is not valid JSON the whole string is treated as raw text.
 * Only the first whitespace-delimited token is inspected, case-insensitively.
 *
 * @param content - JSON payload with a `text` field, or raw message text.
 * @param userId - Resolved sender id, or null when the sender is unknown.
 * @param agentGroupId - Agent group the message is being routed to.
 * @returns `pass` for normal messages, unknown slash commands and authorized
 *   admin commands; `filter` for silently-dropped commands; `deny` (carrying
 *   the lower-cased command) for admin commands from unauthorized senders.
 */
export function gateCommand(content: string, userId: string | null, agentGroupId: string): GateResult {
  const text = extractMessageText(content);
  if (!text.startsWith('/')) return { action: 'pass' };

  // "/Clear now" → "/clear": arguments and casing do not affect classification.
  const [firstToken = ''] = text.split(/\s/);
  const command = firstToken.toLowerCase();

  if (FILTERED_COMMANDS.has(command)) return { action: 'filter' };

  if (ADMIN_COMMANDS.has(command)) {
    return isAdmin(userId, agentGroupId) ? { action: 'pass' } : { action: 'deny', command };
  }

  // Unknown slash commands pass through (the agent/SDK handles them)
  return { action: 'pass' };
}

/**
 * Pull the trimmed message text out of a payload without trusting its shape.
 * Invalid JSON is treated as raw text; valid JSON without a string `text`
 * field yields an empty string (which gateCommand classifies as `pass`).
 */
function extractMessageText(content: string): string {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    return content.trim();
  }
  if (typeof parsed !== 'object' || parsed === null) return '';
  const text = (parsed as { text?: unknown }).text;
  return typeof text === 'string' ? text.trim() : '';
}
|
||||
|
||||
/**
 * Decide whether `userId` may run admin commands against `agentGroupId`.
 *
 * A sender qualifies when `user_roles` holds an `owner` or `admin` row for
 * them that is either global (`agent_group_id IS NULL`) or scoped to this
 * agent group.
 *
 * When the `user_roles` table does not exist (no permissions module), every
 * sender is allowed. That check runs before the null-`userId` guard so that
 * permissionless installs also allow senders whose id could not be resolved.
 *
 * @param userId - Resolved sender id, or null when unknown.
 * @param agentGroupId - Agent group the command targets.
 * @returns true when the sender is authorized for admin commands.
 */
function isAdmin(userId: string | null, agentGroupId: string): boolean {
  const db = getDb();
  if (!hasTable(db, 'user_roles')) return true; // no permissions module = allow all
  if (!userId) return false;

  const row = db
    .prepare(
      `SELECT 1 FROM user_roles
       WHERE user_id = ?
         AND (role = 'owner' OR role = 'admin')
         AND (agent_group_id IS NULL OR agent_group_id = ?)
       LIMIT 1`,
    )
    .get(userId, agentGroupId);
  return row != null;
}
|
||||
@@ -1,15 +1,8 @@
|
||||
/**
|
||||
* Per-group container config, stored as a plain JSON file at
|
||||
* `groups/<folder>/container.json`. Replaces the former
|
||||
* `agent_groups.container_config` DB column.
|
||||
*
|
||||
* Shape:
|
||||
* {
|
||||
* mcpServers: { [name]: { command, args, env } }
|
||||
* packages: { apt: string[], npm: string[] }
|
||||
* imageTag?: string // set by buildAgentGroupImage on rebuild
|
||||
* additionalMounts?: Array<{hostPath, containerPath, readonly}>
|
||||
* }
|
||||
* `groups/<folder>/container.json`. Mounted read-only inside the container
|
||||
* at `/workspace/agent/container.json` — the runner reads it at startup but
|
||||
* cannot modify it. Config changes go through the self-mod approval flow.
|
||||
*
|
||||
* All fields are optional — a missing file or a partial file both resolve
|
||||
* to sensible defaults. Writes are atomic-enough (write-then-rename is not
|
||||
@@ -38,6 +31,18 @@ export interface ContainerConfig {
|
||||
packages: { apt: string[]; npm: string[] };
|
||||
imageTag?: string;
|
||||
additionalMounts: AdditionalMountConfig[];
|
||||
/** Which skills to enable — array of skill names or "all" (default). */
|
||||
skills: string[] | 'all';
|
||||
/** Agent provider name (e.g. "claude", "opencode"). Default: "claude". */
|
||||
provider?: string;
|
||||
/** Agent group display name (used in transcript archiving). */
|
||||
groupName?: string;
|
||||
/** Assistant display name (used in system prompt / responses). */
|
||||
assistantName?: string;
|
||||
/** Agent group ID — set by the host, read by the runner. */
|
||||
agentGroupId?: string;
|
||||
/** Max messages per prompt. Falls back to code default if unset. */
|
||||
maxMessagesPerPrompt?: number;
|
||||
}
|
||||
|
||||
function emptyConfig(): ContainerConfig {
|
||||
@@ -45,6 +50,7 @@ function emptyConfig(): ContainerConfig {
|
||||
mcpServers: {},
|
||||
packages: { apt: [], npm: [] },
|
||||
additionalMounts: [],
|
||||
skills: 'all',
|
||||
};
|
||||
}
|
||||
|
||||
@@ -71,6 +77,12 @@ export function readContainerConfig(folder: string): ContainerConfig {
|
||||
},
|
||||
imageTag: raw.imageTag,
|
||||
additionalMounts: raw.additionalMounts ?? [],
|
||||
skills: raw.skills ?? 'all',
|
||||
provider: raw.provider,
|
||||
groupName: raw.groupName,
|
||||
assistantName: raw.assistantName,
|
||||
agentGroupId: raw.agentGroupId,
|
||||
maxMessagesPerPrompt: raw.maxMessagesPerPrompt,
|
||||
};
|
||||
} catch (err) {
|
||||
console.error(`[container-config] failed to parse ${p}: ${String(err)}`);
|
||||
|
||||
@@ -9,7 +9,7 @@ import path from 'path';
|
||||
|
||||
import { OneCLI } from '@onecli-sh/sdk';
|
||||
|
||||
import { CONTAINER_IMAGE, DATA_DIR, GROUPS_DIR, MAX_MESSAGES_PER_PROMPT, ONECLI_URL, TIMEZONE } from './config.js';
|
||||
import { CONTAINER_IMAGE, DATA_DIR, GROUPS_DIR, ONECLI_URL, TIMEZONE } from './config.js';
|
||||
import { readContainerConfig, writeContainerConfig } from './container-config.js';
|
||||
import { CONTAINER_RUNTIME_BIN, hostGatewayArgs, readonlyMountArgs, stopContainer } from './container-runtime.js';
|
||||
import { getAgentGroup } from './db/agent-groups.js';
|
||||
@@ -91,17 +91,25 @@ async function spawnContainer(session: Session): Promise<void> {
|
||||
}
|
||||
writeSessionRouting(agentGroup.id, session.id);
|
||||
|
||||
// Read container config once — threaded through provider resolution,
|
||||
// buildMounts, and buildContainerArgs so we don't re-read the file.
|
||||
const containerConfig = readContainerConfig(agentGroup.folder);
|
||||
|
||||
// Ensure container.json has the agent group identity fields the runner needs.
|
||||
// Written at spawn time so the runner can read them from the RO mount.
|
||||
ensureRuntimeFields(containerConfig, agentGroup);
|
||||
|
||||
// Resolve the effective provider + any host-side contribution it declares
|
||||
// (extra mounts, env passthrough). Computed once and threaded through both
|
||||
// buildMounts and buildContainerArgs so side effects (mkdir, etc.) fire once.
|
||||
const { provider, contribution } = resolveProviderContribution(session, agentGroup);
|
||||
const { provider, contribution } = resolveProviderContribution(session, agentGroup, containerConfig);
|
||||
|
||||
const mounts = buildMounts(agentGroup, session, contribution);
|
||||
const mounts = buildMounts(agentGroup, session, containerConfig, contribution);
|
||||
const containerName = `nanoclaw-v2-${agentGroup.folder}-${Date.now()}`;
|
||||
// OneCLI agent identifier is always the agent group id — stable across
|
||||
// sessions and reversible via getAgentGroup() for approval routing.
|
||||
const agentIdentifier = agentGroup.id;
|
||||
const args = await buildContainerArgs(mounts, containerName, agentGroup, provider, contribution, agentIdentifier);
|
||||
const args = await buildContainerArgs(mounts, containerName, agentGroup, containerConfig, provider, contribution, agentIdentifier);
|
||||
|
||||
log.info('Spawning container', { sessionId: session.id, agentGroup: agentGroup.name, containerName });
|
||||
|
||||
@@ -156,8 +164,9 @@ export function killContainer(sessionId: string, reason: string): void {
|
||||
function resolveProviderContribution(
|
||||
session: Session,
|
||||
agentGroup: AgentGroup,
|
||||
containerConfig: import('./container-config.js').ContainerConfig,
|
||||
): { provider: string; contribution: ProviderContainerContribution } {
|
||||
const provider = (session.agent_provider || agentGroup.agent_provider || 'claude').toLowerCase();
|
||||
const provider = (containerConfig.provider || 'claude').toLowerCase();
|
||||
const fn = getProviderContainerConfig(provider);
|
||||
const contribution = fn
|
||||
? fn({
|
||||
@@ -172,15 +181,20 @@ function resolveProviderContribution(
|
||||
function buildMounts(
|
||||
agentGroup: AgentGroup,
|
||||
session: Session,
|
||||
containerConfig: import('./container-config.js').ContainerConfig,
|
||||
providerContribution: ProviderContainerContribution,
|
||||
): VolumeMount[] {
|
||||
const projectRoot = process.cwd();
|
||||
|
||||
// Per-group filesystem state lives forever after first creation. Init is
|
||||
// idempotent: it only writes paths that don't already exist, so this call
|
||||
// is a no-op for groups that have spawned before. Pulling in upstream
|
||||
// built-in skill or agent-runner source updates is an explicit operation
|
||||
// (host-mediated tools), not something the spawn path does silently.
|
||||
// is a no-op for groups that have spawned before.
|
||||
initGroupFilesystem(agentGroup);
|
||||
|
||||
// Sync skill symlinks based on container.json selection before mounting.
|
||||
const claudeDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, '.claude-shared');
|
||||
syncSkillSymlinks(claudeDir, containerConfig);
|
||||
|
||||
const mounts: VolumeMount[] = [];
|
||||
const sessDir = sessionDir(agentGroup.id, session.id);
|
||||
const groupDir = path.resolve(GROUPS_DIR, agentGroup.folder);
|
||||
@@ -188,28 +202,37 @@ function buildMounts(
|
||||
// Session folder at /workspace (contains inbound.db, outbound.db, outbox/, .claude/)
|
||||
mounts.push({ hostPath: sessDir, containerPath: '/workspace', readonly: false });
|
||||
|
||||
// Agent group folder at /workspace/agent
|
||||
// Agent group folder at /workspace/agent (RW for working files + CLAUDE.md)
|
||||
mounts.push({ hostPath: groupDir, containerPath: '/workspace/agent', readonly: false });
|
||||
|
||||
// Global memory directory — always read-only. Edits to global config
|
||||
// happen through the approval flow, not by handing one workspace RW.
|
||||
// container.json — nested RO mount on top of RW group dir so the agent
|
||||
// can read its config but cannot modify it.
|
||||
const containerJsonPath = path.join(groupDir, 'container.json');
|
||||
if (fs.existsSync(containerJsonPath)) {
|
||||
mounts.push({ hostPath: containerJsonPath, containerPath: '/workspace/agent/container.json', readonly: true });
|
||||
}
|
||||
|
||||
// Global memory directory — always read-only.
|
||||
const globalDir = path.join(GROUPS_DIR, 'global');
|
||||
if (fs.existsSync(globalDir)) {
|
||||
mounts.push({ hostPath: globalDir, containerPath: '/workspace/global', readonly: true });
|
||||
}
|
||||
|
||||
// Per-group .claude-shared at /home/node/.claude (Claude state, settings,
|
||||
// skills — initialized once at group creation, persistent thereafter)
|
||||
const claudeDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, '.claude-shared');
|
||||
// skill symlinks)
|
||||
mounts.push({ hostPath: claudeDir, containerPath: '/home/node/.claude', readonly: false });
|
||||
|
||||
// Per-group agent-runner source at /app/src (initialized once at group
|
||||
// creation, persistent thereafter — agents can modify their runner)
|
||||
const groupRunnerDir = path.join(DATA_DIR, 'v2-sessions', agentGroup.id, 'agent-runner-src');
|
||||
mounts.push({ hostPath: groupRunnerDir, containerPath: '/app/src', readonly: false });
|
||||
// Shared agent-runner source — read-only, same code for all groups.
|
||||
const agentRunnerSrc = path.join(projectRoot, 'container', 'agent-runner', 'src');
|
||||
mounts.push({ hostPath: agentRunnerSrc, containerPath: '/app/src', readonly: true });
|
||||
|
||||
// Additional mounts from container config (groups/<folder>/container.json)
|
||||
const containerConfig = readContainerConfig(agentGroup.folder);
|
||||
// Shared skills — read-only, symlinks in .claude-shared/skills/ point here.
|
||||
const skillsSrc = path.join(projectRoot, 'container', 'skills');
|
||||
if (fs.existsSync(skillsSrc)) {
|
||||
mounts.push({ hostPath: skillsSrc, containerPath: '/app/skills', readonly: true });
|
||||
}
|
||||
|
||||
// Additional mounts from container config
|
||||
if (containerConfig.additionalMounts && containerConfig.additionalMounts.length > 0) {
|
||||
const validated = validateAdditionalMounts(containerConfig.additionalMounts, agentGroup.name);
|
||||
mounts.push(...validated);
|
||||
@@ -223,32 +246,113 @@ function buildMounts(
|
||||
return mounts;
|
||||
}
|
||||
|
||||
/**
 * Sync skill symlinks in `<claudeDir>/skills/` to match the container.json
 * selection. Each symlink points to a container path (/app/skills/<name>)
 * so it's dangling on the host but valid inside the container.
 *
 * - `skills: "all"` is recomputed on every call from the directories under
 *   `<cwd>/container/skills`.
 * - Symlinks whose name is not in the desired set are removed. Non-symlink
 *   entries outside the desired set are left untouched.
 * - For a desired skill, anything at the link path that is not already a
 *   symlink to the expected target is replaced: a real file or directory
 *   (e.g. a stale per-group copy) or a symlink pointing elsewhere.
 * - Names that are not a single path segment ("", ".", "..", anything
 *   containing a separator) are ignored so a bad config entry can never make
 *   this function touch paths outside the skills directory.
 *
 * @param claudeDir - Host path of the group's `.claude-shared` directory.
 * @param containerConfig - Parsed container.json; only `skills` is read.
 */
function syncSkillSymlinks(
  claudeDir: string,
  containerConfig: import('./container-config.js').ContainerConfig,
): void {
  const skillsDir = path.join(claudeDir, 'skills');
  fs.mkdirSync(skillsDir, { recursive: true });

  const sharedSkillsDir = path.join(process.cwd(), 'container', 'skills');
  const selection: unknown = containerConfig.skills;
  let requested: unknown[];
  if (selection === 'all') {
    // Recompute from shared dir — newly-added upstream skills appear automatically
    requested = listSharedSkillNames(sharedSkillsDir);
  } else {
    requested = Array.isArray(selection) ? selection : [];
  }
  const desired = new Set(requested.filter(isSimpleSkillName));

  // Remove symlinks not in the desired set
  for (const entry of fs.readdirSync(skillsDir)) {
    if (desired.has(entry)) continue;
    const entryPath = path.join(skillsDir, entry);
    let isSymlink = false;
    try {
      isSymlink = fs.lstatSync(entryPath).isSymbolicLink();
    } catch {
      continue;
    }
    if (isSymlink) fs.unlinkSync(entryPath);
  }

  // Create (or repair) symlinks for desired skills (container path targets)
  for (const skill of desired) {
    const linkPath = path.join(skillsDir, skill);
    const target = `/app/skills/${skill}`;
    const stat = fs.lstatSync(linkPath, { throwIfNoEntry: false });

    if (stat?.isSymbolicLink()) {
      if (fs.readlinkSync(linkPath) === target) continue; // already correct
      fs.unlinkSync(linkPath);
    } else if (stat) {
      // Real file/directory shadowing the shared skill — replace it so the
      // group cannot keep running a drifted copy.
      fs.rmSync(linkPath, { recursive: true, force: true });
    }
    fs.symlinkSync(target, linkPath);
  }
}

/** List entries of the shared skills dir that are (or resolve to) directories. */
function listSharedSkillNames(sharedSkillsDir: string): string[] {
  if (!fs.existsSync(sharedSkillsDir)) return [];
  return fs.readdirSync(sharedSkillsDir).filter((entry) => {
    try {
      return fs.statSync(path.join(sharedSkillsDir, entry)).isDirectory();
    } catch {
      return false;
    }
  });
}

/** True when `name` is a non-empty single path segment other than "." / "..". */
function isSimpleSkillName(name: unknown): name is string {
  return (
    typeof name === 'string' && name !== '' && name !== '.' && name !== '..' && path.basename(name) === name
  );
}
|
||||
|
||||
/**
 * Bring the identity fields in container.json in line with the agent group
 * record: `agentGroupId` mirrors the group's id, while `groupName` and
 * `assistantName` both mirror the group's name.
 *
 * The passed-in config object is updated in place. The file is rewritten
 * through writeContainerConfig only when at least one field actually
 * changed, so repeated spawns with unchanged values cause no file churn.
 */
function ensureRuntimeFields(
  containerConfig: import('./container-config.js').ContainerConfig,
  agentGroup: AgentGroup,
): void {
  const expected = {
    agentGroupId: agentGroup.id,
    groupName: agentGroup.name,
    assistantName: agentGroup.name,
  };

  let changed = false;
  for (const field of ['agentGroupId', 'groupName', 'assistantName'] as const) {
    if (containerConfig[field] === expected[field]) continue;
    containerConfig[field] = expected[field];
    changed = true;
  }

  if (!changed) return;
  writeContainerConfig(agentGroup.folder, containerConfig);
}
|
||||
|
||||
async function buildContainerArgs(
|
||||
mounts: VolumeMount[],
|
||||
containerName: string,
|
||||
agentGroup: AgentGroup,
|
||||
containerConfig: import('./container-config.js').ContainerConfig,
|
||||
provider: string,
|
||||
providerContribution: ProviderContainerContribution,
|
||||
agentIdentifier?: string,
|
||||
): Promise<string[]> {
|
||||
const args: string[] = ['run', '--rm', '--name', containerName];
|
||||
|
||||
// Environment
|
||||
// Environment — only vars read by code we don't own.
|
||||
// Everything NanoClaw-specific is in container.json (read by runner at startup).
|
||||
args.push('-e', `TZ=${TIMEZONE}`);
|
||||
args.push('-e', `AGENT_PROVIDER=${provider}`);
|
||||
// Two-DB split: container reads inbound.db, writes outbound.db
|
||||
args.push('-e', 'SESSION_INBOUND_DB_PATH=/workspace/inbound.db');
|
||||
args.push('-e', 'SESSION_OUTBOUND_DB_PATH=/workspace/outbound.db');
|
||||
args.push('-e', 'SESSION_HEARTBEAT_PATH=/workspace/.heartbeat');
|
||||
|
||||
if (agentGroup.name) {
|
||||
args.push('-e', `NANOCLAW_ASSISTANT_NAME=${agentGroup.name}`);
|
||||
}
|
||||
args.push('-e', `NANOCLAW_AGENT_GROUP_ID=${agentGroup.id}`);
|
||||
args.push('-e', `NANOCLAW_AGENT_GROUP_NAME=${agentGroup.name}`);
|
||||
// Cap on how many pending messages reach one prompt. Accumulated context
|
||||
// (trigger=0 rows) rides along with wake-eligible rows up to this cap.
|
||||
args.push('-e', `NANOCLAW_MAX_MESSAGES_PER_PROMPT=${MAX_MESSAGES_PER_PROMPT}`);
|
||||
|
||||
// Provider-contributed env vars (e.g. XDG_DATA_HOME, OPENCODE_*, NO_PROXY).
|
||||
if (providerContribution.env) {
|
||||
@@ -257,39 +361,8 @@ async function buildContainerArgs(
|
||||
}
|
||||
}
|
||||
|
||||
// Users allowed to run admin commands (e.g. /clear) inside this container.
|
||||
// Computed at wake time: owners + global admins + admins scoped to this
|
||||
// agent group. Role changes take effect on next container spawn.
|
||||
//
|
||||
// SQL inlined to keep core independent of the permissions module — we
|
||||
// guard on the `user_roles` table directly. If the permissions module
|
||||
// isn't installed, the table doesn't exist and the set stays empty; the
|
||||
// formatter treats an empty admin set as permissionless mode (every
|
||||
// sender is admin).
|
||||
const adminUserIds = new Set<string>();
|
||||
if (hasTable(getDb(), 'user_roles')) {
|
||||
const db = getDb();
|
||||
const owners = db
|
||||
.prepare("SELECT user_id FROM user_roles WHERE role = 'owner' AND agent_group_id IS NULL")
|
||||
.all() as Array<{ user_id: string }>;
|
||||
const globalAdmins = db
|
||||
.prepare("SELECT user_id FROM user_roles WHERE role = 'admin' AND agent_group_id IS NULL")
|
||||
.all() as Array<{ user_id: string }>;
|
||||
const scopedAdmins = db
|
||||
.prepare("SELECT user_id FROM user_roles WHERE role = 'admin' AND agent_group_id = ?")
|
||||
.all(agentGroup.id) as Array<{ user_id: string }>;
|
||||
for (const r of owners) adminUserIds.add(r.user_id);
|
||||
for (const r of globalAdmins) adminUserIds.add(r.user_id);
|
||||
for (const r of scopedAdmins) adminUserIds.add(r.user_id);
|
||||
}
|
||||
if (adminUserIds.size > 0) {
|
||||
args.push('-e', `NANOCLAW_ADMIN_USER_IDS=${Array.from(adminUserIds).join(',')}`);
|
||||
}
|
||||
|
||||
// OneCLI gateway — injects HTTPS_PROXY + certs so container API calls
|
||||
// are routed through the agent vault for credential injection.
|
||||
// Must ensureAgent first for non-admin groups, otherwise applyContainerConfig
|
||||
// rejects the unknown agent identifier and returns false.
|
||||
try {
|
||||
if (agentIdentifier) {
|
||||
await onecli.ensureAgent({ name: agentGroup.name, identifier: agentIdentifier });
|
||||
@@ -324,16 +397,7 @@ async function buildContainerArgs(
|
||||
}
|
||||
}
|
||||
|
||||
// Pass additional MCP servers from container config (groups/<folder>/container.json)
|
||||
const containerConfig = readContainerConfig(agentGroup.folder);
|
||||
if (containerConfig.mcpServers && Object.keys(containerConfig.mcpServers).length > 0) {
|
||||
args.push('-e', `NANOCLAW_MCP_SERVERS=${JSON.stringify(containerConfig.mcpServers)}`);
|
||||
}
|
||||
|
||||
// Override entrypoint: run v2 entry point directly via Bun (no tsc, no stdin).
|
||||
// The image's ENTRYPOINT (tini → entrypoint.sh) handles the stdin-piped
|
||||
// invocation path; the host-spawned sessions don't need stdin because all
|
||||
// IO flows through the mounted session DBs.
|
||||
args.push('--entrypoint', 'bash');
|
||||
|
||||
// Use per-agent-group image if one has been built, otherwise base image
|
||||
|
||||
@@ -37,12 +37,12 @@ const DEFAULT_SETTINGS_JSON =
|
||||
* an already-initialized group is a no-op.
|
||||
*
|
||||
* Called once per group lifetime: at creation, or defensively from
|
||||
* `buildMounts()` for groups that pre-date this code path. After init, the
|
||||
* host never overwrites any of these paths automatically — agents own them.
|
||||
* To pull in upstream changes, use the host-mediated reset/refresh tools.
|
||||
* `buildMounts()` for groups that pre-date this code path.
|
||||
*
|
||||
* Source code and skills are shared RO mounts — not copied per-group.
|
||||
* Skill symlinks are synced at spawn time by container-runner.ts.
|
||||
*/
|
||||
export function initGroupFilesystem(group: AgentGroup, opts?: { instructions?: string }): void {
|
||||
const projectRoot = process.cwd();
|
||||
const initialized: string[] = [];
|
||||
|
||||
// 1. groups/<folder>/ — group memory + working dir
|
||||
@@ -97,23 +97,12 @@ export function initGroupFilesystem(group: AgentGroup, opts?: { instructions?: s
|
||||
initialized.push('settings.json');
|
||||
}
|
||||
|
||||
// Skills directory — created empty here; symlinks are synced at spawn
|
||||
// time by container-runner.ts based on container.json skills selection.
|
||||
const skillsDst = path.join(claudeDir, 'skills');
|
||||
if (!fs.existsSync(skillsDst)) {
|
||||
const skillsSrc = path.join(projectRoot, 'container', 'skills');
|
||||
if (fs.existsSync(skillsSrc)) {
|
||||
fs.cpSync(skillsSrc, skillsDst, { recursive: true });
|
||||
initialized.push('skills/');
|
||||
}
|
||||
}
|
||||
|
||||
// 3. data/v2-sessions/<id>/agent-runner-src/ — per-group source copy
|
||||
const groupRunnerDir = path.join(DATA_DIR, 'v2-sessions', group.id, 'agent-runner-src');
|
||||
if (!fs.existsSync(groupRunnerDir)) {
|
||||
const agentRunnerSrc = path.join(projectRoot, 'container', 'agent-runner', 'src');
|
||||
if (fs.existsSync(agentRunnerSrc)) {
|
||||
fs.cpSync(agentRunnerSrc, groupRunnerDir, { recursive: true });
|
||||
initialized.push('agent-runner-src/');
|
||||
}
|
||||
fs.mkdirSync(skillsDst, { recursive: true });
|
||||
initialized.push('skills/');
|
||||
}
|
||||
|
||||
if (initialized.length > 0) {
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
* for policy refusals.
|
||||
*/
|
||||
import { getChannelAdapter } from './channels/channel-registry.js';
|
||||
import { gateCommand } from './command-gate.js';
|
||||
import { getAgentGroup } from './db/agent-groups.js';
|
||||
import { recordDroppedMessage } from './db/dropped-messages.js';
|
||||
import {
|
||||
@@ -28,7 +29,7 @@ import {
|
||||
import { findSessionForAgent } from './db/sessions.js';
|
||||
import { startTypingRefresh } from './modules/typing/index.js';
|
||||
import { log } from './log.js';
|
||||
import { resolveSession, writeSessionMessage } from './session-manager.js';
|
||||
import { resolveSession, writeSessionMessage, writeOutboundDirect } from './session-manager.js';
|
||||
import { wakeContainer } from './container-runner.js';
|
||||
import { getSession } from './db/sessions.js';
|
||||
import type { AgentGroup, MessagingGroup, MessagingGroupAgent } from './types.js';
|
||||
@@ -398,6 +399,29 @@ async function deliverToAgent(
|
||||
threadId: event.threadId,
|
||||
};
|
||||
|
||||
// Command gate: classify slash commands before they reach the container.
|
||||
// Filtered commands are dropped silently. Denied admin commands get a
|
||||
// permission-denied response written directly to messages_out.
|
||||
if (event.message.kind === 'chat' || event.message.kind === 'chat-sdk') {
|
||||
const gate = gateCommand(event.message.content, userId, agent.agent_group_id);
|
||||
if (gate.action === 'filter') {
|
||||
log.debug('Filtered command dropped by gate', { agentGroupId: agent.agent_group_id });
|
||||
return;
|
||||
}
|
||||
if (gate.action === 'deny') {
|
||||
writeOutboundDirect(session.agent_group_id, session.id, {
|
||||
id: `deny-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
|
||||
kind: 'chat',
|
||||
platformId: deliveryAddr.platformId,
|
||||
channelType: deliveryAddr.channelType,
|
||||
threadId: deliveryAddr.threadId,
|
||||
content: JSON.stringify({ text: `Permission denied: ${gate.command} requires admin access.` }),
|
||||
});
|
||||
log.info('Admin command denied by gate', { command: gate.command, userId, agentGroupId: agent.agent_group_id });
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
writeSessionMessage(session.agent_group_id, session.id, {
|
||||
id: messageIdForAgent(event.message.id, agent.agent_group_id),
|
||||
kind: event.message.kind,
|
||||
|
||||
@@ -279,6 +279,34 @@ export function openOutboundDb(agentGroupId: string, sessionId: string): Databas
|
||||
return openOutboundDbRaw(outboundDbPath(agentGroupId, sessionId));
|
||||
}
|
||||
|
||||
/**
 * Insert a message straight into a session's outbound DB so the host
 * delivery loop picks it up. The command gate uses this to send denial
 * responses without waking a container.
 *
 * The row's `seq` is the current `MAX(seq)` in `messages_out` plus 2 (or 2
 * when the table is empty), and its timestamp is SQLite's `datetime('now')`.
 * `INSERT OR IGNORE` skips the row rather than throwing on a constraint
 * conflict (presumably a duplicate `id` — confirm against the schema).
 *
 * @param agentGroupId - Agent group that owns the session.
 * @param sessionId - Session whose outbound DB receives the row.
 * @param message - Row payload; `content` is stored exactly as given.
 */
export function writeOutboundDirect(
  agentGroupId: string,
  sessionId: string,
  message: {
    id: string;
    kind: string;
    platformId: string | null;
    channelType: string | null;
    threadId: string | null;
    content: string;
  },
): void {
  const outbound = openOutboundDb(agentGroupId, sessionId);
  try {
    const insert = outbound.prepare(
      `INSERT OR IGNORE INTO messages_out (id, seq, timestamp, kind, platform_id, channel_type, thread_id, content)
       VALUES (?, (SELECT COALESCE(MAX(seq), 0) + 2 FROM messages_out), datetime('now'), ?, ?, ?, ?, ?)`,
    );
    const { id, kind, platformId, channelType, threadId, content } = message;
    insert.run(id, kind, platformId, channelType, threadId, content);
  } finally {
    // Always release the handle, even if prepare/run throws.
    outbound.close();
  }
}
|
||||
|
||||
/**
|
||||
* @deprecated Use openInboundDb / openOutboundDb instead.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user