feat(v2): builder-agent self-modification WIP + container-config as per-group file

Checkpoints the builder-agent dev-agent/worktree/swap flow (create_dev_agent,
request_swap, classifier, deadman, promote) before pivoting to a unified
draft-activate approach with OS-level RO enforcement. Lifts container_config
out of the agent_groups row into groups/<folder>/container.json so install_packages,
add_mcp_server, and rebuild flows can eventually route through the same draft
path as source edits.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
gavrielc
2026-04-15 18:42:10 +03:00
parent c54c779834
commit 75c2fde2b5
48 changed files with 4385 additions and 134 deletions

View File

@@ -4,11 +4,27 @@
* Thin orchestrator: init DB, run migrations, start channel adapters,
* start delivery polls, start sweep, handle shutdown.
*/
import { execFileSync } from 'child_process';
import path from 'path';
import { setSwapApprovalDelivery } from './builder-agent/approval.js';
import { handleSwapConfirmationResponse, setDeadmanDelivery, startDeadman } from './builder-agent/deadman.js';
import { handlePromoteResponse, setPromoteDelivery } from './builder-agent/promote.js';
import { runBuilderAgentStartupSweep } from './builder-agent/startup.js';
import {
applySwapFiles,
bailSwapForRetry,
captureSwapPreState,
commitSwap,
isHostLevelSwap,
parseSwapSummary,
requiresFullHostRebuild,
} from './builder-agent/swap.js';
import { removeDevWorktree } from './builder-agent/worktree.js';
import { DATA_DIR } from './config.js';
import { initDb } from './db/connection.js';
import { runMigrations } from './db/migrations/index.js';
import { getPendingSwap, updatePendingSwapStatus } from './db/pending-swaps.js';
import { getMessagingGroupsByChannel, getMessagingGroupAgents } from './db/messaging-groups.js';
import { ensureContainerRuntimeRunning, cleanupOrphans } from './container-runtime.js';
import { startActiveDeliveryPoll, startSweepDeliveryPoll, setDeliveryAdapter, stopDeliveryPolls } from './delivery.js';
@@ -34,7 +50,8 @@ import {
deletePendingApproval,
getSession,
} from './db/sessions.js';
import { getAgentGroup, updateAgentGroup } from './db/agent-groups.js';
import { getAgentGroup } from './db/agent-groups.js';
import { updateContainerConfig } from './container-config.js';
import { writeSessionMessage } from './session-manager.js';
import { wakeContainer, buildAgentGroupImage, killContainer } from './container-runner.js';
import { log } from './log.js';
@@ -55,6 +72,12 @@ async function main(): Promise<void> {
runMigrations(db);
log.info('Central DB ready', { path: dbPath });
// 1b. Builder-agent startup sweep — resumes any in-flight deadmans (from a
// host-level swap restart or an unexpected host crash) and cleans up
// orphan worktrees. Must run before channel adapters start so any
// rollback path-exit happens cleanly without partial startup state.
await runBuilderAgentStartupSweep();
// 2. Container runtime
ensureContainerRuntimeRunning();
cleanupOrphans();
@@ -135,6 +158,9 @@ async function main(): Promise<void> {
};
setDeliveryAdapter(deliveryAdapter);
setCredentialDeliveryAdapter(deliveryAdapter);
setSwapApprovalDelivery(deliveryAdapter);
setDeadmanDelivery(deliveryAdapter);
setPromoteDelivery(deliveryAdapter);
// 5. Start delivery polls
startActiveDeliveryPoll();
@@ -240,6 +266,33 @@ async function handleApprovalResponse(
selectedOption: string,
userId: string,
): Promise<void> {
// Builder-agent actions are handled out-of-band from the install_packages
// family: their session linkage is different and swap_confirmation doesn't
// use `payload.session_id` at all (the session is derived from the swap's
// originating_group_id). Dispatch them first.
if (approval.action === 'swap_confirmation') {
const payload = JSON.parse(approval.payload) as { swapRequestId?: string };
if (payload.swapRequestId) {
await handleSwapConfirmationResponse(approval.approval_id, payload.swapRequestId, selectedOption);
} else {
deletePendingApproval(approval.approval_id);
}
return;
}
if (approval.action === 'swap_request') {
await handleSwapRequestApproval(approval, selectedOption, userId);
return;
}
if (approval.action === 'promote_template') {
const payload = JSON.parse(approval.payload) as { swapRequestId?: string };
if (payload.swapRequestId) {
await handlePromoteResponse(approval.approval_id, payload.swapRequestId, selectedOption);
} else {
deletePendingApproval(approval.approval_id);
}
return;
}
if (!approval.session_id) {
deletePendingApproval(approval.approval_id);
return;
@@ -274,11 +327,14 @@ async function handleApprovalResponse(
if (approval.action === 'install_packages') {
const agentGroup = getAgentGroup(session.agent_group_id);
const containerConfig = agentGroup?.container_config ? JSON.parse(agentGroup.container_config) : {};
if (!containerConfig.packages) containerConfig.packages = { apt: [], npm: [] };
if (payload.apt) containerConfig.packages.apt.push(...payload.apt);
if (payload.npm) containerConfig.packages.npm.push(...payload.npm);
updateAgentGroup(session.agent_group_id, { container_config: JSON.stringify(containerConfig) });
if (!agentGroup) {
notify('install_packages approved but agent group missing.');
return;
}
updateContainerConfig(agentGroup.folder, (cfg) => {
if (payload.apt) cfg.packages.apt.push(...(payload.apt as string[]));
if (payload.npm) cfg.packages.npm.push(...(payload.npm as string[]));
});
const pkgs = [...(payload.apt || []), ...(payload.npm || [])].join(', ');
log.info('Package install approved', { approvalId: approval.approval_id, userId });
@@ -324,14 +380,17 @@ async function handleApprovalResponse(
}
} else if (approval.action === 'add_mcp_server') {
const agentGroup = getAgentGroup(session.agent_group_id);
const containerConfig = agentGroup?.container_config ? JSON.parse(agentGroup.container_config) : {};
if (!containerConfig.mcpServers) containerConfig.mcpServers = {};
containerConfig.mcpServers[payload.name] = {
command: payload.command,
args: payload.args || [],
env: payload.env || {},
};
updateAgentGroup(session.agent_group_id, { container_config: JSON.stringify(containerConfig) });
if (!agentGroup) {
notify('add_mcp_server approved but agent group missing.');
return;
}
updateContainerConfig(agentGroup.folder, (cfg) => {
cfg.mcpServers[payload.name as string] = {
command: payload.command as string,
args: (payload.args as string[]) || [],
env: (payload.env as Record<string, string>) || {},
};
});
// Kill the container so next wake loads the new MCP server config
killContainer(session.id, 'mcp server added');
@@ -343,6 +402,140 @@ async function handleApprovalResponse(
await wakeContainer(session);
}
/**
* Handle an approver's response to a builder-agent `swap_request` card.
* Approve → capture pre-state, apply files, commit, rebuild if needed,
* restart, start deadman. Reject → teardown worktree + dev agent, notify.
*
* Kept separate from the install_packages / request_rebuild flow because:
* - Host-level swaps require `process.exit(0)` for supervisor respawn,
* which the other flows never do.
* - Swap state lives in `pending_swaps`, not `pending_approvals.payload`.
*/
async function handleSwapRequestApproval(
approval: import('./types.js').PendingApproval,
selectedOption: string,
userId: string,
): Promise<void> {
const payload = JSON.parse(approval.payload) as { swapRequestId?: string };
const swapRequestId = payload.swapRequestId;
if (!swapRequestId) {
deletePendingApproval(approval.approval_id);
return;
}
const swap = getPendingSwap(swapRequestId);
if (!swap) {
deletePendingApproval(approval.approval_id);
return;
}
// Notify the dev agent's session about the outcome. Uses the existing
// session for the dev agent group so the dev agent sees it as an inbound
// chat message with sender=system.
const { findSessionByAgentGroup } = await import('./db/sessions.js');
const devSession = findSessionByAgentGroup(swap.dev_agent_id);
const notifyDev = (text: string): void => {
if (!devSession) return;
writeSessionMessage(devSession.agent_group_id, devSession.id, {
id: `appr-note-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
kind: 'chat',
timestamp: new Date().toISOString(),
platformId: devSession.agent_group_id,
channelType: 'agent',
threadId: null,
content: JSON.stringify({ text, sender: 'system', senderId: 'system' }),
});
};
if (selectedOption !== 'approve') {
notifyDev(`Your proposed code change was rejected by ${userId}.`);
log.info('Swap request rejected', { requestId: swapRequestId, userId, selectedOption });
updatePendingSwapStatus(swapRequestId, 'rejected');
try {
removeDevWorktree(swapRequestId);
} catch (err) {
log.warn('Failed to remove worktree after rejection', { swapRequestId, err });
}
deletePendingApproval(approval.approval_id);
return;
}
log.info('Swap request approved — executing swap dance', { requestId: swapRequestId, userId });
// Swap execution. Any failure inside the try (captureSwapPreState,
// applySwapFiles, commitSwap, npm run build, startDeadman, restart
// orchestration) triggers a unified retryable-bail: revert any on-disk
// changes via git, reset the pending_swaps row back to pending_approval,
// leave the dev agent + worktree ALIVE so the dev agent can fix the
// issue and call request_swap again. Only explicit rejection tears
// down the dev agent.
try {
// 1. Capture pre-state (pre_swap_sha + DB snapshot).
await captureSwapPreState(swapRequestId);
// 2. Apply files from worktree to swap targets.
const touchedAbs = applySwapFiles(swapRequestId);
// 3. Commit the swap to main.
const summary = parseSwapSummary(swap);
commitSwap(swapRequestId, touchedAbs, summary.overallSummary || 'no summary');
// 4. Host-level rebuild. If the diff touched host code that compiles
// to dist/ (src/**, package.json, etc.), run `npm run build` now so
// the respawned host process runs the new compiled output rather
// than stale dist/. Group-level swaps need no rebuild — /app/src is
// runtime-compiled inside each container on spawn, skills/CLAUDE.md
// are mounted.
if (requiresFullHostRebuild(touchedAbs)) {
notifyDev('Code change applied and committed. Running `npm run build` before the host restart…');
try {
execFileSync('npm', ['run', 'build'], { cwd: process.cwd(), stdio: 'inherit' });
log.info('npm run build succeeded for host-level swap', { requestId: swapRequestId });
} catch (buildErr) {
const msg = buildErr instanceof Error ? buildErr.message : String(buildErr);
// Wrap with context and re-throw so the outer catch runs the
// unified bail path.
throw new Error(`npm run build failed: ${msg}`);
}
}
// 5. Start the deadman. This sets status=awaiting_confirmation, posts
// the handshake card, and schedules the timer. For host-level swaps
// we then exit so the supervisor respawns the host on the new code;
// the startup sweep will resume this deadman after restart.
await startDeadman(swapRequestId);
if (isHostLevelSwap(swap)) {
notifyDev('Code change applied and committed. Triggering host restart so the new code takes effect. Awaiting user confirmation after restart.');
log.warn('Host-level swap triggering process exit for supervisor respawn', {
requestId: swapRequestId,
});
// Give log sinks and the deadman card delivery a moment to flush
// before exiting.
setTimeout(() => process.exit(0), 500);
} else {
// Group-level: kill the originating agent's active container so its
// next wake respawns it with the new per-group runner/skills mounted.
const originatingSession = findSessionByAgentGroup(swap.originating_group_id);
if (originatingSession) {
killContainer(originatingSession.id, 'swap applied');
}
notifyDev('Code change applied and committed. The originating agent will restart on its next message. Awaiting user confirmation.');
}
} catch (err) {
const errMsg = err instanceof Error ? err.message : String(err);
log.error('Swap execution failed — bailing for retry', { requestId: swapRequestId, err });
bailSwapForRetry(swapRequestId);
notifyDev(
`❌ Code change failed: ${errMsg}\n\n` +
`Your worktree and dev-agent group are still alive. Review the error above, ` +
`fix the issue in /worktree, commit, and call \`request_swap\` again to retry.`,
);
}
deletePendingApproval(approval.approval_id);
}
/** Graceful shutdown. */
async function shutdown(signal: string): Promise<void> {
log.info('Shutdown signal received', { signal });