From ceb0b9cf5f30360ab83bc1b097f747149a9c38d9 Mon Sep 17 00:00:00 2001 From: Mike Nolet Date: Sat, 2 May 2026 08:45:23 +0200 Subject: [PATCH 01/11] fix(test-infra): openInboundDb honors in-memory test DB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit initTestSessionDb() creates an in-memory inbound singleton, but openInboundDb() always opened the hardcoded /workspace/inbound.db path. Every test that exercised getPendingMessages — directly, or via test fixtures that load data through it (e.g. poll-loop.test.ts:29 loads formatter test rows via getPendingMessages) — failed with SQLITE_CANTOPEN under `bun test` outside a real container. Baseline on main: 34 pass, 25 fail across 6 files. After this fix: 59 pass, 0 fail. In test mode, openInboundDb returns the in-memory singleton. The singleton's .close() is no-op'd in initTestSessionDb so caller try/finally cleanup doesn't tear down the shared DB; closeSessionDb invokes the saved original close to do the real teardown. Production behavior is unchanged — _inboundIsTest only flips inside initTestSessionDb, which is never called outside the test runner. Co-Authored-By: Claude Opus 4.7 (1M context) --- container/agent-runner/src/db/connection.ts | 27 ++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/container/agent-runner/src/db/connection.ts b/container/agent-runner/src/db/connection.ts index 3ca44a8..ac563fa 100644 --- a/container/agent-runner/src/db/connection.ts +++ b/container/agent-runner/src/db/connection.ts @@ -27,6 +27,13 @@ const DEFAULT_HEARTBEAT_PATH = '/workspace/.heartbeat'; let _inbound: Database | null = null; let _outbound: Database | null = null; let _heartbeatPath: string = DEFAULT_HEARTBEAT_PATH; +// True when initTestSessionDb() set _inbound to an in-memory DB. Used by +// openInboundDb() so tests don't try to open the missing /workspace path. 
+let _inboundIsTest = false; +// Saved real close() for the in-memory inbound singleton. We no-op the +// public .close() during tests so caller try/finally doesn't tear down +// the shared DB; closeSessionDb() invokes this to do the real teardown. +let _inboundOriginalClose: (() => void) | null = null; /** * Avoid all cached db reads; open inbound.db read-only with mmap and page cache disabled. @@ -42,6 +49,12 @@ let _heartbeatPath: string = DEFAULT_HEARTBEAT_PATH; * Cost is microseconds per query, so safe for universal use. */ export function openInboundDb(): Database { + // In test mode the inbound DB is an in-memory singleton — there is no + // file at DEFAULT_INBOUND_PATH. Return the singleton directly; its + // .close() was no-op'd in initTestSessionDb so caller try/finally + // cleanup doesn't tear down the shared DB. + if (_inboundIsTest && _inbound) return _inbound; + const db = new Database(DEFAULT_INBOUND_PATH, { readonly: true }); db.exec('PRAGMA busy_timeout = 5000'); db.exec('PRAGMA mmap_size = 0'); @@ -171,6 +184,12 @@ export function clearStaleProcessingAcks(): void { /** For tests — creates in-memory DBs with the session schemas. */ export function initTestSessionDb(): { inbound: Database; outbound: Database } { _inbound = new Database(':memory:'); + _inboundIsTest = true; + // No-op .close() so callers using openInboundDb()'s try/finally pattern + // don't tear down our shared singleton. closeSessionDb() does the real + // teardown via the saved original. 
+ _inboundOriginalClose = _inbound.close.bind(_inbound); + _inbound.close = () => {}; _inbound.exec('PRAGMA foreign_keys = ON'); _inbound.exec(` CREATE TABLE messages_in ( @@ -244,8 +263,14 @@ export function initTestSessionDb(): { inbound: Database; outbound: Database } { } export function closeSessionDb(): void { - _inbound?.close(); + if (_inboundOriginalClose) { + _inboundOriginalClose(); + _inboundOriginalClose = null; + } else { + _inbound?.close(); + } _inbound = null; + _inboundIsTest = false; _outbound?.close(); _outbound = null; } From e4181f5451f1ac514c94ab6d4e737bfad5cb5076 Mon Sep 17 00:00:00 2001 From: Charlie Savage Date: Sat, 2 May 2026 22:45:23 -0700 Subject: [PATCH 02/11] =?UTF-8?q?fix(host-sweep):=20regression=20in=20#218?= =?UTF-8?q?3=20=E2=80=94=20orphan-claim=20delete=20missed=20in=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #2183 added orphan-claim cleanup that reopens `outbound.db` by session path (`openOutboundDbRw(session.agent_group_id, session.id)`) so the delete runs against a writable handle even when callers pass a readonly one. That works for the production caller — there's a real on-disk session DB at the expected path. The test wrapper `_resetStuckProcessingRowsForTesting` (introduced in the same series, #2151) is called with in-memory DBs that have no on-disk path. The reopen creates a fresh empty file at `/v2-sessions/ag-test/sess-test/outbound.db`, runs the delete against that, and leaves the in-memory `outDb` (which the test reads afterward) untouched. The two `resetStuckProcessingRows — orphan claim cleanup` tests assert `getProcessingClaims(outDb).toEqual([])` after the call and fail on the row that's still there. Fix: drop the `_…ForTesting` wrapper, export `resetStuckProcessingRows` directly with an optional `writableOutDb` parameter. When omitted (production), the function reopens `outbound.db` RW by session path — existing behavior, existing safety guarantee. 
When provided (tests, or any future caller that already holds a writable handle), the function uses it directly and skips the reopen. The optional parameter has a real meaning, not a "for tests" hack. Public API surface change: `_resetStuckProcessingRowsForTesting` is gone, `resetStuckProcessingRows` is now exported. No other callers inside the repo besides the test. --- src/host-sweep.test.ts | 11 +++-------- src/host-sweep.ts | 40 +++++++++++++++++++++++----------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/host-sweep.test.ts b/src/host-sweep.test.ts index bd2e233..155b1b1 100644 --- a/src/host-sweep.test.ts +++ b/src/host-sweep.test.ts @@ -7,12 +7,7 @@ import Database from 'better-sqlite3'; import { describe, expect, it } from 'vitest'; import { deleteOrphanProcessingClaims, getProcessingClaims } from './db/session-db.js'; -import { - ABSOLUTE_CEILING_MS, - CLAIM_STUCK_MS, - _resetStuckProcessingRowsForTesting, - decideStuckAction, -} from './host-sweep.js'; +import { ABSOLUTE_CEILING_MS, CLAIM_STUCK_MS, resetStuckProcessingRows, decideStuckAction } from './host-sweep.js'; import type { Session } from './types.js'; const BASE = Date.parse('2026-04-20T12:00:00.000Z'); @@ -253,7 +248,7 @@ describe('resetStuckProcessingRows — orphan claim cleanup', () => { // Sanity: the orphan claim is what would trip claim-stuck. expect(getProcessingClaims(outDb)).toHaveLength(1); - _resetStuckProcessingRowsForTesting(inDb, outDb, fakeSession(), 'absolute-ceiling'); + resetStuckProcessingRows(inDb, outDb, fakeSession(), 'absolute-ceiling', outDb); // Regression assertion: orphan claim is gone — next sweep tick will see // an empty claims list and not kill the freshly respawned container. 
@@ -285,7 +280,7 @@ describe('resetStuckProcessingRows — orphan claim cleanup', () => { .run(claimedAt, future); outDb.prepare("INSERT INTO processing_ack VALUES ('m-2', 'processing', ?)").run(claimedAt); - _resetStuckProcessingRowsForTesting(inDb, outDb, fakeSession(), 'claim-stuck'); + resetStuckProcessingRows(inDb, outDb, fakeSession(), 'claim-stuck', outDb); expect(getProcessingClaims(outDb)).toEqual([]); const row = inDb.prepare('SELECT tries FROM messages_in WHERE id = ?').get('m-2') as { tries: number }; diff --git a/src/host-sweep.ts b/src/host-sweep.ts index 09c82ac..b10ee0d 100644 --- a/src/host-sweep.ts +++ b/src/host-sweep.ts @@ -250,20 +250,28 @@ function enforceRunningContainerSla( resetStuckProcessingRows(inDb, outDb, session, 'claim-stuck'); } -export function _resetStuckProcessingRowsForTesting( - inDb: Database.Database, - outDb: Database.Database, - session: Session, - reason: string, -): void { - resetStuckProcessingRows(inDb, outDb, session, reason); -} - -function resetStuckProcessingRows( +/** + * Reset retries on inbound rows the container claimed but never acked, and + * delete the orphan `processing_ack` rows so the next sweep tick doesn't + * see them. + * + * Safe to call only when the container that owned `outbound.db` is dead — + * production callers invoke this either in the `!alive` branch or right + * after `killContainer`. Without that guarantee, the orphan-claim delete + * would race the container's own writer. + * + * `writableOutDb` is the same handle outbound writes go through. When + * omitted (typical production path) the function reopens `outbound.db` + * read-write by session path for the delete and closes that handle on + * exit. Callers that already hold a writable handle — including tests + * using in-memory DBs — can pass it in to skip the reopen. 
+ */ +export function resetStuckProcessingRows( inDb: Database.Database, outDb: Database.Database, session: Session, reason: string, + writableOutDb?: Database.Database, ): void { const claims = getProcessingClaims(outDb); const now = Date.now(); @@ -300,19 +308,17 @@ function resetStuckProcessingRows( // would re-read them, see the old status_changed timestamp, conclude the // freshly respawned container is stuck, and SIGKILL it before its // agent-runner has a chance to run clearStaleProcessingAcks() on startup. - // We're safe to write outbound.db here because we just killed the container - // that owned it (or it crashed and left no writer behind). - // outDb was opened readonly for reads above; reopen with write access for this delete. - let outDbRw: Database.Database | null = null; + const ownsDb = !writableOutDb; + let useDb: Database.Database | null = writableOutDb ?? null; try { - outDbRw = openOutboundDbRw(session.agent_group_id, session.id); - const cleared = deleteOrphanProcessingClaims(outDbRw); + if (!useDb) useDb = openOutboundDbRw(session.agent_group_id, session.id); + const cleared = deleteOrphanProcessingClaims(useDb); if (cleared > 0) { log.info('Cleared orphan processing claims', { sessionId: session.id, cleared, reason }); } } catch (err) { log.warn('Failed to clear orphan processing claims', { sessionId: session.id, err }); } finally { - outDbRw?.close(); + if (ownsDb) useDb?.close(); } } From 30a898508af2ccef8aa8eee2b3ea07a4d01a9e77 Mon Sep 17 00:00:00 2001 From: "exe.dev user" Date: Mon, 4 May 2026 21:58:57 +0000 Subject: [PATCH 03/11] fix(migrate): drop WhatsApp LID dual-row migration step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove step 2d (whatsapp-resolve-lids.ts) which pre-created duplicate messaging_groups rows keyed by @lid alongside the phone-keyed rows. This caused split sessions — the same contact got separate sessions depending on which JID format arrived. 
With the Baileys v7 upgrade (PR #2259 on channels), the adapter resolves every LID to a phone JID via extractAddressingContext before the message reaches the router, making dual rows unnecessary. Co-Authored-By: Claude Opus 4.6 (1M context) --- migrate-v2.sh | 20 +-- setup/migrate-v2/whatsapp-resolve-lids.ts | 192 ---------------------- 2 files changed, 6 insertions(+), 206 deletions(-) delete mode 100644 setup/migrate-v2/whatsapp-resolve-lids.ts diff --git a/migrate-v2.sh b/migrate-v2.sh index f06a548..2325edd 100644 --- a/migrate-v2.sh +++ b/migrate-v2.sh @@ -408,20 +408,12 @@ else fi done - # 2d. WhatsApp LID resolution. After whatsapp is installed (so Baileys - # is on disk) and auth files have been copied (so we can connect with - # the migrated identity), boot Baileys briefly to learn LID↔phone - # mappings during initial sync, then write paired LID-keyed - # messaging_groups. Best-effort: any failure degrades to runtime - # approval flow, which the WA adapter's isMention=true on DMs handles. - for ch in "${SELECTED_CHANNELS[@]}"; do - if [ "$ch" = "whatsapp" ]; then - run_step "2d-whatsapp-lids" \ - "Resolve WhatsApp LIDs for migrated DMs" \ - "setup/migrate-v2/whatsapp-resolve-lids.ts" - break - fi - done + # 2d. (Removed) WhatsApp LID resolution was previously needed because the + # v6 adapter couldn't reliably translate LID→phone JIDs, so the migration + # pre-created dual messaging_groups rows. With Baileys v7, the adapter + # resolves LIDs via extractAddressingContext + signalRepository.lidMapping + # on every inbound message, so dual rows are unnecessary and were causing + # split sessions. fi echo diff --git a/setup/migrate-v2/whatsapp-resolve-lids.ts b/setup/migrate-v2/whatsapp-resolve-lids.ts deleted file mode 100644 index 7a5eb8b..0000000 --- a/setup/migrate-v2/whatsapp-resolve-lids.ts +++ /dev/null @@ -1,192 +0,0 @@ -/** - * migrate-v2 step: resolve WhatsApp LIDs for migrated DM messaging_groups. 
- * - * Why this exists - * ─────────────── - * v1 stored every WhatsApp DM as `@s.whatsapp.net`. v2's WA adapter - * sometimes resolves the chat to `@lid` instead — when WhatsApp - * delivers a message via the LID protocol and Baileys hasn't yet learned - * a LID→phone mapping for that contact (cold cache after migration). The - * router then can't find the phone-keyed messaging_group and silently - * drops the message at router.ts:184 — until the LID is learned (which - * happens lazily, message-by-message, via `chats.phoneNumberShare`). - * - * Baileys persists LID↔phone mappings to disk as - * `store/auth/lid-mapping-_reverse.json` (LID → phone) and - * `lid-mapping-.json` (phone → LID). v1 will already have populated - * these for every contact it talked to. This step parses the reverse - * files and writes paired LID-keyed `messaging_groups` + - * `messaging_group_agents` rows so both `@s.whatsapp.net` and - * `@lid` route to the same agent_group with the same engage rules. - * - * No Baileys boot, no network — pure filesystem read. If store/auth is - * missing or has no reverse mappings, exits 0 with a SKIPPED. Runtime - * fallback (WA adapter sets isMention=true on DMs → router auto-creates - * with `unknown_sender_policy=request_approval`) handles anything we - * miss. 
- * - * Usage: pnpm exec tsx setup/migrate-v2/whatsapp-resolve-lids.ts - */ -import fs from 'fs'; -import path from 'path'; - -import { DATA_DIR } from '../../src/config.js'; -import { initDb } from '../../src/db/connection.js'; -import { - createMessagingGroup, - createMessagingGroupAgent, - getMessagingGroupAgentByPair, - getMessagingGroupByPlatform, -} from '../../src/db/messaging-groups.js'; -import { runMigrations } from '../../src/db/migrations/index.js'; -import { generateId } from './shared.js'; - -interface RawMessagingGroup { - id: string; - channel_type: string; - platform_id: string; -} - -interface RawWiring { - id: string; - messaging_group_id: string; - agent_group_id: string; - engage_mode: string; - engage_pattern: string | null; - sender_scope: string; - ignored_message_policy: string; - session_mode: string; - priority: number; -} - -const REVERSE_FILE_RE = /^lid-mapping-(\d+)_reverse\.json$/; - -/** - * Read store/auth/lid-mapping-*_reverse.json into a Map. - * Returns an empty Map if the directory doesn't exist. - */ -function readReverseMappings(authDir: string): Map { - const out = new Map(); - if (!fs.existsSync(authDir)) return out; - for (const entry of fs.readdirSync(authDir)) { - const m = REVERSE_FILE_RE.exec(entry); - if (!m) continue; - const lidUser = m[1]; - try { - const raw = fs.readFileSync(path.join(authDir, entry), 'utf-8').trim(); - // The file content is a JSON-encoded string: `""` - const phoneUser = JSON.parse(raw); - if (typeof phoneUser !== 'string' || phoneUser.length === 0) continue; - out.set(lidUser, phoneUser); - } catch { - // Skip malformed entries — best-effort. 
- } - } - return out; -} - -function phoneUserOf(jid: string): string { - return jid.split('@')[0].split(':')[0]; -} - -function main(): void { - const authDir = path.join(process.cwd(), 'store', 'auth'); - const reverse = readReverseMappings(authDir); - - if (reverse.size === 0) { - console.log('SKIPPED:no lid-mapping-*_reverse.json files in store/auth'); - process.exit(0); - } - - // phoneUser → lidJid (the form we'll write to messaging_groups) - const phoneUserToLidJid = new Map(); - for (const [lidUser, phoneUser] of reverse) { - phoneUserToLidJid.set(phoneUser, `${lidUser}@lid`); - } - - const v2DbPath = path.join(DATA_DIR, 'v2.db'); - if (!fs.existsSync(v2DbPath)) { - console.error('FAIL:v2.db not found — run db step first'); - process.exit(1); - } - - const v2Db = initDb(v2DbPath); - runMigrations(v2Db); - - const phoneRows = v2Db - .prepare( - `SELECT id, channel_type, platform_id FROM messaging_groups - WHERE channel_type='whatsapp' AND platform_id LIKE '%@s.whatsapp.net'`, - ) - .all() as RawMessagingGroup[]; - - if (phoneRows.length === 0) { - console.log('SKIPPED:no whatsapp DM messaging_groups to resolve'); - v2Db.close(); - process.exit(0); - } - - // Pull existing wirings so each new alias gets the same agent_group + - // engage rules as the phone-keyed row. - const placeholders = phoneRows.map(() => '?').join(','); - const wiringRows = v2Db - .prepare(`SELECT * FROM messaging_group_agents WHERE messaging_group_id IN (${placeholders})`) - .all(...phoneRows.map((r) => r.id)) as RawWiring[]; - - const wiringsByMg = new Map(); - for (const w of wiringRows) { - const arr = wiringsByMg.get(w.messaging_group_id) ?? 
[]; - arr.push(w); - wiringsByMg.set(w.messaging_group_id, arr); - } - - let resolved = 0; - let aliased = 0; - const createdAt = new Date().toISOString(); - - for (const row of phoneRows) { - const phoneUser = phoneUserOf(row.platform_id); - const lidJid = phoneUserToLidJid.get(phoneUser); - if (!lidJid) continue; - resolved++; - - let lidMg = getMessagingGroupByPlatform('whatsapp', lidJid); - if (!lidMg) { - createMessagingGroup({ - id: generateId('mg'), - channel_type: 'whatsapp', - platform_id: lidJid, - name: null, - is_group: 0, - unknown_sender_policy: 'public', - created_at: createdAt, - }); - lidMg = getMessagingGroupByPlatform('whatsapp', lidJid)!; - } - - const wirings = wiringsByMg.get(row.id) ?? []; - for (const w of wirings) { - if (getMessagingGroupAgentByPair(lidMg.id, w.agent_group_id)) continue; - createMessagingGroupAgent({ - id: generateId('mga'), - messaging_group_id: lidMg.id, - agent_group_id: w.agent_group_id, - engage_mode: w.engage_mode as 'pattern' | 'mention' | 'mention-sticky', - engage_pattern: w.engage_pattern, - sender_scope: w.sender_scope as 'all' | 'admins', - ignored_message_policy: w.ignored_message_policy as 'drop' | 'queue', - session_mode: w.session_mode as 'shared' | 'thread', - priority: w.priority, - created_at: createdAt, - }); - aliased++; - } - } - - v2Db.close(); - console.log( - `OK:reverse_mappings=${reverse.size},phone_dms=${phoneRows.length},lids_resolved=${resolved},aliased=${aliased}`, - ); -} - -main(); From e753d09e64fe668bc6caf118794237192b75daae Mon Sep 17 00:00:00 2001 From: koshkoshinsk Date: Tue, 5 May 2026 07:01:04 +0000 Subject: [PATCH 04/11] setup: drop disk-space pre-flight check, keep RAM only The disk threshold was unreliable on hosts with separate /home or /var mounts where df underreports free space. Simplify the pre-flight to a RAM-only check. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- nanoclaw.sh | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/nanoclaw.sh b/nanoclaw.sh index c17966e..bcf4e49 100755 --- a/nanoclaw.sh +++ b/nanoclaw.sh @@ -138,16 +138,13 @@ write_header cat "$PROJECT_ROOT/assets/setup-splash.txt" # ─── pre-flight: minimum hardware specs ──────────────────────────────── -# NanoClaw runs an agent container per session. Below these thresholds the -# host + container + agent will struggle (OOM under load, image + session -# DBs filling the disk). Soft warn — `df` only sees the partition that -# $PROJECT_ROOT lives on, which can underreport on hosts with separate -# /home or /var mounts, so the user can override. +# NanoClaw runs an agent container per session. Below this threshold the +# host + container + agent will struggle (OOM under load). Soft warn — the +# user can override. # RAM floor is set below 4 GB because "4 GB" VMs typically report 3700–3900 MB # after kernel reserves (e.g. Hetzner CX21 ≈ 3814, AWS t3.medium ≈ 3800). MIN_MEM_MB=3700 -MIN_DISK_GB=20 detect_mem_mb() { case "$(uname -s)" in @@ -162,39 +159,29 @@ detect_mem_mb() { esac } -detect_disk_gb() { - # -P: POSIX format (no line-wrapping); -k: 1024-byte blocks. Avail is col 4. - df -Pk "$PROJECT_ROOT" 2>/dev/null \ - | awk 'NR==2 { printf "%d", $4 / 1024 / 1024 }' -} - MEM_MB=$(detect_mem_mb) -DISK_GB=$(detect_disk_gb) : "${MEM_MB:=0}" -: "${DISK_GB:=0}" -LOW_MEM=false; LOW_DISK=false -[ "$MEM_MB" -gt 0 ] && [ "$MEM_MB" -lt "$MIN_MEM_MB" ] && LOW_MEM=true -[ "$DISK_GB" -gt 0 ] && [ "$DISK_GB" -lt "$MIN_DISK_GB" ] && LOW_DISK=true +LOW_MEM=false +[ "$MEM_MB" -gt 0 ] && [ "$MEM_MB" -lt "$MIN_MEM_MB" ] && LOW_MEM=true -if [ "$LOW_MEM" = true ] || [ "$LOW_DISK" = true ]; then +if [ "$LOW_MEM" = true ]; then printf ' %s\n' "$(red 'Warning: this machine likely cannot run NanoClaw.')" - printf ' %s\n' "$(dim 'NanoClaw recommends a 4 GB+ machine with 20 GB+ free disk. 
Below this,')" - printf ' %s\n' "$(dim 'the host + agent container will run out of memory or disk under most')" - printf ' %s\n' "$(dim 'workloads. A stronger machine is strongly recommended.')" - [ "$LOW_MEM" = true ] && printf ' %s\n' "$(dim " · Detected RAM: ${MEM_MB} MB")" - [ "$LOW_DISK" = true ] && printf ' %s\n' "$(dim " · Free disk on $PROJECT_ROOT: ${DISK_GB} GB")" + printf ' %s\n' "$(dim 'NanoClaw recommends a 4 GB+ RAM machine. Below this, the host + agent')" + printf ' %s\n' "$(dim 'container will run out of memory under most workloads. A stronger')" + printf ' %s\n' "$(dim 'machine is strongly recommended.')" + printf ' %s\n' "$(dim " · Detected RAM: ${MEM_MB} MB")" printf '\n' read -r -p " $(bold 'Try anyway?') [y/N] " SPECS_ANS Date: Tue, 5 May 2026 07:11:26 +0000 Subject: [PATCH 05/11] improve node install to use uvx --- setup/install-node.sh | 52 ++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/setup/install-node.sh b/setup/install-node.sh index e100ccd..4ecb1c5 100755 --- a/setup/install-node.sh +++ b/setup/install-node.sh @@ -17,30 +17,40 @@ if command -v node >/dev/null 2>&1; then exit 0 fi -case "$(uname -s)" in - Darwin) - echo "STEP: brew-install-node" - if ! command -v brew >/dev/null 2>&1; then +if command -v uvx >/dev/null 2>&1; then + echo "STEP: uvx-nodeenv" + uvx nodeenv -n lts ~/node + mkdir -p ~/.local/bin + ln -sf ~/node/bin/node ~/.local/bin/node + ln -sf ~/node/bin/npm ~/.local/bin/npm + ln -sf ~/node/bin/npx ~/.local/bin/npx + ln -sf ~/node/bin/pnpm ~/.local/bin/pnpm +else + case "$(uname -s)" in + Darwin) + echo "STEP: brew-install-node" + if ! command -v brew >/dev/null 2>&1; then + echo "STATUS: failed" + echo "ERROR: Homebrew not installed. Install brew first (https://brew.sh) then re-run." 
+ echo "=== END ===" + exit 1 + fi + brew install node@22 + ;; + Linux) + echo "STEP: nodesource-setup" + curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - + echo "STEP: apt-install-nodejs" + sudo apt-get install -y nodejs + ;; + *) echo "STATUS: failed" - echo "ERROR: Homebrew not installed. Install brew first (https://brew.sh) then re-run." + echo "ERROR: Unsupported platform: $(uname -s)" echo "=== END ===" exit 1 - fi - brew install node@22 - ;; - Linux) - echo "STEP: nodesource-setup" - curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - - echo "STEP: apt-install-nodejs" - sudo apt-get install -y nodejs - ;; - *) - echo "STATUS: failed" - echo "ERROR: Unsupported platform: $(uname -s)" - echo "=== END ===" - exit 1 - ;; -esac + ;; + esac +fi if ! command -v node >/dev/null 2>&1; then echo "STATUS: failed" From 3c5ae96cdd63ef673be8d8d908f63f248bb11ea4 Mon Sep 17 00:00:00 2001 From: gavrielc Date: Tue, 5 May 2026 07:23:37 +0000 Subject: [PATCH 06/11] use node 22 with uvx --- setup/install-node.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/install-node.sh b/setup/install-node.sh index 4ecb1c5..229f7db 100755 --- a/setup/install-node.sh +++ b/setup/install-node.sh @@ -19,7 +19,7 @@ fi if command -v uvx >/dev/null 2>&1; then echo "STEP: uvx-nodeenv" - uvx nodeenv -n lts ~/node + uvx nodeenv -n 22 ~/node mkdir -p ~/.local/bin ln -sf ~/node/bin/node ~/.local/bin/node ln -sf ~/node/bin/npm ~/.local/bin/npm From 948a0dcadad423fac9b1d7eae7b79a7dfce91e77 Mon Sep 17 00:00:00 2001 From: gavrielc Date: Tue, 5 May 2026 07:28:48 +0000 Subject: [PATCH 07/11] fix: use nodeenv lts instead of pinned node 22 nodeenv doesn't support major-only version specifiers. Use lts which resolves to the latest LTS release. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- setup/install-node.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup/install-node.sh b/setup/install-node.sh index 229f7db..4ecb1c5 100755 --- a/setup/install-node.sh +++ b/setup/install-node.sh @@ -19,7 +19,7 @@ fi if command -v uvx >/dev/null 2>&1; then echo "STEP: uvx-nodeenv" - uvx nodeenv -n 22 ~/node + uvx nodeenv -n lts ~/node mkdir -p ~/.local/bin ln -sf ~/node/bin/node ~/.local/bin/node ln -sf ~/node/bin/npm ~/.local/bin/npm From a870e7ebf24f2aface4a4359d75955f9ab79917b Mon Sep 17 00:00:00 2001 From: gavrielc Date: Tue, 5 May 2026 15:56:08 +0300 Subject: [PATCH 08/11] fix: keep resetStuckProcessingRows private, restore test wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test wrapper forwards the in-memory outDb as the writable handle, avoiding the filesystem reopen that fails in CI. The function stays private — the optional writableOutDb param is an internal detail, not a public API. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/host-sweep.test.ts | 11 ++++++++--- src/host-sweep.ts | 27 ++++++++++----------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/host-sweep.test.ts b/src/host-sweep.test.ts index 155b1b1..bd2e233 100644 --- a/src/host-sweep.test.ts +++ b/src/host-sweep.test.ts @@ -7,7 +7,12 @@ import Database from 'better-sqlite3'; import { describe, expect, it } from 'vitest'; import { deleteOrphanProcessingClaims, getProcessingClaims } from './db/session-db.js'; -import { ABSOLUTE_CEILING_MS, CLAIM_STUCK_MS, resetStuckProcessingRows, decideStuckAction } from './host-sweep.js'; +import { + ABSOLUTE_CEILING_MS, + CLAIM_STUCK_MS, + _resetStuckProcessingRowsForTesting, + decideStuckAction, +} from './host-sweep.js'; import type { Session } from './types.js'; const BASE = Date.parse('2026-04-20T12:00:00.000Z'); @@ -248,7 +253,7 @@ describe('resetStuckProcessingRows — orphan claim cleanup', () => { // Sanity: the orphan claim is what would trip claim-stuck. expect(getProcessingClaims(outDb)).toHaveLength(1); - resetStuckProcessingRows(inDb, outDb, fakeSession(), 'absolute-ceiling', outDb); + _resetStuckProcessingRowsForTesting(inDb, outDb, fakeSession(), 'absolute-ceiling'); // Regression assertion: orphan claim is gone — next sweep tick will see // an empty claims list and not kill the freshly respawned container. 
@@ -280,7 +285,7 @@ describe('resetStuckProcessingRows — orphan claim cleanup', () => { .run(claimedAt, future); outDb.prepare("INSERT INTO processing_ack VALUES ('m-2', 'processing', ?)").run(claimedAt); - resetStuckProcessingRows(inDb, outDb, fakeSession(), 'claim-stuck', outDb); + _resetStuckProcessingRowsForTesting(inDb, outDb, fakeSession(), 'claim-stuck'); expect(getProcessingClaims(outDb)).toEqual([]); const row = inDb.prepare('SELECT tries FROM messages_in WHERE id = ?').get('m-2') as { tries: number }; diff --git a/src/host-sweep.ts b/src/host-sweep.ts index b10ee0d..93a7e87 100644 --- a/src/host-sweep.ts +++ b/src/host-sweep.ts @@ -250,23 +250,16 @@ function enforceRunningContainerSla( resetStuckProcessingRows(inDb, outDb, session, 'claim-stuck'); } -/** - * Reset retries on inbound rows the container claimed but never acked, and - * delete the orphan `processing_ack` rows so the next sweep tick doesn't - * see them. - * - * Safe to call only when the container that owned `outbound.db` is dead — - * production callers invoke this either in the `!alive` branch or right - * after `killContainer`. Without that guarantee, the orphan-claim delete - * would race the container's own writer. - * - * `writableOutDb` is the same handle outbound writes go through. When - * omitted (typical production path) the function reopens `outbound.db` - * read-write by session path for the delete and closes that handle on - * exit. Callers that already hold a writable handle — including tests - * using in-memory DBs — can pass it in to skip the reopen. 
- */ -export function resetStuckProcessingRows( +export function _resetStuckProcessingRowsForTesting( + inDb: Database.Database, + outDb: Database.Database, + session: Session, + reason: string, +): void { + resetStuckProcessingRows(inDb, outDb, session, reason, outDb); +} + +function resetStuckProcessingRows( inDb: Database.Database, outDb: Database.Database, session: Session, From 9ac1e6fd7bdd86366436f98aec237269a05b6252 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 5 May 2026 12:57:49 +0000 Subject: [PATCH 09/11] chore: bump version to 2.0.31 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index f92ed88..35856b7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "nanoclaw", - "version": "2.0.30", + "version": "2.0.31", "description": "Personal Claude assistant. Lightweight, secure, customizable.", "type": "module", "packageManager": "pnpm@10.33.0", From 6d6584d1207e7ae55ab20c068c11be65a7a58426 Mon Sep 17 00:00:00 2001 From: gavrielc Date: Tue, 5 May 2026 16:02:10 +0300 Subject: [PATCH 10/11] fix(test-infra): openInboundDb honors in-memory test DB openInboundDb() always opened /workspace/inbound.db which doesn't exist in CI. In test mode, return a thin wrapper over the in-memory singleton that delegates prepare/exec but no-ops close(), so callers' try/finally cleanup doesn't destroy the shared DB mid-test. One flag (_testMode), no monkey-patching, no saved-close bookkeeping. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- container/agent-runner/src/db/connection.ts | 45 +++++++-------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/container/agent-runner/src/db/connection.ts b/container/agent-runner/src/db/connection.ts index ac563fa..871e43a 100644 --- a/container/agent-runner/src/db/connection.ts +++ b/container/agent-runner/src/db/connection.ts @@ -27,34 +27,29 @@ const DEFAULT_HEARTBEAT_PATH = '/workspace/.heartbeat'; let _inbound: Database | null = null; let _outbound: Database | null = null; let _heartbeatPath: string = DEFAULT_HEARTBEAT_PATH; -// True when initTestSessionDb() set _inbound to an in-memory DB. Used by -// openInboundDb() so tests don't try to open the missing /workspace path. -let _inboundIsTest = false; -// Saved real close() for the in-memory inbound singleton. We no-op the -// public .close() during tests so caller try/finally doesn't tear down -// the shared DB; closeSessionDb() invokes this to do the real teardown. -let _inboundOriginalClose: (() => void) | null = null; +let _testMode = false; /** - * Avoid all cached db reads; open inbound.db read-only with mmap and page cache disabled. - * + * Avoid all cached db reads; open inbound.db read-only with mmap and page cache disabled. + * * Use this (not getInboundDb) for readers that need to see host-written rows * promptly — e.g. messages_in polling. Caller must .close() the returned * connection (try/finally). * * Needed for mounts where host writes don't reliably invalidate * SQLite's caches: virtiofs (Colima, Lima, Podman Machine, Apple - * Container), NFS. - * + * Container), NFS. + * * Cost is microseconds per query, so safe for universal use. */ export function openInboundDb(): Database { - // In test mode the inbound DB is an in-memory singleton — there is no - // file at DEFAULT_INBOUND_PATH. 
Return the singleton directly; its - // .close() was no-op'd in initTestSessionDb so caller try/finally - // cleanup doesn't tear down the shared DB. - if (_inboundIsTest && _inbound) return _inbound; - + // In test mode return a thin wrapper over the in-memory singleton. + // Callers do try/finally { db.close() } — the wrapper no-ops close() + // so the singleton survives for the rest of the test. + if (_testMode && _inbound) { + const db = _inbound; + return { prepare: (sql: string) => db.prepare(sql), exec: (sql: string) => db.exec(sql), close: () => {} } as unknown as Database; + } const db = new Database(DEFAULT_INBOUND_PATH, { readonly: true }); db.exec('PRAGMA busy_timeout = 5000'); db.exec('PRAGMA mmap_size = 0'); @@ -183,13 +178,8 @@ export function clearStaleProcessingAcks(): void { /** For tests — creates in-memory DBs with the session schemas. */ export function initTestSessionDb(): { inbound: Database; outbound: Database } { + _testMode = true; _inbound = new Database(':memory:'); - _inboundIsTest = true; - // No-op .close() so callers using openInboundDb()'s try/finally pattern - // don't tear down our shared singleton. closeSessionDb() does the real - // teardown via the saved original. 
- _inboundOriginalClose = _inbound.close.bind(_inbound); - _inbound.close = () => {}; _inbound.exec('PRAGMA foreign_keys = ON'); _inbound.exec(` CREATE TABLE messages_in ( @@ -263,14 +253,9 @@ export function initTestSessionDb(): { inbound: Database; outbound: Database } { } export function closeSessionDb(): void { - if (_inboundOriginalClose) { - _inboundOriginalClose(); - _inboundOriginalClose = null; - } else { - _inbound?.close(); - } + _inbound?.close(); _inbound = null; - _inboundIsTest = false; + _testMode = false; _outbound?.close(); _outbound = null; } From 9df6a91b32e277f6b8f41e4bd14bad56155c855b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 5 May 2026 13:04:29 +0000 Subject: [PATCH 11/11] =?UTF-8?q?docs:=20update=20token=20count=20to=20141?= =?UTF-8?q?k=20tokens=20=C2=B7=2070%=20of=20context=20window?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- repo-tokens/badge.svg | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/repo-tokens/badge.svg b/repo-tokens/badge.svg index d0bd6da..263081f 100644 --- a/repo-tokens/badge.svg +++ b/repo-tokens/badge.svg @@ -1,5 +1,5 @@ - - 140k tokens, 70% of context window + + 141k tokens, 70% of context window @@ -15,8 +15,8 @@ tokens - - 140k + + 141k