From 67020f9fbff1e6b979fa38bb5653f34953f316d4 Mon Sep 17 00:00:00 2001
From: Gavriel Cohen
Date: Sun, 5 Apr 2026 00:03:00 +0300
Subject: [PATCH] feat: auto-prune stale session artifacts on startup + daily
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Session files (JSONLs, debug logs, todos, telemetry, group logs) accumulate
unboundedly — especially from daily cron tasks. This adds a cleanup script
that prunes old artifacts while protecting active sessions (read from DB),
and wires it into the main process on a 24h interval.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
Changes after review (not part of the commit message):
  - abort instead of pruning when the active-session query fails
  - match active session IDs exactly (grep -x) and without a pipe
  - keep TOTAL_FREED accurate (no piped while-loops); sizes via du -sk
  - do not abort when no group has a logs directory
Note: the blob ids on the "index" lines of the two new files predate these
changes.

 scripts/cleanup-sessions.sh | 148 ++++++++++++++++++++++++++++++++++++
 src/index.ts                |   2 +
 src/session-cleanup.ts      |  27 ++++++
 3 files changed, 177 insertions(+)
 create mode 100755 scripts/cleanup-sessions.sh
 create mode 100644 src/session-cleanup.ts

diff --git a/scripts/cleanup-sessions.sh b/scripts/cleanup-sessions.sh
new file mode 100755
index 0000000..cf03fe0
--- /dev/null
+++ b/scripts/cleanup-sessions.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+#
+# Prune stale session artifacts (JSONLs, debug logs, todos, telemetry, group logs).
+# Safe to run while NanoClaw is live — active sessions are read from the DB.
+#
+# Usage: ./scripts/cleanup-sessions.sh [--dry-run]
+#
+# Retention:
+#   Session JSONLs + tool-results: 7 days (active session always kept)
+#   Debug logs:                    3 days
+#   Todo files:                    3 days
+#   Telemetry:                     7 days
+#   Group logs:                    7 days
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
+
+STORE_DB="$PROJECT_ROOT/store/messages.db"
+SESSIONS_DIR="$PROJECT_ROOT/data/sessions"
+GROUPS_DIR="$PROJECT_ROOT/groups"
+
+DRY_RUN=false
+[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true
+
+TOTAL_FREED=0
+
+log() { echo "[cleanup] $*"; }
+
+# Print the disk usage of a file or directory in KiB (0 if unmeasurable).
+# du -sk works on both macOS and Linux; stat -f%z is BSD-only.
+size_kb() {
+  local kb
+  kb=$(du -sk "$1" 2>/dev/null | cut -f1) || true
+  echo "${kb:-0}"
+}
+
+# Delete a file or directory (only report it under --dry-run) and add its
+# size to TOTAL_FREED.
+remove() {
+  local target="$1" size
+  size=$(size_kb "$target")
+  if $DRY_RUN; then
+    log "would remove: $target (${size}K)"
+  else
+    rm -rf -- "$target"
+  fi
+  TOTAL_FREED=$((TOTAL_FREED + size))
+}
+
+# --- Collect active session IDs from the database ---
+
+if [ ! -f "$STORE_DB" ]; then
+  log "ERROR: database not found at $STORE_DB"
+  exit 1
+fi
+
+# Abort when the query fails: carrying on with an empty list would treat every
+# session as inactive and let the pruning below delete live sessions.
+if ! ACTIVE_IDS=$(sqlite3 "$STORE_DB" "SELECT session_id FROM sessions;"); then
+  log "ERROR: could not read active sessions from $STORE_DB"
+  exit 1
+fi
+
+# True when $1 is exactly one of the active session IDs. The here-string avoids
+# a pipe, so grep -q exiting early cannot fail the check under pipefail.
+is_active() {
+  [ -n "$1" ] && grep -qxF -- "$1" <<< "$ACTIVE_IDS"
+}
+
+# True when the file name $1 contains any active session ID
+# (e.g. todo files are named {session_id}-agent-{session_id}.json).
+mentions_active() {
+  local aid
+  for aid in $ACTIVE_IDS; do
+    [[ "$1" == *"$aid"* ]] && return 0
+  done
+  return 1
+}
+
+# Remove files older than <days> days under every group's .claude/<subdir>,
+# keeping any file whose name contains an active session ID or equals <keep>.
+# Usage: prune_claude_files <subdir> <days> [keep]
+prune_claude_files() {
+  local subdir="$1" days="$2" keep="${3:-}" group_dir dir f name
+  for group_dir in "$SESSIONS_DIR"/*/; do
+    dir="${group_dir%/}/.claude/$subdir"
+    [ -d "$dir" ] || continue
+    # Read from a process substitution rather than a pipe: a piped loop runs
+    # in a subshell and its TOTAL_FREED updates would be lost.
+    while IFS= read -r -d '' f; do
+      name=$(basename "$f")
+      [ "$name" = "$keep" ] && continue
+      mentions_active "$name" && continue
+      remove "$f"
+    done < <(find "$dir" -type f -mtime +"$days" -print0 2>/dev/null)
+  done
+}
+
+# --- Prune session JSONLs and tool-results dirs (>7 days) ---
+
+for group_dir in "$SESSIONS_DIR"/*/; do
+  jsonl_dir="${group_dir%/}/.claude/projects/-workspace-group"
+  [ -d "$jsonl_dir" ] || continue
+
+  for jsonl in "$jsonl_dir"/*.jsonl; do
+    [ -f "$jsonl" ] || continue
+    id=$(basename "$jsonl" .jsonl)
+
+    # Never delete the active session
+    is_active "$id" && continue
+
+    # Only delete if older than 7 days
+    [ -n "$(find "$jsonl" -mtime +7 2>/dev/null)" ] || continue
+
+    remove "$jsonl"
+    # Remove matching tool-results directory
+    if [ -d "$jsonl_dir/$id" ]; then
+      remove "$jsonl_dir/$id"
+    fi
+  done
+done
+
+# --- Prune debug logs, todos (>3 days) and telemetry (>7 days) ---
+
+prune_claude_files debug 3 latest
+prune_claude_files todos 3
+prune_claude_files telemetry 7
+
+# --- Prune group logs (>7 days) ---
+
+for logs_dir in "$GROUPS_DIR"/*/logs; do
+  # Skip the unexpanded glob when no group has a logs directory, so find is
+  # never handed a non-existent path.
+  [ -d "$logs_dir" ] || continue
+  while IFS= read -r -d '' f; do
+    remove "$f"
+  done < <(find "$logs_dir" -type f -mtime +7 -print0 2>/dev/null)
+done
+
+# --- Summary ---
+
+if $DRY_RUN; then
+  log "DRY RUN complete — would free ~${TOTAL_FREED}K"
+else
+  log "Done — freed ~${TOTAL_FREED}K"
+fi
diff --git a/src/index.ts b/src/index.ts
index a6b74cf..004764d 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -61,6 +61,7 @@ import {
   loadSenderAllowlist,
   shouldDropMessage,
 } from './sender-allowlist.js';
+import { startSessionCleanup } from './session-cleanup.js';
 import { startSchedulerLoop } from './task-scheduler.js';
 import { Channel, NewMessage, RegisteredGroup } from './types.js';
 import { logger } from './logger.js';
@@ -746,6 +747,7 @@ async function main(): Promise {
       }
     },
   });
+  startSessionCleanup();
   queue.setProcessMessagesFn(processGroupMessages);
   recoverPendingMessages();
   startMessageLoop().catch((err) => {
diff --git a/src/session-cleanup.ts b/src/session-cleanup.ts
new file mode 100644
index 0000000..feb507c
--- /dev/null
+++ b/src/session-cleanup.ts
@@ -0,0 +1,27 @@
+import { execFile } from 'child_process';
+import path from 'path';
+
+import { logger } from './logger.js';
+
+const CLEANUP_INTERVAL = 24 * 60 * 60 * 1000; // 24 hours
+const SCRIPT_PATH = path.resolve(process.cwd(), 'scripts/cleanup-sessions.sh');
+
+/** Run the cleanup script once and log the last line of its output. */
+function runCleanup(): void {
+  execFile('/bin/bash', [SCRIPT_PATH], { timeout: 60_000 }, (err, stdout) => {
+    if (err) {
+      logger.error({ err }, 'Session cleanup failed');
+      return;
+    }
+    const summary = stdout.trim().split('\n').pop();
+    if (summary) logger.info(summary);
+  });
+}
+
+/** Schedule cleanup 30s after startup and then every 24 hours. */
+export function startSessionCleanup(): void {
+  // Run once at startup (delayed 30s to not compete with init)
+  setTimeout(runCleanup, 30_000);
+  // Then every 24 hours
+  setInterval(runCleanup, CLEANUP_INTERVAL);
+}