Merge branch 'main' into main

This commit is contained in:
gavrielc
2026-04-23 23:04:35 +03:00
committed by GitHub
15 changed files with 531 additions and 103 deletions

View File

@@ -81,6 +81,26 @@ export interface ChatSdkBridgeConfig {
* chunk boundary will render as two independent blocks on the receiving
* platform, which is the same behavior as manually re-opening a fence.
*/
/**
* Decode the actual option value from a button callback. Buttons are encoded
* with an integer index (to keep under Telegram's 64-byte callback_data cap),
* and the real value is looked up via `getAskQuestionRender(questionId)`.
* Falls back to treating the tail as a literal value so old in-flight cards
* (encoded before this shortening landed) still resolve.
*/
function resolveSelectedOption(
render: { options: NormalizedOption[] } | undefined,
eventValue: string | undefined,
tail: string | undefined,
): string {
const candidate = eventValue ?? tail ?? '';
if (render && /^\d+$/.test(candidate)) {
const idx = Number(candidate);
if (render.options[idx]) return render.options[idx].value;
}
return candidate;
}
export function splitForLimit(text: string, limit: number): string[] {
if (text.length <= limit) return [text];
const chunks: string[] = [];
@@ -241,11 +261,15 @@ export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter
const parts = event.actionId.split(':');
if (parts.length < 3) return;
const questionId = parts[1];
const selectedOption = event.value || '';
const tail = parts.slice(2).join(':');
const userId = event.user?.userId || '';
// Resolve render metadata BEFORE dispatching onAction (which deletes the row).
const render = getAskQuestionRender(questionId);
// New format: button id/value is an integer index into options (kept
// short to fit Telegram's 64-byte callback_data cap). Old format:
// the full value is embedded in actionId/value directly.
const selectedOption = resolveSelectedOption(render, event.value, tail);
const title = render?.title ?? '❓ Question';
const matched = render?.options.find((o) => o.value === selectedOption);
const selectedLabel = matched?.selectedLabel ?? selectedOption ?? '(clicked)';
@@ -349,8 +373,13 @@ export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter
children: [
CardText(question),
Actions(
options.map((opt) =>
Button({ id: `ncq:${questionId}:${opt.value}`, label: opt.label, value: opt.value }),
// Encode button id/value with the option index rather than the
// full value. Telegram caps callback_data at 64 bytes, and
// long values (e.g. ISO datetimes, URLs) push the JSON payload
// well past that. The onAction handlers resolve the index back
// to the real value via getAskQuestionRender(questionId).
options.map((opt, idx) =>
Button({ id: `ncq:${questionId}:${idx}`, label: opt.label, value: String(idx) }),
),
),
],
@@ -511,12 +540,12 @@ async function handleForwardedEvent(
// Parse the selected option from custom_id
let questionId: string | undefined;
let selectedOption: string | undefined;
let tail: string | undefined;
if (customId?.startsWith('ncq:')) {
const colonIdx = customId.indexOf(':', 4); // after "ncq:"
if (colonIdx !== -1) {
questionId = customId.slice(4, colonIdx);
selectedOption = customId.slice(colonIdx + 1);
tail = customId.slice(colonIdx + 1);
}
}
@@ -525,6 +554,9 @@ async function handleForwardedEvent(
((interaction.message as Record<string, unknown>)?.embeds as Array<Record<string, unknown>>) || [];
const originalDescription = (originalEmbeds[0]?.description as string) || '';
const render = questionId ? getAskQuestionRender(questionId) : undefined;
// Discord custom_id mirrors the new index-based encoding (see Button
// construction). Decode back to the real option value for downstream.
const selectedOption = resolveSelectedOption(render, tail, tail);
const cardTitle = render?.title ?? ((originalEmbeds[0]?.title as string) || '❓ Question');
const matchedOpt = render?.options.find((o) => o.value === selectedOption);
const selectedLabel = matchedOpt?.selectedLabel ?? selectedOption ?? customId;

View File

@@ -36,7 +36,13 @@ import {
type ProviderContainerContribution,
type VolumeMount,
} from './providers/provider-container-registry.js';
import { markContainerRunning, markContainerStopped, sessionDir, writeSessionRouting } from './session-manager.js';
import {
heartbeatPath,
markContainerRunning,
markContainerStopped,
sessionDir,
writeSessionRouting,
} from './session-manager.js';
import type { AgentGroup, Session } from './types.js';
const onecli = new OneCLI({ url: ONECLI_URL, apiKey: ONECLI_API_KEY });
@@ -131,6 +137,12 @@ async function spawnContainer(session: Session): Promise<void> {
log.info('Spawning container', { sessionId: session.id, agentGroup: agentGroup.name, containerName });
// Clear any orphan heartbeat from a previous container instance — the
// sweep's ceiling check treats a missing file as "fresh spawn, give grace"
// (host-sweep.ts line 87). Without this, the stale mtime can trigger an
// immediate kill before the new container touches the file itself.
fs.rmSync(heartbeatPath(agentGroup.id, session.id), { force: true });
const container = spawn(CONTAINER_RUNTIME_BIN, args, { stdio: ['ignore', 'pipe', 'pipe'] });
activeContainers.set(session.id, { process: container, containerName });

View File

@@ -139,10 +139,10 @@ export function getMessageForRetry(
db: Database.Database,
messageId: string,
status: string,
): { id: string; tries: number } | undefined {
return db.prepare('SELECT id, tries FROM messages_in WHERE id = ? AND status = ?').get(messageId, status) as
| { id: string; tries: number }
| undefined;
): { id: string; tries: number; processAfter: string | null } | undefined {
return db
.prepare('SELECT id, tries, process_after as processAfter FROM messages_in WHERE id = ? AND status = ?')
.get(messageId, status) as { id: string; tries: number; processAfter: string | null } | undefined;
}
export function syncProcessingAcks(inDb: Database.Database, outDb: Database.Database): void {

View File

@@ -97,10 +97,16 @@ export function deleteSession(id: string): void {
// ── Pending Questions ──
export function createPendingQuestion(pq: PendingQuestion): void {
getDb()
/**
* Insert a pending question row. Idempotent: when delivery fails and retries,
* the second attempt calls this with the same question_id — without `OR
* IGNORE` that would throw UNIQUE and prevent the retry from reaching the
* actual send step. Returns true if a new row was inserted.
*/
export function createPendingQuestion(pq: PendingQuestion): boolean {
const result = getDb()
.prepare(
`INSERT INTO pending_questions (question_id, session_id, message_out_id, platform_id, channel_type, thread_id, title, options_json, created_at)
`INSERT OR IGNORE INTO pending_questions (question_id, session_id, message_out_id, platform_id, channel_type, thread_id, title, options_json, created_at)
VALUES (@question_id, @session_id, @message_out_id, @platform_id, @channel_type, @thread_id, @title, @options_json, @created_at)`,
)
.run({
@@ -114,6 +120,7 @@ export function createPendingQuestion(pq: PendingQuestion): void {
options_json: JSON.stringify(pq.options),
created_at: pq.created_at,
});
return result.changes > 0;
}
export function getPendingQuestion(questionId: string): PendingQuestion | undefined {
@@ -131,16 +138,21 @@ export function deletePendingQuestion(questionId: string): void {
// ── Pending Approvals ──
/**
* Insert a pending approval row. Idempotent for the same reason as
* createPendingQuestion: delivery retries with the same approval_id must not
* fail on UNIQUE before the send step gets a chance to succeed.
*/
export function createPendingApproval(
pa: Partial<PendingApproval> &
Pick<
PendingApproval,
'approval_id' | 'request_id' | 'action' | 'payload' | 'created_at' | 'title' | 'options_json'
>,
): void {
getDb()
): boolean {
const result = getDb()
.prepare(
`INSERT INTO pending_approvals
`INSERT OR IGNORE INTO pending_approvals
(approval_id, session_id, request_id, action, payload, created_at,
agent_group_id, channel_type, platform_id, platform_message_id, expires_at, status,
title, options_json)
@@ -159,6 +171,7 @@ export function createPendingApproval(
status: 'pending',
...pa,
});
return result.changes > 0;
}
export function getPendingApproval(approvalId: string): PendingApproval | undefined {

View File

@@ -321,7 +321,7 @@ async function deliverMessage(
questionId: content.questionId,
});
} else {
createPendingQuestion({
const inserted = createPendingQuestion({
question_id: content.questionId,
session_id: session.id,
message_out_id: msg.id,
@@ -332,7 +332,9 @@ async function deliverMessage(
options: normalizeOptions(rawOptions as never),
created_at: new Date().toISOString(),
});
log.info('Pending question created', { questionId: content.questionId, sessionId: session.id });
if (inserted) {
log.info('Pending question created', { questionId: content.questionId, sessionId: session.id });
}
}
}

View File

@@ -159,23 +159,31 @@ async function sweepSession(session: Session): Promise<void> {
syncProcessingAcks(inDb, outDb);
}
const alive = isContainerRunning(session.id);
// 2. Crashed-container cleanup: processing rows left behind get retried.
if (!alive && outDb) {
resetStuckProcessingRows(inDb, outDb, session, 'container not running');
// 2. Wake a container if work is due and nothing is running. Ordered
// before the crashed-container cleanup so a fresh container gets a chance
// to clean its own orphan processing_ack rows on startup (see
// container/agent-runner/src/db/connection.ts). Otherwise the reset path
// would keep bumping process_after into the future, dueCount would stay 0,
// and the wake would never fire.
const dueCount = countDueMessages(inDb);
if (dueCount > 0 && !isContainerRunning(session.id)) {
log.info('Waking container for due messages', { sessionId: session.id, count: dueCount });
await wakeContainer(session);
}
const alive = isContainerRunning(session.id);
// 3. Running-container SLA: absolute ceiling + per-claim stuck rules.
if (alive && outDb) {
enforceRunningContainerSla(inDb, outDb, session, agentGroup.id);
}
// 4. Wake a container if new work is due and nothing is running.
const dueCount = countDueMessages(inDb);
if (dueCount > 0 && !isContainerRunning(session.id)) {
log.info('Waking container for due messages', { sessionId: session.id, count: dueCount });
await wakeContainer(session);
// 4. Crashed-container cleanup: processing rows left behind get retried.
// Only fires when wake in step 2 didn't pick up the work (no due messages,
// or wake failed). resetStuckProcessingRows itself is idempotent — it
// skips messages already scheduled for a future retry.
if (!alive && outDb) {
resetStuckProcessingRows(inDb, outDb, session, 'container not running');
}
// 5. Recurrence fanout for completed recurring tasks.
@@ -246,10 +254,16 @@ function resetStuckProcessingRows(
reason: string,
): void {
const claims = getProcessingClaims(outDb);
const now = Date.now();
for (const { message_id } of claims) {
const msg = getMessageForRetry(inDb, message_id, 'pending');
if (!msg) continue;
// Already rescheduled for a future retry — don't bump tries again. The
// wake path (sweep step 2) will fire when process_after elapses and a
// fresh container will clean the orphan claim on startup.
if (msg.processAfter && Date.parse(msg.processAfter) > now) continue;
if (msg.tries >= MAX_TRIES) {
markMessageFailed(inDb, msg.id);
log.warn('Message marked as failed after max retries', {