From 9faa8a9a2c6824e28c56d2e05a273354c9503b70 Mon Sep 17 00:00:00 2001
From: gabi-simons
Date: Thu, 23 Apr 2026 12:41:33 +0000
Subject: [PATCH] fix(migrate-v1): splice guild_id into Discord platform_id during seed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2's Chat SDK Discord adapter emits `platform_id` as
`discord:<guildId>:<channelId>` at runtime, but v1 only stored
`dc:<channelId>` (no guild). Before this fix `migrate-db` wrote
`discord:<channelId>` into `messaging_groups.platform_id`, which didn't
match what v2 saw on incoming messages — v2 treated every message as a
new channel and fired its channel-registration approval flow instead of
routing to the migrated agent_group.

Now `migrate-db` fetches the bot's guilds once per channel_type via
`GET /users/@me/guilds`. When the bot is in exactly one guild (the
common case), the guild id is spliced into every Discord platform_id at
seed time — matching v2's runtime format. Multi-guild bots fall back to
the v1-format id; v2's channel-registration flow repairs on first
message.

Cost: one extra Discord API call per migration run (not per channel).
No new failure modes — network/auth issues return null, fall through to
the existing behavior.

## Surface

- `v2PlatformId(channelType, jid, { guildId })` — new optional `extra`
  parameter. Back-compat with existing callers.
- `fetchBotGuilds(channelType, lookup)` — new helper in `shared.ts`,
  same pattern as `autoResolveV2Keys`. Handles Discord today; extending
  to other channels is a case-by-case API check.
- `migrate-db` pre-loop: builds `v1EnvMap`, fetches guilds per channel
  type, caches single-guild IDs for the row loop.

## Testing

Verified on a 300-channel Discord v1 install:
- Fresh run produced `discord:<guildId>:<channelId>` platform_ids from
  the start
- Incoming messages now route to the migrated agent_group instead of
  firing the unwire approval flow

Rate-limit note: `/users/@me/guilds` is a single call.
Per-channel `/guilds/<guildId>/channels` lookups for multi-guild bots would need
proper rate-limit handling — deferred.
---
Reviewer notes (ignored by `git am`, not part of the commit message):
- fetchBotGuilds: the guild lookup now carries a 10s AbortSignal timeout so a
  stalled connection cannot hang the migration; a timeout takes the same
  "return null" path as any other network failure.
- migrate-db: .env values are now trimmed and stripped of one pair of wrapping
  quotes (the key was already trimmed, the value was not), so a quoted or
  space-padded DISCORD_BOT_TOKEN still authenticates.
- Type arguments and `<...>` placeholders lost in transit were restored.
- The `index` lines below still carry the blob ids of the original revision.

 setup/migrate-v1/db.ts     | 29 ++++++++++++++++++++-
 setup/migrate-v1/shared.ts | 53 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/setup/migrate-v1/db.ts b/setup/migrate-v1/db.ts
index e455012..d338214 100644
--- a/setup/migrate-v1/db.ts
+++ b/setup/migrate-v1/db.ts
@@ -36,6 +36,7 @@ import { runMigrations } from '../../src/db/migrations/index.js';
 import { log } from '../../src/log.js';
 import { emitStatus } from '../status.js';
 import {
+  fetchBotGuilds,
   generateId,
   inferChannelType,
   readHandoff,
@@ -158,6 +159,31 @@ export async function run(args: string[]): Promise<void> {
   }));
   writeHandoff(h);
 
+  // For channels where v2's platform_id includes a component v1 didn't record
+  // (Discord's guild id), fetch the bot's guilds up-front. If the bot is in
+  // a single guild we can splice that id into every platform_id; otherwise
+  // fall back to the v1-format id (v2's channel-registration flow will repair
+  // on first message). Done ONCE per channel_type, not per-row, so this is
+  // cheap regardless of group count.
+  const v1EnvText = fs.existsSync(paths.env) ? fs.readFileSync(paths.env, 'utf-8') : '';
+  const v1EnvMap = new Map<string, string>();
+  for (const line of v1EnvText.split('\n')) {
+    const t = line.trim();
+    if (!t || t.startsWith('#')) continue;
+    const eq = t.indexOf('=');
+    if (eq <= 0) continue;
+    // Trim the value like the key and drop one pair of wrapping quotes, so
+    // `KEY = "value"` yields the bare value.
+    v1EnvMap.set(t.slice(0, eq).trim(), t.slice(eq + 1).trim().replace(/^(['"])(.*)\1$/, '$2'));
+  }
+  const singleGuildByChannel = new Map<string, string>();
+  for (const channelType of detectedChannels.keys()) {
+    const info = await fetchBotGuilds(channelType, (k) => v1EnvMap.get(k));
+    if (info && info.guildIds.length === 1) {
+      singleGuildByChannel.set(channelType, info.guildIds[0]);
+    }
+  }
+
   // Initialize v2.db (creates schema if not present — runMigrations is no-op
   // when the schema is already current, so this is safe on a live v2 install).
   fs.mkdirSync(path.join(process.cwd(), 'data'), { recursive: true });
@@ -181,7 +207,8 @@ export async function run(args: string[]): Promise<void> {
       continue;
     }
 
-    const platformId = v2PlatformId(channelType, g.jid);
+    const guildId = singleGuildByChannel.get(channelType);
+    const platformId = v2PlatformId(channelType, g.jid, { guildId });
     const createdAt = new Date().toISOString();
 
     try {
diff --git a/setup/migrate-v1/shared.ts b/setup/migrate-v1/shared.ts
index bc5d3dd..4597fcf 100644
--- a/setup/migrate-v1/shared.ts
+++ b/setup/migrate-v1/shared.ts
@@ -358,11 +358,60 @@ export function inferChannelType(jid: string, channelName: string | null): strin
  * v2's messaging_groups.platform_id is always prefixed with the channel_type
  * (see setup/register.ts:118-120). This helper normalizes v1's `jid` into
  * that shape so router lookups at runtime find the right row.
+ *
+ * Some channels need extra structure on the id itself. Discord's Chat SDK
+ * emits `discord:<guildId>:<channelId>` at runtime but v1 only stored
+ * `dc:<channelId>` (no guild). Callers that know the guild (e.g. bot with
+ * a single guild) can pass it via `extra`; otherwise the returned id will
+ * be the v1-format `discord:<channelId>` and will be repaired on first
+ * message via v2's channel-registration approval flow.
  */
-export function v2PlatformId(channelType: string, jid: string): string {
+export function v2PlatformId(channelType: string, jid: string, extra?: { guildId?: string }): string {
   const parsed = parseJid(jid);
   const id = parsed?.id ?? jid;
-  return id.startsWith(`${channelType}:`) ? id : `${channelType}:${id}`;
+  const prefixed = id.startsWith(`${channelType}:`) ? id : `${channelType}:${id}`;
+  // For Discord: splice the guild id in between when we know it and the id
+  // isn't already in `<guildId>:<channelId>` form.
+  if (channelType === 'discord' && extra?.guildId) {
+    const body = prefixed.slice(`discord:`.length);
+    if (!body.includes(':')) return `discord:${extra.guildId}:${body}`;
+  }
+  return prefixed;
+}
+
+/**
+ * Fetch the bot's guild memberships for a channel_type so migrate-db can
+ * form platform_ids matching what the v2 adapter emits at runtime. Returns
+ * null on any failure (network, timeout, auth, rate limit, unsupported
+ * channel_type) — callers fall back to the v1-format platform_id, which
+ * works but may trigger v2's channel-registration flow on first message.
+ *
+ * Currently handles Discord. Extending to other channels: the function
+ * needs a "single-or-multi guild?" shape; for single-guild bots the caller
+ * can splice the guild id globally, for multi-guild a per-channel lookup
+ * is needed and the caller should probably bail (rate-limit risk).
+ */
+export async function fetchBotGuilds(
+  channelType: string,
+  v1EnvLookup: (key: string) => string | undefined,
+): Promise<{ guildIds: string[] } | null> {
+  if (channelType !== 'discord') return null;
+  const token = v1EnvLookup('DISCORD_BOT_TOKEN');
+  if (!token) return null;
+  try {
+    const resp = await fetch('https://discord.com/api/v10/users/@me/guilds', {
+      headers: { Authorization: `Bot ${token}` },
+      // Bound the request so an unresponsive network cannot hang the migration;
+      // a timeout rejects and lands in the catch below (returns null).
+      signal: AbortSignal.timeout(10_000),
+    });
+    if (!resp.ok) return null;
+    const data = (await resp.json()) as Array<{ id?: string }>;
+    const guildIds = data.map((g) => g.id).filter((id): id is string => typeof id === 'string');
+    return { guildIds };
+  } catch {
+    return null;
+  }
 }
 
 // ── Trigger rules → engage mode (ports migration 010's backfill) ───────