fix(telegram): sanitize outbound markdown for legacy parse mode
The @chat-adapter/telegram adapter hardcodes parse_mode=Markdown (legacy) but its converter emits CommonMark. Messages containing **bold** or list bullets that round-trip to `*` produce "can't parse entities" errors and get dropped after retries. Add an opt-in transformOutboundText hook on the chat-sdk bridge and wire a Telegram-specific sanitizer that downgrades **bold** to *bold*, rewrites dash/plus list bullets to a Unicode bullet so the adapter's re-stringify doesn't inject stray `*`, and strips unbalanced delimiters or brackets. Only Telegram opts in; other channels are unaffected. Workaround until upstream (vercel/chat) ships mode-aware conversion — PR #367 adds a parseMode knob but not the converter fix. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -57,10 +57,18 @@ export interface ChatSdkBridgeConfig {
|
|||||||
* way and the default depends on installation style.
|
* way and the default depends on installation style.
|
||||||
*/
|
*/
|
||||||
supportsThreads: boolean;
|
supportsThreads: boolean;
|
||||||
|
/**
|
||||||
|
* Optional transform applied to outbound text/markdown before it reaches the
|
||||||
|
* adapter. Used by channels that need to sanitize for a platform-specific
|
||||||
|
* quirk (e.g. Telegram's legacy Markdown parse mode).
|
||||||
|
*/
|
||||||
|
transformOutboundText?: (text: string) => string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter {
|
export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter {
|
||||||
const { adapter } = config;
|
const { adapter } = config;
|
||||||
|
const transformText = (t: string): string =>
|
||||||
|
config.transformOutboundText ? config.transformOutboundText(t) : t;
|
||||||
let chat: Chat;
|
let chat: Chat;
|
||||||
let state: SqliteStateAdapter;
|
let state: SqliteStateAdapter;
|
||||||
let setupConfig: ChannelSetup;
|
let setupConfig: ChannelSetup;
|
||||||
@@ -321,7 +329,7 @@ export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter
|
|||||||
|
|
||||||
if (content.operation === 'edit' && content.messageId) {
|
if (content.operation === 'edit' && content.messageId) {
|
||||||
await adapter.editMessage(tid, content.messageId as string, {
|
await adapter.editMessage(tid, content.messageId as string, {
|
||||||
markdown: (content.text as string) || (content.markdown as string) || '',
|
markdown: transformText((content.text as string) || (content.markdown as string) || ''),
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -370,7 +378,8 @@ export function createChatSdkBridge(config: ChatSdkBridgeConfig): ChannelAdapter
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Normal message
|
// Normal message
|
||||||
const text = (content.markdown as string) || (content.text as string);
|
const rawText = (content.markdown as string) || (content.text as string);
|
||||||
|
const text = rawText ? transformText(rawText) : rawText;
|
||||||
if (text) {
|
if (text) {
|
||||||
// Attach files if present (FileUpload format: { data, filename })
|
// Attach files if present (FileUpload format: { data, filename })
|
||||||
const fileUploads = message.files?.map((f: { data: Buffer; filename: string }) => ({
|
const fileUploads = message.files?.map((f: { data: Buffer; filename: string }) => ({
|
||||||
|
|||||||
70
src/channels/telegram-markdown-sanitize.test.ts
Normal file
70
src/channels/telegram-markdown-sanitize.test.ts
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { sanitizeTelegramLegacyMarkdown } from './telegram-markdown-sanitize.js';
|
||||||
|
|
||||||
|
// Behavioural spec for sanitizeTelegramLegacyMarkdown: each case feeds one
// CommonMark-flavoured input through the sanitizer and pins the exact text
// that should be handed on for Telegram's legacy `Markdown` parse mode.
describe('sanitizeTelegramLegacyMarkdown', () => {
  it('downgrades CommonMark **bold** to legacy *bold*', () => {
    expect(sanitizeTelegramLegacyMarkdown('**Host path**')).toBe('*Host path*');
  });

  it('downgrades CommonMark __bold__ to legacy _italic_', () => {
    expect(sanitizeTelegramLegacyMarkdown('__label__')).toBe('_label_');
  });

  it('leaves balanced legacy *bold* and _italic_ alone', () => {
    expect(sanitizeTelegramLegacyMarkdown('a *b* c _d_ e')).toBe('a *b* c _d_ e');
  });

  it('preserves inline code spans untouched', () => {
    const input = 'see `file_name.py` and `**not bold**` here';
    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
  });

  it('preserves fenced code blocks untouched', () => {
    const input = '```\nfoo_bar **baz**\n```';
    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
  });

  it('strips formatting chars on odd delimiter count (unbalanced *)', () => {
    // Only the three asterisks are deleted; the spaces that surrounded the
    // lone `*` both survive, hence the double space between "a" and "b".
    expect(sanitizeTelegramLegacyMarkdown('a * b *c*')).toBe('a  b c');
  });

  it('strips formatting chars on odd delimiter count (unbalanced _)', () => {
    expect(sanitizeTelegramLegacyMarkdown('file_name has _one italic_')).toBe(
      'filename has one italic',
    );
  });

  it('strips brackets when unbalanced', () => {
    expect(sanitizeTelegramLegacyMarkdown('see [docs here')).toBe('see docs here');
  });

  it('leaves matched brackets (e.g. links) alone when counts balance', () => {
    const input = 'see [docs](https://example.com) for more';
    expect(sanitizeTelegramLegacyMarkdown(input)).toBe(input);
  });

  it('fixes the real failing message', () => {
    const input =
      'Sure! What do you want to mount, and where should it appear inside the container?\n\n' +
      '- **Host path** (on your machine): e.g. `~/projects/webapp`\n' +
      '- **Container path**: e.g. `workspace/webapp`\n' +
      '- **Read-only or read-write?**';
    const out = sanitizeTelegramLegacyMarkdown(input);
    expect(out).not.toContain('**');
    expect(out).toContain('*Host path*');
    expect(out).toContain('`~/projects/webapp`');
    // Every remaining asterisk must be part of a pair.
    expect((out.match(/\*/g) ?? []).length % 2).toBe(0);
  });

  it('is a no-op on empty string', () => {
    expect(sanitizeTelegramLegacyMarkdown('')).toBe('');
  });

  it('replaces dash list bullets with • so the adapter does not re-emit `*` markers', () => {
    expect(sanitizeTelegramLegacyMarkdown('- one\n- two')).toBe('• one\n• two');
  });

  it('preserves indented list structure', () => {
    expect(sanitizeTelegramLegacyMarkdown(' - nested')).toBe(' • nested');
  });
});
|
||||||
50
src/channels/telegram-markdown-sanitize.ts
Normal file
50
src/channels/telegram-markdown-sanitize.ts
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
/**
|
||||||
|
* Sanitize outbound text for Telegram's legacy `Markdown` parse mode.
|
||||||
|
*
|
||||||
|
* WORKAROUND: The @chat-adapter/telegram adapter hardcodes parse_mode=Markdown
|
||||||
|
* (legacy) but its converter emits CommonMark. Messages with `**bold**`, odd
|
||||||
|
* delimiter counts, or malformed links are rejected by Telegram and dropped
|
||||||
|
* after retries. Remove this once upstream ships real mode-aware conversion
|
||||||
|
* (vercel/chat PR #367 adds the knob; a follow-up is needed for the converter).
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Fenced code blocks (possibly multi-line) and single-line inline code spans. */
const CODE_PATTERN = /```[\s\S]*?```|`[^`\n]*`/g;
/**
 * The `(target)` half of an inline link: a parenthesised run without spaces or
 * nested parentheses that sits directly after a closing `]`.
 */
const LINK_TARGET_PATTERN = /(?<=\])\([^()\s]*\)/g;
const PLACEHOLDER_PREFIX = '\x00CODE';
const LINK_PLACEHOLDER_PREFIX = '\x00LINK';
const PLACEHOLDER_SUFFIX = '\x00';

/** Swap every match of `pattern` for an indexed placeholder, remembering the original text in `store`. */
function stashSegments(text: string, pattern: RegExp, prefix: string, store: string[]): string {
  return text.replace(pattern, (segment) => {
    store.push(segment);
    return `${prefix}${store.length - 1}${PLACEHOLDER_SUFFIX}`;
  });
}

/** Put stashed segments back; placeholder-shaped text with an unknown index is left exactly as it was. */
function restoreSegments(text: string, prefix: string, store: readonly string[]): string {
  return text.replace(
    new RegExp(`${prefix}(\\d+)${PLACEHOLDER_SUFFIX}`, 'g'),
    (placeholder: string, index: string) => store[Number(index)] ?? placeholder,
  );
}

/**
 * Rewrite CommonMark-flavoured text so it is acceptable to a legacy-`Markdown`
 * consumer.
 *
 * Steps, in order:
 *  1. Code spans/blocks and inline-link targets are set aside so none of the
 *     later rewrites can alter them.
 *  2. `-` / `+` list bullets become `•`.
 *  3. `**x**` becomes `*x*` and `__x__` becomes `_x_`.
 *  4. If `[` and `]` counts differ, all brackets are removed.
 *  5. If the number of `*` or of `_` is odd, every `*` and `_` is removed.
 *  6. The protected segments are restored verbatim.
 *
 * @param input Outbound message text; an empty string is returned unchanged.
 * @returns The sanitized text.
 */
export function sanitizeTelegramLegacyMarkdown(input: string): string {
  if (!input) return input;

  const codeSegments: string[] = [];
  let text = stashSegments(input, CODE_PATTERN, PLACEHOLDER_PREFIX, codeSegments);

  // Link targets are URLs, not formatted text: a `_` or `*` inside one must
  // neither be counted as a delimiter nor stripped, or the link would point
  // somewhere else.
  const linkTargets: string[] = [];
  text = stashSegments(text, LINK_TARGET_PATTERN, LINK_PLACEHOLDER_PREFIX, linkTargets);

  // The adapter re-parses and re-stringifies markdown before sending, which
  // rewrites `- item` list bullets into `* item` — injecting unbalanced
  // asterisks that Telegram's legacy Markdown parser then rejects. Replace
  // list bullets with a plain Unicode bullet so the adapter treats the line
  // as prose. Only spaces/tabs are matched around the marker: `\s` would also
  // consume newlines and glue a bare `-` line onto the following line.
  text = text.replace(/^([ \t]*)[-+][ \t]+/gm, '$1• ');

  text = text.replace(/\*\*([^*\n]+?)\*\*/g, '*$1*');
  text = text.replace(/__([^_\n]+?)__/g, '_$1_');

  const openBrackets = (text.match(/\[/g) ?? []).length;
  const closeBrackets = (text.match(/\]/g) ?? []).length;
  if (openBrackets !== closeBrackets) {
    text = text.replace(/[[\]]/g, '');
    // With the brackets gone the stashed targets are no longer link targets,
    // just prose in parentheses, so they must take part in the delimiter
    // check below like any other text.
    text = restoreSegments(text, LINK_PLACEHOLDER_PREFIX, linkTargets);
  }

  const starCount = (text.match(/\*/g) ?? []).length;
  const underCount = (text.match(/_/g) ?? []).length;
  if (starCount % 2 !== 0 || underCount % 2 !== 0) {
    text = text.replace(/[*_]/g, '');
  }

  // Links first: a link target may itself contain a code placeholder.
  text = restoreSegments(text, LINK_PLACEHOLDER_PREFIX, linkTargets);
  return restoreSegments(text, PLACEHOLDER_PREFIX, codeSegments);
}
|
||||||
@@ -9,6 +9,7 @@ import { readEnvFile } from '../env.js';
|
|||||||
import { log } from '../log.js';
|
import { log } from '../log.js';
|
||||||
import { createMessagingGroup, getMessagingGroupByPlatform, updateMessagingGroup } from '../db/messaging-groups.js';
|
import { createMessagingGroup, getMessagingGroupByPlatform, updateMessagingGroup } from '../db/messaging-groups.js';
|
||||||
import { createChatSdkBridge, type ReplyContext } from './chat-sdk-bridge.js';
|
import { createChatSdkBridge, type ReplyContext } from './chat-sdk-bridge.js';
|
||||||
|
import { sanitizeTelegramLegacyMarkdown } from './telegram-markdown-sanitize.js';
|
||||||
import { registerChannelAdapter } from './channel-registry.js';
|
import { registerChannelAdapter } from './channel-registry.js';
|
||||||
import type { ChannelAdapter, ChannelSetup, InboundMessage } from './adapter.js';
|
import type { ChannelAdapter, ChannelSetup, InboundMessage } from './adapter.js';
|
||||||
import { tryConsume } from './telegram-pairing.js';
|
import { tryConsume } from './telegram-pairing.js';
|
||||||
@@ -155,6 +156,7 @@ registerChannelAdapter('telegram', {
|
|||||||
concurrency: 'concurrent',
|
concurrency: 'concurrent',
|
||||||
extractReplyContext,
|
extractReplyContext,
|
||||||
supportsThreads: false,
|
supportsThreads: false,
|
||||||
|
transformOutboundText: sanitizeTelegramLegacyMarkdown,
|
||||||
});
|
});
|
||||||
|
|
||||||
const botUsernamePromise = fetchBotUsername(token);
|
const botUsernamePromise = fetchBotUsername(token);
|
||||||
|
|||||||
Reference in New Issue
Block a user