fix(telegram): sanitize outbound markdown for legacy parse mode
The @chat-adapter/telegram adapter hardcodes parse_mode=Markdown (legacy) but its converter emits CommonMark. Messages containing **bold** or list bullets that round-trip to `*` produce "can't parse entities" errors and get dropped after retries. Add an opt-in transformOutboundText hook on the chat-sdk bridge and wire a Telegram-specific sanitizer that downgrades **bold** to *bold*, rewrites dash/plus list bullets to a Unicode bullet so the adapter's re-stringify doesn't inject stray `*`, and strips unbalanced delimiters or brackets. Only Telegram opts in; other channels are unaffected. Workaround until upstream (vercel/chat) ships mode-aware conversion — PR #367 adds a parseMode knob but not the converter fix. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
70
src/channels/telegram-markdown-sanitize.test.ts
Normal file
70
src/channels/telegram-markdown-sanitize.test.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { sanitizeTelegramLegacyMarkdown } from './telegram-markdown-sanitize.js';
|
||||
|
||||
/**
 * Spec for sanitizeTelegramLegacyMarkdown.
 *
 * Each case feeds a small Markdown snippet through the sanitizer and pins the
 * exact text expected back: CommonMark emphasis is downgraded, code spans and
 * fences pass through unchanged, unbalanced delimiters/brackets are stripped,
 * and dash list bullets are rewritten to the Unicode bullet.
 */
describe('sanitizeTelegramLegacyMarkdown', () => {
  // --- Emphasis downgrade -------------------------------------------------

  it('downgrades CommonMark **bold** to legacy *bold*', () => {
    const result = sanitizeTelegramLegacyMarkdown('**Host path**');
    expect(result).toBe('*Host path*');
  });

  it('downgrades CommonMark __bold__ to legacy _italic_', () => {
    const result = sanitizeTelegramLegacyMarkdown('__label__');
    expect(result).toBe('_label_');
  });

  it('leaves balanced legacy *bold* and _italic_ alone', () => {
    const result = sanitizeTelegramLegacyMarkdown('a *b* c _d_ e');
    expect(result).toBe('a *b* c _d_ e');
  });

  // --- Code is never rewritten --------------------------------------------

  it('preserves inline code spans untouched', () => {
    const input = 'see `file_name.py` and `**not bold**` here';
    const result = sanitizeTelegramLegacyMarkdown(input);
    expect(result).toBe(input);
  });

  it('preserves fenced code blocks untouched', () => {
    const input = '```\nfoo_bar **baz**\n```';
    const result = sanitizeTelegramLegacyMarkdown(input);
    expect(result).toBe(input);
  });

  // --- Unbalanced delimiters ----------------------------------------------

  it('strips formatting chars on odd delimiter count (unbalanced *)', () => {
    // Three asterisks in the input; none are expected to survive.
    const result = sanitizeTelegramLegacyMarkdown('a * b *c*');
    expect(result).toBe('a b c');
  });

  it('strips formatting chars on odd delimiter count (unbalanced _)', () => {
    // The underscore inside "file_name" makes the total count odd (three).
    const result = sanitizeTelegramLegacyMarkdown('file_name has _one italic_');
    expect(result).toBe('filename has one italic');
  });

  it('strips brackets when unbalanced', () => {
    const result = sanitizeTelegramLegacyMarkdown('see [docs here');
    expect(result).toBe('see docs here');
  });

  it('leaves matched brackets (e.g. links) alone when counts balance', () => {
    const input = 'see [docs](https://example.com) for more';
    const result = sanitizeTelegramLegacyMarkdown(input);
    expect(result).toBe(input);
  });

  // --- Regression: dash list whose items each start with **bold** ---------

  it('fixes the real failing message', () => {
    const input = [
      'Sure! What do you want to mount, and where should it appear inside the container?\n\n',
      '- **Host path** (on your machine): e.g. `~/projects/webapp`\n',
      '- **Container path**: e.g. `workspace/webapp`\n',
      '- **Read-only or read-write?**',
    ].join('');

    const out = sanitizeTelegramLegacyMarkdown(input);

    expect(out).not.toContain('**');
    expect(out).toContain('*Host path*');
    expect(out).toContain('`~/projects/webapp`');

    // Every remaining asterisk must have a partner.
    const asterisks = out.match(/\*/g) ?? [];
    expect(asterisks.length % 2).toBe(0);
  });

  // --- Edge cases and list bullets ----------------------------------------

  it('is a no-op on empty string', () => {
    const result = sanitizeTelegramLegacyMarkdown('');
    expect(result).toBe('');
  });

  it('replaces dash list bullets with • so the adapter does not re-emit `*` markers', () => {
    const result = sanitizeTelegramLegacyMarkdown('- one\n- two');
    expect(result).toBe('• one\n• two');
  });

  it('preserves indented list structure', () => {
    // Leading whitespace before the bullet is expected to be kept as-is.
    const result = sanitizeTelegramLegacyMarkdown(' - nested');
    expect(result).toBe(' • nested');
  });
});
|
||||
Reference in New Issue
Block a user