/* Copyright 2024 New Vector Ltd. Copyright 2019, 2020 The Matrix.org Foundation C.I.C. Copyright 2019 New Vector Ltd SPDX-License-Identifier: AGPL-3.0-only OR GPL-3.0-only Please see LICENSE files in the repository root for full details. */ import { MatrixEvent, MsgType } from "matrix-js-sdk/src/matrix"; import { checkBlockNode } from "../HtmlUtils"; import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks"; import { Part, PartCreator, Type } from "./parts"; import SdkConfig from "../SdkConfig"; import { textToHtmlRainbow } from "../utils/colour"; import { stripPlainReply } from "../utils/Reply"; const LIST_TYPES = ["UL", "OL", "LI"]; // Escapes all markup in the given text function escape(text: string): string { return text.replace(/[\\*_[\]`<]|^>/g, (match) => `\\${match}`); } // Finds the length of the longest backtick sequence in the given text, used for // escaping backticks in code blocks export function longestBacktickSequence(text: string): number { let length = 0; let currentLength = 0; for (const c of text) { if (c === "`") { currentLength++; } else { length = Math.max(length, currentLength); currentLength = 0; } } return Math.max(length, currentLength); } function isListChild(n: Node): boolean { return LIST_TYPES.includes(n.parentNode?.nodeName || ""); } function parseAtRoomMentions(text: string, pc: PartCreator, opts: IParseOptions): Part[] { const ATROOM = "@room"; const parts: Part[] = []; text.split(ATROOM).forEach((textPart, i, arr) => { if (textPart.length) { parts.push(...pc.plainWithEmoji(opts.shouldEscape ? escape(textPart) : textPart)); } // it's safe to never append @room after the last textPart // as split will report an empty string at the end if // `text` ended in @room. const isLast = i === arr.length - 1; if (!isLast) { parts.push(pc.atRoomPill(ATROOM)); } }); return parts; } function parseLink(n: Node, pc: PartCreator, opts: IParseOptions): Part[] { const { href } = n as HTMLAnchorElement; const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID switch (resourceId?.[0]) { case "@": return [pc.userPill(n.textContent || "", resourceId)]; case "#": return [pc.roomPill(resourceId)]; } const children = Array.from(n.childNodes); if (href === n.textContent && children.every((c) => c.nodeType === Node.TEXT_NODE)) { return parseAtRoomMentions(n.textContent, pc, opts); } else { return [pc.plain("["), ...parseChildren(n, pc, opts), pc.plain(`](${href})`)]; } } function parseImage(n: Node, pc: PartCreator, opts: IParseOptions): Part[] { const { alt, src } = n as HTMLImageElement; return pc.plainWithEmoji(`![${escape(alt)}](${src})`); } function parseCodeBlock(n: Node, pc: PartCreator, opts: IParseOptions): Part[] { if (!n.textContent) return []; let language = ""; if (n.firstChild?.nodeName === "CODE") { for (const className of (n.firstChild as HTMLElement).classList) { if (className.startsWith("language-") && !className.startsWith("language-_")) { language = className.slice("language-".length); break; } } } const text = n.textContent.replace(/\n$/, ""); // Escape backticks by using even more backticks for the fence if necessary const fence = "`".repeat(Math.max(3, longestBacktickSequence(text) + 1)); const parts: Part[] = [...pc.plainWithEmoji(fence + language), pc.newline()]; text.split("\n").forEach((line) => { parts.push(...pc.plainWithEmoji(line)); parts.push(pc.newline()); }); parts.push(pc.plain(fence)); return parts; } function parseHeader(n: Node, pc: PartCreator, opts: IParseOptions): Part[] { const depth = parseInt(n.nodeName.slice(1), 10); const prefix = pc.plain("#".repeat(depth) + " "); return [prefix, ...parseChildren(n, pc, opts)]; } function checkIgnored(n: Node): boolean { if (n.nodeType === Node.TEXT_NODE) { // Element adds \n text nodes in a lot of places, // which should be ignored return n.nodeValue === "\n"; } else if (n.nodeType === Node.ELEMENT_NODE) { return n.nodeName === "MX-REPLY"; } return true; } function prefixLines(parts: Part[], prefix: string, pc: PartCreator): void { parts.unshift(pc.plain(prefix)); for (let i = 0; i < parts.length; i++) { if (parts[i].type === Type.Newline) { parts.splice(i + 1, 0, pc.plain(prefix)); i += 1; } } } function parseChildren(n: Node, pc: PartCreator, opts: IParseOptions, mkListItem?: (li: Node) => Part[]): Part[] { let prev: ChildNode | undefined; return Array.from(n.childNodes).flatMap((c) => { const parsed = parseNode(c, pc, opts, mkListItem); if (parsed.length && prev && (checkBlockNode(prev) || checkBlockNode(c))) { if (isListChild(c)) { // Use tighter spacing within lists parsed.unshift(pc.newline()); } else { parsed.unshift(pc.newline(), pc.newline()); } } if (parsed.length) prev = c; return parsed; }); } function parseNode(n: Node, pc: PartCreator, opts: IParseOptions, mkListItem?: (li: Node) => Part[]): Part[] { if (checkIgnored(n)) return []; switch (n.nodeType) { case Node.TEXT_NODE: return parseAtRoomMentions(n.nodeValue || "", pc, opts); case Node.ELEMENT_NODE: switch (n.nodeName) { case "H1": case "H2": case "H3": case "H4": case "H5": case "H6": return parseHeader(n, pc, opts); case "A": return parseLink(n, pc, opts); case "IMG": return parseImage(n, pc, opts); case "BR": return [pc.newline()]; case "HR": return [pc.plain("---")]; case "EM": return [pc.plain("_"), ...parseChildren(n, pc, opts), pc.plain("_")]; case "STRONG": return [pc.plain("**"), ...parseChildren(n, pc, opts), pc.plain("**")]; case "DEL": return [pc.plain(""), ...parseChildren(n, pc, opts), pc.plain("")]; case "S": return [pc.plain(""), ...parseChildren(n, pc, opts), pc.plain("")]; case "SUB": return [pc.plain(""), ...parseChildren(n, pc, opts), pc.plain("")]; case "SUP": return [pc.plain(""), ...parseChildren(n, pc, opts), pc.plain("")]; case "U": return [pc.plain(""), ...parseChildren(n, pc, opts), pc.plain("")]; case "PRE": return parseCodeBlock(n, pc, opts); case "CODE": { // Escape backticks by using multiple backticks for the fence if necessary const fence = "`".repeat(longestBacktickSequence(n.textContent || "") + 1); return pc.plainWithEmoji(`${fence}${n.textContent}${fence}`); } case "BLOCKQUOTE": { const parts = parseChildren(n, pc, opts); prefixLines(parts, "> ", pc); return parts; } case "LI": return mkListItem?.(n) ?? parseChildren(n, pc, opts); case "UL": { const parts = parseChildren(n, pc, opts, (li) => [pc.plain("- "), ...parseChildren(li, pc, opts)]); if (isListChild(n)) { prefixLines(parts, " ", pc); } return parts; } case "OL": { let counter = (n as HTMLOListElement).start ?? 1; const parts = parseChildren(n, pc, opts, (li) => { const parts = [pc.plain(`${counter}. `), ...parseChildren(li, pc, opts)]; counter++; return parts; }); if (isListChild(n)) { prefixLines(parts, " ", pc); } return parts; } case "DIV": case "SPAN": // Math nodes are translated back into delimited latex strings if ((n as Element).hasAttribute("data-mx-maths")) { const delims = SdkConfig.get().latex_maths_delims; const delimLeft = n.nodeName === "SPAN" ? (delims?.inline?.left ?? "\\(") : (delims?.display?.left ?? "\\["); const delimRight = n.nodeName === "SPAN" ? (delims?.inline?.right ?? "\\)") : (delims?.display?.right ?? "\\]"); const tex = (n as Element).getAttribute("data-mx-maths"); return pc.plainWithEmoji(`${delimLeft}${tex}${delimRight}`); } // Spoilers are translated back into their slash command form else if ((n as Element).hasAttribute("data-mx-spoiler")) { return [pc.plain("/spoiler "), ...parseChildren(n, pc, opts)]; } } } return parseChildren(n, pc, opts); } interface IParseOptions { isQuotedMessage?: boolean; shouldEscape?: boolean; } function parseHtmlMessage(html: string, pc: PartCreator, opts: IParseOptions): Part[] { // no nodes from parsing here should be inserted in the document, // as scripts in event handlers, etc would be executed then. // we're only taking text, so that is fine const parts = parseNode(new DOMParser().parseFromString(html, "text/html").body, pc, opts); if (opts.isQuotedMessage) { prefixLines(parts, "> ", pc); } return parts; } export function parsePlainTextMessage(body: string, pc: PartCreator, opts: IParseOptions): Part[] { const lines = body.split(/\r\n|\r|\n/g); // split on any new-line combination not just \n, collapses \r\n return lines.reduce((parts, line, i) => { if (opts.isQuotedMessage) { parts.push(pc.plain("> ")); } parts.push(...parseAtRoomMentions(line, pc, opts)); const isLast = i === lines.length - 1; if (!isLast) { parts.push(pc.newline()); } return parts; }, [] as Part[]); } export function parseEvent(event: MatrixEvent, pc: PartCreator, opts: IParseOptions = { shouldEscape: true }): Part[] { const content = event.getContent(); let parts: Part[]; const isEmote = content.msgtype === MsgType.Emote; let isRainbow = false; if (content.format === "org.matrix.custom.html") { parts = parseHtmlMessage(content.formatted_body || "", pc, opts); if (content.body && content.formatted_body && textToHtmlRainbow(content.body) === content.formatted_body) { isRainbow = true; } } else { let body = content.body || ""; if (event.replyEventId) { body = stripPlainReply(body); } parts = parsePlainTextMessage(body, pc, opts); } if (isEmote && isRainbow) { parts.unshift(pc.plain("/rainbowme ")); } else if (isRainbow) { parts.unshift(pc.plain("/rainbow ")); } else if (isEmote) { parts.unshift(pc.plain("/me ")); } return parts; }