element-web/src/editor/deserialize.ts
David Langley 491f0cd08a
Change license (#13)
* Copyright headers 1

* Licence headers 2

* Copyright Headers 3

* Copyright Headers 4

* Copyright Headers 5

* Copyright Headers 6

* Copyright headers 7

* Add copyright headers for html and config file

* Replace license files and update package.json

* Update with CLA

* lint
2024-09-09 13:57:16 +00:00

315 lines
12 KiB
TypeScript

/*
Copyright 2024 New Vector Ltd.
Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
Copyright 2019 New Vector Ltd
SPDX-License-Identifier: AGPL-3.0-only OR GPL-3.0-only
Please see LICENSE files in the repository root for full details.
*/
import { MatrixEvent, MsgType } from "matrix-js-sdk/src/matrix";
import { checkBlockNode } from "../HtmlUtils";
import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
import { Part, PartCreator, Type } from "./parts";
import SdkConfig from "../SdkConfig";
import { textToHtmlRainbow } from "../utils/colour";
import { stripPlainReply } from "../utils/Reply";
// Node names of the HTML list elements. isListChild compares a node's parent
// name against this list to decide whether the node sits directly inside a list.
const LIST_TYPES = ["UL", "OL", "LI"];
/**
 * Backslash-escapes markup characters in the given text.
 *
 * Every backslash, `*`, `_`, `[`, `]`, backtick and `<` is prefixed with a
 * backslash, as is a `>` that is the very first character of the text.
 *
 * @param text - the text to escape
 * @returns the escaped text
 */
function escape(text: string): string {
    const prefixWithBackslash = (markup: string): string => "\\" + markup;
    return text.replace(/[\\*_[\]`<]|^>/g, prefixWithBackslash);
}
/**
 * Measures the longest run of consecutive backticks in the given text.
 * Used to pick a code fence that is longer than anything inside the code.
 *
 * @param text - the text to scan
 * @returns the length of the longest backtick run, or 0 if there is none
 */
export function longestBacktickSequence(text: string): number {
    // Each match is one maximal run of backticks
    const runs = text.match(/`+/g) ?? [];
    return runs.reduce((longest, run) => Math.max(longest, run.length), 0);
}
/**
 * Tells whether the given node's parent is one of the list elements in LIST_TYPES.
 *
 * @param n - the node whose parent is inspected
 * @returns true if the parent's node name is listed in LIST_TYPES
 */
function isListChild(n: Node): boolean {
    // A node without a parent yields "", which is never a list type
    const parentName = n.parentNode?.nodeName || "";
    return LIST_TYPES.includes(parentName);
}
/**
 * Splits the text around every "@room" occurrence, producing an at-room pill
 * for each occurrence and plain/emoji parts for the text in between.
 *
 * @param text - the text to convert
 * @param pc - creator used to build the parts
 * @param opts - when `shouldEscape` is set, the text between mentions is escaped
 * @returns the resulting parts, in document order
 */
function parseAtRoomMentions(text: string, pc: PartCreator, opts: IParseOptions): Part[] {
    const ATROOM = "@room";
    const segments = text.split(ATROOM);
    const lastIndex = segments.length - 1;
    const result: Part[] = [];

    for (const [index, segment] of segments.entries()) {
        if (segment.length > 0) {
            const content = opts.shouldEscape ? escape(segment) : segment;
            result.push(...pc.plainWithEmoji(content));
        }
        // Every segment but the last was followed by an @room in the input.
        // If the text ended in @room, split yields a trailing empty segment,
        // so nothing needs to be appended after the last one.
        if (index !== lastIndex) {
            result.push(pc.atRoomPill(ATROOM));
        }
    }
    return result;
}
/**
 * Converts an anchor element into parts.
 *
 * Links whose primary permalink entity starts with "@" or "#" become user or
 * room pills. A link whose text equals its href and which only has text
 * children is emitted as its bare text; anything else becomes a markdown
 * `[text](href)` link.
 *
 * @param n - the anchor node
 * @param pc - creator used to build the parts
 * @param opts - parse options, passed through to text and child parsing
 * @returns the parts representing the link
 */
function parseLink(n: Node, pc: PartCreator, opts: IParseOptions): Part[] {
    const anchor = n as HTMLAnchorElement;
    const href = anchor.href;
    const label = anchor.textContent;
    const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID

    switch (resourceId?.[0]) {
        case "@":
            return [pc.userPill(label || "", resourceId)];
        case "#":
            return [pc.roomPill(resourceId)];
    }

    const hasOnlyTextChildren = (): boolean =>
        Array.from(anchor.childNodes).every((child) => child.nodeType === Node.TEXT_NODE);

    // A link that merely displays its own URL is emitted as that text
    if (label === href && hasOnlyTextChildren()) {
        return parseAtRoomMentions(label, pc, opts);
    }
    return [pc.plain("["), ...parseChildren(n, pc, opts), pc.plain(`](${href})`)];
}
/**
 * Converts an image element into markdown image syntax, `![alt](src)`.
 * The alt text is always escaped; `opts` is not consulted.
 *
 * @param n - the image node
 * @param pc - creator used to build the parts
 * @param opts - parse options (unused here)
 * @returns the parts representing the image
 */
function parseImage(n: Node, pc: PartCreator, opts: IParseOptions): Part[] {
    const image = n as HTMLImageElement;
    const altText = image.alt;
    const source = image.src;
    const markdown = "![" + escape(altText) + "](" + source + ")";
    return pc.plainWithEmoji(markdown);
}
/**
 * Converts a `PRE` element into a fenced markdown code block.
 *
 * The language is taken from the first `language-*` class on a leading `CODE`
 * child, skipping classes that start with `language-_`. The fence is made
 * long enough to exceed any backtick run inside the code.
 *
 * @param n - the `PRE` node
 * @param pc - creator used to build the parts
 * @param opts - parse options (unused here)
 * @returns the parts of the fenced block, or an empty array if the node has no text
 */
function parseCodeBlock(n: Node, pc: PartCreator, opts: IParseOptions): Part[] {
    const rawText = n.textContent;
    if (!rawText) return [];

    let language = "";
    const codeChild = n.firstChild;
    if (codeChild?.nodeName === "CODE") {
        const languageClass = Array.from((codeChild as HTMLElement).classList).find(
            (name) => name.startsWith("language-") && !name.startsWith("language-_"),
        );
        if (languageClass !== undefined) {
            language = languageClass.slice("language-".length);
        }
    }

    // Drop a single trailing newline so the block does not end in an empty line
    const text = rawText.replace(/\n$/, "");
    // Escape backticks by using even more backticks for the fence if necessary
    const fence = "`".repeat(Math.max(3, longestBacktickSequence(text) + 1));

    const parts: Part[] = [...pc.plainWithEmoji(fence + language), pc.newline()];
    for (const line of text.split("\n")) {
        parts.push(...pc.plainWithEmoji(line), pc.newline());
    }
    parts.push(pc.plain(fence));
    return parts;
}
/**
 * Converts a heading element (`H1`..`H6`) into a markdown heading: one `#`
 * per heading level, a space, then the heading's children.
 *
 * @param n - the heading node; its level is read from the digit in its node name
 * @param pc - creator used to build the parts
 * @param opts - parse options, passed through to child parsing
 * @returns the parts representing the heading
 */
function parseHeader(n: Node, pc: PartCreator, opts: IParseOptions): Part[] {
    const level = Number.parseInt(n.nodeName.slice(1), 10);
    const marker = `${"#".repeat(level)} `;
    return [pc.plain(marker), ...parseChildren(n, pc, opts)];
}
/**
 * Decides whether a node should be skipped entirely during parsing.
 *
 * @param n - the node to check
 * @returns true for text nodes consisting of exactly "\n", for `MX-REPLY`
 *     elements, and for any node that is neither a text nor an element node
 */
function checkIgnored(n: Node): boolean {
    switch (n.nodeType) {
        case Node.TEXT_NODE:
            // Element adds \n text nodes in a lot of places,
            // which should be ignored
            return n.nodeValue === "\n";
        case Node.ELEMENT_NODE:
            return n.nodeName === "MX-REPLY";
        default:
            return true;
    }
}
/**
 * Inserts a plain part holding `prefix` at the start of `parts` and directly
 * after every newline part. The array is modified in place.
 *
 * @param parts - the parts to prefix; mutated
 * @param prefix - the text to put at the start of every line
 * @param pc - creator used to build the prefix parts
 */
function prefixLines(parts: Part[], prefix: string, pc: PartCreator): void {
    parts.unshift(pc.plain(prefix));

    let index = 0;
    while (index < parts.length) {
        if (parts[index].type === Type.Newline) {
            parts.splice(index + 1, 0, pc.plain(prefix));
            // Step over both the newline and the prefix just inserted
            index += 2;
        } else {
            index += 1;
        }
    }
}
/**
 * Parses every child of the given node and concatenates the results,
 * separating block-level siblings with newlines.
 *
 * @param n - the node whose children are parsed
 * @param pc - creator used to build the parts
 * @param opts - parse options, passed through to parseNode
 * @param mkListItem - optional list item renderer, passed through to parseNode
 * @returns the parts of all children, in document order
 */
function parseChildren(n: Node, pc: PartCreator, opts: IParseOptions, mkListItem?: (li: Node) => Part[]): Part[] {
    const groups: Part[][] = [];
    // The most recent child that actually produced parts
    let previous: ChildNode | undefined;

    for (const child of Array.from(n.childNodes)) {
        const childParts = parseNode(child, pc, opts, mkListItem);
        if (childParts.length === 0) continue;

        if (previous && (checkBlockNode(previous) || checkBlockNode(child))) {
            if (isListChild(child)) {
                // Use tighter spacing within lists
                childParts.unshift(pc.newline());
            } else {
                childParts.unshift(pc.newline(), pc.newline());
            }
        }

        previous = child;
        groups.push(childParts);
    }
    return groups.flat();
}
/**
 * Converts a single DOM node, and recursively its descendants, into editor
 * parts holding the markdown (or inline HTML) source for that node.
 *
 * Ignored nodes (see checkIgnored) produce no parts. Text nodes are parsed for
 * "@room" mentions. Known elements are mapped to their source form; any other
 * element is replaced by the parts of its children.
 *
 * @param n - the node to convert
 * @param pc - creator used to build the parts
 * @param opts - parse options, passed through to text and child parsing
 * @param mkListItem - when given, renders `LI` nodes; it is only supplied by
 *     the `UL`/`OL` cases and is not forwarded into the children of other elements
 * @returns the parts representing the node
 */
function parseNode(n: Node, pc: PartCreator, opts: IParseOptions, mkListItem?: (li: Node) => Part[]): Part[] {
    if (checkIgnored(n)) return [];

    switch (n.nodeType) {
        case Node.TEXT_NODE:
            return parseAtRoomMentions(n.nodeValue || "", pc, opts);
        case Node.ELEMENT_NODE:
            switch (n.nodeName) {
                case "H1":
                case "H2":
                case "H3":
                case "H4":
                case "H5":
                case "H6":
                    return parseHeader(n, pc, opts);
                case "A":
                    return parseLink(n, pc, opts);
                case "IMG":
                    return parseImage(n, pc, opts);
                case "BR":
                    return [pc.newline()];
                case "HR":
                    return [pc.plain("---")];
                case "EM":
                    return [pc.plain("_"), ...parseChildren(n, pc, opts), pc.plain("_")];
                case "STRONG":
                    return [pc.plain("**"), ...parseChildren(n, pc, opts), pc.plain("**")];
                // These have no markdown syntax here and are kept as inline HTML tags
                case "DEL":
                    return [pc.plain("<del>"), ...parseChildren(n, pc, opts), pc.plain("</del>")];
                case "S":
                    return [pc.plain("<s>"), ...parseChildren(n, pc, opts), pc.plain("</s>")];
                case "SUB":
                    return [pc.plain("<sub>"), ...parseChildren(n, pc, opts), pc.plain("</sub>")];
                case "SUP":
                    return [pc.plain("<sup>"), ...parseChildren(n, pc, opts), pc.plain("</sup>")];
                case "U":
                    return [pc.plain("<u>"), ...parseChildren(n, pc, opts), pc.plain("</u>")];
                case "PRE":
                    return parseCodeBlock(n, pc, opts);
                case "CODE": {
                    const code = n.textContent || "";
                    // Escape backticks by using multiple backticks for the fence if necessary
                    const fence = "`".repeat(longestBacktickSequence(code) + 1);
                    // Code that starts or ends with a backtick would merge with the fence
                    // and break the code span. Pad both sides with one space; CommonMark
                    // strips a single leading and trailing space from a code span.
                    const padding = code.startsWith("`") || code.endsWith("`") ? " " : "";
                    return pc.plainWithEmoji(`${fence}${padding}${code}${padding}${fence}`);
                }
                case "BLOCKQUOTE": {
                    const parts = parseChildren(n, pc, opts);
                    prefixLines(parts, "> ", pc);
                    return parts;
                }
                case "LI":
                    // Outside of a UL/OL there is no renderer, so only the children are emitted
                    return mkListItem?.(n) ?? parseChildren(n, pc, opts);
                case "UL": {
                    const parts = parseChildren(n, pc, opts, (li) => [pc.plain("- "), ...parseChildren(li, pc, opts)]);
                    // Indent nested lists
                    if (isListChild(n)) {
                        prefixLines(parts, " ", pc);
                    }
                    return parts;
                }
                case "OL": {
                    // Numbering continues from the list's start value
                    let counter = (n as HTMLOListElement).start ?? 1;
                    const parts = parseChildren(n, pc, opts, (li) => {
                        const itemParts = [pc.plain(`${counter}. `), ...parseChildren(li, pc, opts)];
                        counter++;
                        return itemParts;
                    });
                    // Indent nested lists
                    if (isListChild(n)) {
                        prefixLines(parts, " ", pc);
                    }
                    return parts;
                }
                case "DIV":
                case "SPAN":
                    // Math nodes are translated back into delimited latex strings
                    if ((n as Element).hasAttribute("data-mx-maths")) {
                        const delims = SdkConfig.get().latex_maths_delims;
                        // SPAN uses the inline delimiters, DIV the display ones
                        const delimLeft =
                            n.nodeName === "SPAN" ? (delims?.inline?.left ?? "\\(") : (delims?.display?.left ?? "\\[");
                        const delimRight =
                            n.nodeName === "SPAN"
                                ? (delims?.inline?.right ?? "\\)")
                                : (delims?.display?.right ?? "\\]");
                        const tex = (n as Element).getAttribute("data-mx-maths");
                        return pc.plainWithEmoji(`${delimLeft}${tex}${delimRight}`);
                    }
                    // Spoilers are translated back into their slash command form
                    else if ((n as Element).hasAttribute("data-mx-spoiler")) {
                        return [pc.plain("/spoiler "), ...parseChildren(n, pc, opts)];
                    }
                // Plain DIV/SPAN elements fall out of the switch to the default handling below
            }
    }

    // Elements without special handling contribute only their children
    return parseChildren(n, pc, opts);
}
/**
 * Options accepted by the parse functions in this file.
 */
interface IParseOptions {
/** When truthy, parseHtmlMessage and parsePlainTextMessage prefix every line of the output with "> ". */
isQuotedMessage?: boolean;
/**
 * When truthy, parseAtRoomMentions runs text through escape() before turning it into parts.
 * parseEvent defaults this to true when it is called without options.
 */
shouldEscape?: boolean;
}
/**
 * Parses an HTML message body into editor parts.
 *
 * @param html - the HTML to parse
 * @param pc - creator used to build the parts
 * @param opts - parse options; with `isQuotedMessage` every line is prefixed with "> "
 * @returns the parts representing the message
 */
function parseHtmlMessage(html: string, pc: PartCreator, opts: IParseOptions): Part[] {
    // No nodes from this parsed document should be inserted in the live
    // document, as scripts in event handlers, etc. would then be executed.
    // We are only taking text, so that is fine.
    const parsedDocument = new DOMParser().parseFromString(html, "text/html");
    const parts = parseNode(parsedDocument.body, pc, opts);

    if (opts.isQuotedMessage) prefixLines(parts, "> ", pc);
    return parts;
}
/**
 * Parses a plain text message body into editor parts, one line at a time.
 *
 * @param body - the plain text body
 * @param pc - creator used to build the parts
 * @param opts - parse options; with `isQuotedMessage` every line is prefixed with "> "
 * @returns the parts representing the message, with newline parts between lines
 */
export function parsePlainTextMessage(body: string, pc: PartCreator, opts: IParseOptions): Part[] {
    // Split on any new-line combination, not just \n; \r\n counts as one break
    const lines = body.split(/\r\n|\r|\n/g);
    const finalIndex = lines.length - 1;
    const parts: Part[] = [];

    lines.forEach((line, index) => {
        if (opts.isQuotedMessage) {
            parts.push(pc.plain("> "));
        }
        parts.push(...parseAtRoomMentions(line, pc, opts));
        // Newlines go between lines only, never after the last one
        if (index !== finalIndex) {
            parts.push(pc.newline());
        }
    });
    return parts;
}
/**
 * Parses the content of a message event into editor parts.
 *
 * HTML-formatted content is parsed from `formatted_body`; otherwise the plain
 * `body` is used, with the plain reply fallback stripped for replies. Emotes
 * and rainbow messages are prefixed with the matching slash command.
 *
 * @param event - the event to parse
 * @param pc - creator used to build the parts
 * @param opts - parse options; defaults to escaping markup
 * @returns the parts representing the event's message
 */
export function parseEvent(event: MatrixEvent, pc: PartCreator, opts: IParseOptions = { shouldEscape: true }): Part[] {
    const content = event.getContent();
    const isHtml = content.format === "org.matrix.custom.html";

    let parts: Part[];
    let isRainbow = false;
    if (isHtml) {
        parts = parseHtmlMessage(content.formatted_body || "", pc, opts);
        // A message is a rainbow message if re-rendering its body reproduces the formatted body
        isRainbow = Boolean(
            content.body && content.formatted_body && textToHtmlRainbow(content.body) === content.formatted_body,
        );
    } else {
        const rawBody = content.body || "";
        const body = event.replyEventId ? stripPlainReply(rawBody) : rawBody;
        parts = parsePlainTextMessage(body, pc, opts);
    }

    const isEmote = content.msgtype === MsgType.Emote;
    let command: string | undefined;
    if (isRainbow) {
        command = isEmote ? "/rainbowme " : "/rainbow ";
    } else if (isEmote) {
        command = "/me ";
    }
    if (command !== undefined) {
        parts.unshift(pc.plain(command));
    }
    return parts;
}