2019-05-07 18:31:37 +03:00
|
|
|
/*
|
|
|
|
Copyright 2019 New Vector Ltd
|
2020-04-15 02:49:08 +03:00
|
|
|
Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
|
2019-05-07 18:31:37 +03:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
import { MatrixEvent } from "matrix-js-sdk/src/models/event";
|
2022-04-27 11:43:10 +03:00
|
|
|
import { MsgType } from "matrix-js-sdk/src/@types/event";
|
2020-04-15 02:49:08 +03:00
|
|
|
|
2019-07-23 10:12:24 +03:00
|
|
|
import { checkBlockNode } from "../HtmlUtils";
|
2020-04-15 02:49:08 +03:00
|
|
|
import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
|
2021-10-12 13:01:40 +03:00
|
|
|
import { Part, PartCreator, Type } from "./parts";
|
2020-09-21 13:00:39 +03:00
|
|
|
import SdkConfig from "../SdkConfig";
|
2021-10-12 16:02:05 +03:00
|
|
|
import { textToHtmlRainbow } from "../utils/colour";
|
2022-04-27 11:43:10 +03:00
|
|
|
import { stripPlainReply } from "../utils/Reply";
|
2019-05-22 14:00:39 +03:00
|
|
|
|
2022-03-09 15:43:05 +03:00
|
|
|
/** Node names of the list elements: the two list containers and the list item. */
const LIST_TYPES: string[] = ["UL", "OL", "LI"];
|
|
|
|
|
|
|
|
/**
 * Backslash-escapes markup characters in the given text.
 *
 * Every occurrence of `\`, `*`, `_`, `[`, `]`, a backtick or `<` gets a
 * backslash put in front of it, as does a `>` that is the very first
 * character of the text.
 *
 * @param text - the raw text to escape
 * @returns the text with the characters above prefixed by a backslash
 */
function escape(text: string): string {
    const markupPattern = /[\\*_[\]`<]|^>/g;
    // "$&" stands for the matched substring, so each match becomes "\" + match.
    return text.replace(markupPattern, "\\$&");
}
|
|
|
|
|
|
|
|
/**
 * Finds the length of the longest run of consecutive backticks in the given
 * text. Callers use it to choose a code fence long enough that the content
 * cannot terminate it.
 *
 * @param text - the text to scan
 * @returns the length of the longest backtick run, or 0 if there is none
 */
export function longestBacktickSequence(text: string): number {
    const backtickRuns = text.match(/`+/g) ?? [];
    return backtickRuns.reduce((longest, run) => Math.max(longest, run.length), 0);
}
|
|
|
|
|
|
|
|
/**
 * Tells whether the given node sits directly inside a list element.
 *
 * @param n - the node whose parent is inspected
 * @returns true if the parent's node name is one of LIST_TYPES; false
 *     otherwise, including when the node has no parent
 */
function isListChild(n: Node): boolean {
    const parent = n.parentNode;
    if (!parent) return false;
    return LIST_TYPES.indexOf(parent.nodeName) !== -1;
}
|
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/**
 * Splits text around every literal "@room" and turns it into parts: the text
 * between mentions goes through `pc.plainWithEmoji` (escaped first when
 * `opts.shouldEscape` is set) and each "@room" becomes `pc.atRoomPill`.
 *
 * @param text - the text to split
 * @param pc - creator used to build the parts
 * @param opts - parse options; only `shouldEscape` is read here
 * @returns the parts in document order
 */
function parseAtRoomMentions(text: string, pc: PartCreator, opts: IParseOptions): Part[] {
    const ATROOM = "@room";
    const segments = text.split(ATROOM);
    const lastIndex = segments.length - 1;
    const result: Part[] = [];

    for (let idx = 0; idx <= lastIndex; idx++) {
        const segment = segments[idx];
        if (segment.length > 0) {
            const content = opts.shouldEscape ? escape(segment) : segment;
            result.push(...pc.plainWithEmoji(content));
        }
        // A pill goes between segments only: when `text` ends in "@room",
        // split() yields a trailing empty segment, so the last segment never
        // needs a pill after it.
        if (idx < lastIndex) {
            result.push(pc.atRoomPill(ATROOM));
        }
    }
    return result;
}
|
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/**
 * Converts an anchor element into parts.
 *
 * - If the href resolves to a permalink entity starting with "@", the result
 *   is a user pill; if it starts with "#", a room pill.
 * - If the link text equals the href and the anchor holds only text nodes,
 *   the text is emitted as-is (with @room handling).
 * - Otherwise the link is written out as `[children](href)`.
 *
 * @param n - the anchor node
 * @param pc - creator used to build the parts
 * @param opts - parse options forwarded to nested parsing
 * @returns the parts representing the link
 */
function parseLink(n: Node, pc: PartCreator, opts: IParseOptions): Part[] {
    const anchor = n as HTMLAnchorElement;
    const href = anchor.href;
    const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID
    const sigil = resourceId?.[0];

    if (sigil === "@") {
        return [pc.userPill(n.textContent, resourceId)];
    }
    if (sigil === "#") {
        return [pc.roomPill(resourceId)];
    }

    const hasNonTextChild = Array.from(n.childNodes).some((child) => child.nodeType !== Node.TEXT_NODE);
    const isBareUrl = href === n.textContent && !hasNonTextChild;
    if (isBareUrl) {
        return parseAtRoomMentions(n.textContent, pc, opts);
    }

    const opening = pc.plain("[");
    const label = parseChildren(n, pc, opts);
    const closing = pc.plain(`](${href})`);
    return [opening, ...label, closing];
}
|
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/**
 * Converts an image element into its Markdown form `![alt](src)`, with the
 * alt text escaped.
 *
 * @param n - the image node
 * @param pc - creator used to build the parts
 * @param opts - parse options (not read by this function)
 * @returns the parts produced by `pc.plainWithEmoji` for the Markdown image
 */
function parseImage(n: Node, pc: PartCreator, opts: IParseOptions): Part[] {
    const image = n as HTMLImageElement;
    const altText = escape(image.alt);
    return pc.plainWithEmoji(`![${altText}](${image.src})`);
}
|
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/**
 * Converts a preformatted block into a fenced code block: an opening fence
 * (followed by the language, if one is found), one part sequence plus a
 * newline per line of content, and a closing fence.
 *
 * The language is taken from the first `language-*` class on a leading CODE
 * child, ignoring classes that start with `language-_`.
 *
 * @param n - the PRE node
 * @param pc - creator used to build the parts
 * @param opts - parse options (not read by this function)
 * @returns the parts making up the fenced block
 */
function parseCodeBlock(n: Node, pc: PartCreator, opts: IParseOptions): Part[] {
    const LANGUAGE_PREFIX = "language-";

    let language = "";
    const codeChild = n.firstChild;
    if (codeChild?.nodeName === "CODE") {
        const languageClass = Array.from((codeChild as HTMLElement).classList).find(
            (cls) => cls.startsWith(LANGUAGE_PREFIX) && !cls.startsWith("language-_"),
        );
        if (languageClass !== undefined) {
            language = languageClass.slice(LANGUAGE_PREFIX.length);
        }
    }

    // Drop a single trailing newline so the block does not end in an empty line.
    const text = n.textContent.replace(/\n$/, "");
    // Escape backticks by using even more backticks for the fence if necessary
    const fenceLength = Math.max(3, longestBacktickSequence(text) + 1);
    const fence = "`".repeat(fenceLength);

    const parts: Part[] = [...pc.plainWithEmoji(fence + language), pc.newline()];
    for (const line of text.split("\n")) {
        parts.push(...pc.plainWithEmoji(line), pc.newline());
    }
    parts.push(pc.plain(fence));
    return parts;
}
|
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/**
 * Converts a heading element (H1-H6) into a Markdown heading: as many `#`
 * characters as the heading level, a space, then the parsed children.
 *
 * @param n - the heading node; the level is read from its node name
 * @param pc - creator used to build the parts
 * @param opts - parse options forwarded to the children
 * @returns the heading marker followed by the children's parts
 */
function parseHeader(n: Node, pc: PartCreator, opts: IParseOptions): Part[] {
    const level = parseInt(n.nodeName.slice(1), 10);
    const hashes = "#".repeat(level);
    const marker = pc.plain(hashes + " ");
    const content = parseChildren(n, pc, opts);
    return [marker, ...content];
}
|
|
|
|
|
|
|
|
/**
 * Decides whether a node should be skipped entirely during parsing.
 *
 * @param n - the DOM node to inspect
 * @returns true for text nodes whose value is exactly "\n", for MX-REPLY
 *     elements, and for every node that is neither a text nor an element
 *     node; false otherwise
 */
function checkIgnored(n: Node): boolean {
    if (n.nodeType === Node.TEXT_NODE) {
        // Element adds \n text nodes in a lot of places,
        // which should be ignored
        return n.nodeValue === "\n";
    } else if (n.nodeType === Node.ELEMENT_NODE) {
        return n.nodeName === "MX-REPLY";
    }
    // Anything else (comments, etc.) carries no content for the editor.
    return true;
}
|
|
|
|
|
2022-03-09 15:43:05 +03:00
|
|
|
/**
 * Prefixes every line of the given parts, in place: a plain part holding
 * `prefix` is inserted at the very start and right after every newline part.
 *
 * @param parts - the parts to modify; the array is mutated
 * @param prefix - the text to put at the start of each line
 * @param pc - creator used to build the prefix parts
 */
function prefixLines(parts: Part[], prefix: string, pc: PartCreator) {
    parts.unshift(pc.plain(prefix));

    let cursor = 0;
    while (cursor < parts.length) {
        if (parts[cursor].type !== Type.Newline) {
            cursor++;
            continue;
        }
        parts.splice(cursor + 1, 0, pc.plain(prefix));
        // Step over both the newline and the prefix that was just inserted.
        cursor += 2;
    }
}
|
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/**
 * Parses all child nodes of `n` and concatenates their parts, inserting
 * newlines between siblings when either the current child or the previous
 * child that produced output is a block node: one newline for children of a
 * list element, two otherwise.
 *
 * @param n - the node whose children are parsed
 * @param pc - creator used to build the parts
 * @param opts - parse options forwarded to each child
 * @param mkListItem - optional factory forwarded to `parseNode` for LI children
 * @returns the combined parts of all children
 */
function parseChildren(n: Node, pc: PartCreator, opts: IParseOptions, mkListItem?: (li: Node) => Part[]): Part[] {
    const output: Part[] = [];
    // The most recent child that actually produced parts.
    let lastEmitted: Node | undefined;

    for (const child of Array.from(n.childNodes)) {
        const childParts = parseNode(child, pc, opts, mkListItem);
        if (childParts.length === 0) continue;

        if (lastEmitted && (checkBlockNode(lastEmitted) || checkBlockNode(child))) {
            if (isListChild(child)) {
                // Use tighter spacing within lists
                output.push(pc.newline());
            } else {
                output.push(pc.newline(), pc.newline());
            }
        }

        output.push(...childParts);
        lastEmitted = child;
    }
    return output;
}
|
2021-12-21 13:07:44 +03:00
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/**
 * Converts a single DOM node, and recursively its descendants, into parts.
 *
 * Ignored nodes (see `checkIgnored`) yield nothing. Text nodes are split
 * around "@room" mentions. Known elements are turned into their Markdown (or
 * inline HTML) source form; any other element contributes only the parts of
 * its children.
 *
 * @param n - the node to convert
 * @param pc - creator used to build the parts
 * @param opts - parse options forwarded to nested parsing
 * @param mkListItem - optional factory used for LI elements; supplied by the
 *     enclosing UL/OL so that items get the right bullet or number
 * @returns the parts representing the node
 */
function parseNode(n: Node, pc: PartCreator, opts: IParseOptions, mkListItem?: (li: Node) => Part[]): Part[] {
    if (checkIgnored(n)) return [];

    switch (n.nodeType) {
        case Node.TEXT_NODE:
            return parseAtRoomMentions(n.nodeValue, pc, opts);
        case Node.ELEMENT_NODE:
            switch (n.nodeName) {
                case "H1":
                case "H2":
                case "H3":
                case "H4":
                case "H5":
                case "H6":
                    return parseHeader(n, pc, opts);
                case "A":
                    return parseLink(n, pc, opts);
                case "IMG":
                    return parseImage(n, pc, opts);
                case "BR":
                    return [pc.newline()];
                case "HR":
                    return [pc.plain("---")];
                case "EM":
                    return [pc.plain("_"), ...parseChildren(n, pc, opts), pc.plain("_")];
                case "STRONG":
                    return [pc.plain("**"), ...parseChildren(n, pc, opts), pc.plain("**")];
                case "DEL":
                    return [pc.plain("<del>"), ...parseChildren(n, pc, opts), pc.plain("</del>")];
                case "SUB":
                    return [pc.plain("<sub>"), ...parseChildren(n, pc, opts), pc.plain("</sub>")];
                case "SUP":
                    return [pc.plain("<sup>"), ...parseChildren(n, pc, opts), pc.plain("</sup>")];
                case "U":
                    return [pc.plain("<u>"), ...parseChildren(n, pc, opts), pc.plain("</u>")];
                case "PRE":
                    return parseCodeBlock(n, pc, opts);
                case "CODE": {
                    const code = n.textContent ?? "";
                    // Escape backticks by using multiple backticks for the fence if necessary
                    const fence = "`".repeat(longestBacktickSequence(code) + 1);
                    // If the content starts or ends with a backtick it would merge with the
                    // fence and change where the code span begins/ends. Markdown strips one
                    // space from each side of a code span, so padding with a single space on
                    // both sides keeps the content intact.
                    const pad = code.startsWith("`") || code.endsWith("`") ? " " : "";
                    return pc.plainWithEmoji(`${fence}${pad}${code}${pad}${fence}`);
                }
                case "BLOCKQUOTE": {
                    const parts = parseChildren(n, pc, opts);
                    prefixLines(parts, "> ", pc);
                    return parts;
                }
                case "LI":
                    // Outside of a list (no factory given) an item is just its children.
                    return mkListItem?.(n) ?? parseChildren(n, pc, opts);
                case "UL": {
                    const parts = parseChildren(n, pc, opts, (li) => [pc.plain("- "), ...parseChildren(li, pc, opts)]);
                    if (isListChild(n)) {
                        // Indent nested lists by four spaces so that Markdown treats them as
                        // belonging to the parent item rather than as a sibling list.
                        prefixLines(parts, "    ", pc);
                    }
                    return parts;
                }
                case "OL": {
                    let counter = (n as HTMLOListElement).start ?? 1;
                    const parts = parseChildren(n, pc, opts, (li) => {
                        const parts = [pc.plain(`${counter}. `), ...parseChildren(li, pc, opts)];
                        counter++;
                        return parts;
                    });
                    if (isListChild(n)) {
                        // Same four-space indent as for nested unordered lists.
                        prefixLines(parts, "    ", pc);
                    }
                    return parts;
                }
                case "DIV":
                case "SPAN":
                    // Math nodes are translated back into delimited latex strings
                    if ((n as Element).hasAttribute("data-mx-maths")) {
                        const delims = SdkConfig.get().latex_maths_delims;
                        // SPAN is inline maths, DIV is display maths; fall back to the
                        // default delimiters when none are configured.
                        const delimLeft =
                            n.nodeName === "SPAN" ? delims?.inline?.left ?? "\\(" : delims?.display?.left ?? "\\[";
                        const delimRight =
                            n.nodeName === "SPAN" ? delims?.inline?.right ?? "\\)" : delims?.display?.right ?? "\\]";
                        const tex = (n as Element).getAttribute("data-mx-maths");

                        return pc.plainWithEmoji(`${delimLeft}${tex}${delimRight}`);
                    }
                // Non-maths DIV/SPAN and unknown elements are handled after the switch.
            }
    }

    return parseChildren(n, pc, opts);
}
|
2019-05-29 15:46:15 +03:00
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/** Options controlling how message content is converted into parts. */
interface IParseOptions {
    /**
     * When set, plain text segments are passed through `escape` before being
     * turned into parts.
     */
    shouldEscape?: boolean;
    /** When set, every line of the resulting parts is prefixed with "> ". */
    isQuotedMessage?: boolean;
}
|
|
|
|
|
|
|
|
/**
 * Parses an HTML message body into parts, prefixing every line with "> "
 * when `opts.isQuotedMessage` is set.
 *
 * @param html - the HTML source of the message
 * @param pc - creator used to build the parts
 * @param opts - parse options
 * @returns the parts representing the message
 */
function parseHtmlMessage(html: string, pc: PartCreator, opts: IParseOptions): Part[] {
    // no nodes from parsing here should be inserted in the document,
    // as scripts in event handlers, etc would be executed then.
    // we're only taking text, so that is fine
    const parsedDocument = new DOMParser().parseFromString(html, "text/html");
    const parts = parseNode(parsedDocument.body, pc, opts);

    if (opts.isQuotedMessage) prefixLines(parts, "> ", pc);
    return parts;
}
|
|
|
|
|
2022-12-12 14:24:14 +03:00
|
|
|
/**
 * Parses a plain text message body into parts. The body is split into lines;
 * each line is optionally prefixed with "> " (when `opts.isQuotedMessage` is
 * set), has its "@room" mentions handled, and is separated from the next line
 * by a newline part.
 *
 * @param body - the plain text body
 * @param pc - creator used to build the parts
 * @param opts - parse options
 * @returns the parts representing the message
 */
export function parsePlainTextMessage(body: string, pc: PartCreator, opts: IParseOptions): Part[] {
    // split on any new-line combination not just \n, collapses \r\n
    const lines = body.split(/\r\n|\r|\n/g);
    const parts: Part[] = [];

    lines.forEach((line, index) => {
        // Newlines go between lines only, never after the last one.
        if (index > 0) {
            parts.push(pc.newline());
        }
        if (opts.isQuotedMessage) {
            parts.push(pc.plain("> "));
        }
        parts.push(...parseAtRoomMentions(line, pc, opts));
    });

    return parts;
}
|
|
|
|
|
2022-04-19 16:53:59 +03:00
|
|
|
/**
 * Converts the content of a message event into parts.
 *
 * HTML-formatted content is parsed from `formatted_body`; anything else is
 * parsed from `body` as plain text, with the reply fallback stripped when the
 * event is a reply. Emotes and rainbow messages (detected by re-rendering
 * `body` and comparing with `formatted_body`) get the matching slash command
 * prepended.
 *
 * @param event - the event to convert
 * @param pc - creator used to build the parts
 * @param opts - parse options; defaults to escaping markup
 * @returns the parts representing the event's message
 */
export function parseEvent(event: MatrixEvent, pc: PartCreator, opts: IParseOptions = { shouldEscape: true }) {
    const content = event.getContent();
    const isEmote = content.msgtype === MsgType.Emote;

    let parts: Part[];
    let isRainbow = false;

    if (content.format !== "org.matrix.custom.html") {
        const rawBody = content.body || "";
        const body = event.replyEventId ? stripPlainReply(rawBody) : rawBody;
        parts = parsePlainTextMessage(body, pc, opts);
    } else {
        parts = parseHtmlMessage(content.formatted_body || "", pc, opts);
        isRainbow = Boolean(
            content.body && content.formatted_body && textToHtmlRainbow(content.body) === content.formatted_body,
        );
    }

    // Work out which slash command, if any, reproduces this message.
    let command: string | undefined;
    if (isRainbow) {
        command = isEmote ? "/rainbowme " : "/rainbow ";
    } else if (isEmote) {
        command = "/me ";
    }
    if (command !== undefined) {
        parts.unshift(pc.plain(command));
    }

    return parts;
}
|