2019-05-07 18:31:37 +03:00
|
|
|
/*
Copyright 2019 New Vector Ltd
Copyright 2019, 2020 The Matrix.org Foundation C.I.C.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
import { MatrixEvent } from "matrix-js-sdk/src/models/event";
|
|
|
|
|
2019-05-29 15:46:15 +03:00
|
|
|
import { walkDOMDepthFirst } from "./dom";
|
2019-07-23 10:12:24 +03:00
|
|
|
import { checkBlockNode } from "../HtmlUtils";
|
2020-04-15 02:49:08 +03:00
|
|
|
import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
|
|
|
|
import { PartCreator } from "./parts";
|
2019-05-22 14:00:39 +03:00
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
/**
 * Splits a run of text into plain parts and at-room pills.
 *
 * Every literal "@room" occurrence becomes an at-room pill part; the
 * non-empty text between occurrences becomes plain parts.
 *
 * @param text the text to scan
 * @param partCreator factory used to build the parts
 * @returns the parts in document order (empty for an empty string)
 */
function parseAtRoomMentions(text: string, partCreator: PartCreator) {
    const mention = "@room";
    const segments = text.split(mention);
    const finalIndex = segments.length - 1;
    const result = [];
    for (let idx = 0; idx < segments.length; idx += 1) {
        const segment = segments[idx];
        if (segment.length > 0) {
            result.push(partCreator.plain(segment));
        }
        // A pill sits between every two neighbouring segments. Nothing is
        // needed after the final one: split() yields a trailing empty
        // segment when `text` ends with the mention.
        if (idx !== finalIndex) {
            result.push(partCreator.atRoomPill(mention));
        }
    }
    return result;
}
|
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
/**
 * Converts an anchor element into a single editor part.
 *
 * - permalink whose primary entity starts with "@": user pill labelled with the link text
 * - permalink whose primary entity starts with "#": room pill
 * - link whose text equals its href: the bare text
 * - anything else: a markdown link `[text](href)`, with `[`, `]` and `\`
 *   in the text backslash-escaped
 *
 * @param a the anchor element
 * @param partCreator factory used to build the part
 */
function parseLink(a: HTMLAnchorElement, partCreator: PartCreator) {
    const target = a.href;
    const entityId = getPrimaryPermalinkEntity(target); // The room/user ID
    const sigil = entityId ? entityId[0] : undefined; // First character of ID

    if (sigil === "@") {
        return partCreator.userPill(a.textContent, entityId);
    }
    if (sigil === "#") {
        return partCreator.roomPill(entityId);
    }

    const label = a.textContent;
    if (target === label) {
        return partCreator.plain(label);
    }
    const escapedLabel = label.replace(/[[\\\]]/g, c => "\\" + c);
    return partCreator.plain(`[${escapedLabel}](${target})`);
}
|
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
/**
 * Converts a PRE element into the parts of a markdown fenced code block.
 *
 * The block is textified from `textContent`, so markup inside it is not
 * preserved. If the first child is a CODE element carrying a `language-xyz`
 * class (classes starting with `language-_` are skipped), `xyz` is appended
 * to the opening fence as the info string.
 *
 * @param n the PRE element
 * @param partCreator factory used to build the parts
 * @returns one plain part per line, separated by newline parts
 */
function parseCodeBlock(n: HTMLElement, partCreator: PartCreator) {
    let language = "";
    if (n.firstChild && n.firstChild.nodeName === "CODE") {
        for (const className of (n.firstChild as HTMLElement).classList) {
            if (className.startsWith("language-") && !className.startsWith("language-_")) {
                language = className.slice("language-".length);
                break;
            }
        }
    }

    const text = n.textContent || "";
    // The closing fence has to start its own line; add the missing line
    // break when the code does not already end with one.
    const body = text === "" || text.endsWith("\n") ? text : text + "\n";

    // A backtick run in the content that is at least as long as the fence
    // would close the block early, so make the fence longer than any run.
    let longestRun = 0;
    for (const run of text.match(/`+/g) || []) {
        longestRun = Math.max(longestRun, run.length);
    }
    const fence = "`".repeat(Math.max(3, longestRun + 1));

    const parts = [];
    const lines = (fence + language + "\n" + body + fence).split("\n");
    lines.forEach((line, i) => {
        parts.push(partCreator.plain(line));
        if (i < lines.length - 1) {
            parts.push(partCreator.newline());
        }
    });
    return parts;
}
|
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
/**
 * Produces the markdown prefix for a heading element.
 *
 * The heading level is read from the digits following the first character
 * of `nodeName` (e.g. "H3" gives "### ").
 *
 * @param el the heading element
 * @param partCreator factory used to build the part
 */
function parseHeader(el: HTMLElement, partCreator: PartCreator) {
    const level = parseInt(el.nodeName.slice(1), 10);
    const hashes = "#".repeat(level);
    return partCreator.plain(`${hashes} `);
}
|
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
/** Mutable list bookkeeping shared between element callbacks while one HTML message is parsed. */
interface IState {
    // one entry per ordered list currently open (innermost last): the number the next item will get
    listIndex: Array<number>;
    // number of lists (ordered or unordered) currently open; unset until the first list is entered
    listDepth?: number;
}
|
|
|
|
|
|
|
|
/**
 * Converts one element into the editor part(s) it contributes by itself.
 *
 * Inline formatting elements are flattened to markdown/HTML text built from
 * their `textContent`. List containers produce no part but update `state`
 * so that list items can be numbered and indented.
 *
 * @param n the element being entered
 * @param partCreator factory used to build the parts
 * @param lastNode the previously handled node, if any
 * @param state list bookkeeping; mutated for OL, UL and LI elements
 * @returns a single part, an array of parts (PRE), or undefined when the
 *     element itself adds nothing (its children are visited instead)
 */
function parseElement(n: HTMLElement, partCreator: PartCreator, lastNode: HTMLElement | undefined, state: IState) {
    switch (n.nodeName) {
        case "H1":
        case "H2":
        case "H3":
        case "H4":
        case "H5":
        case "H6":
            return parseHeader(n, partCreator);
        case "A":
            return parseLink(n as HTMLAnchorElement, partCreator);
        case "BR":
            return partCreator.newline();
        case "EM":
            return partCreator.plain(`_${n.textContent}_`);
        case "STRONG":
            return partCreator.plain(`**${n.textContent}**`);
        case "PRE":
            return parseCodeBlock(n, partCreator);
        case "CODE":
            return partCreator.plain(`\`${n.textContent}\``);
        case "DEL":
            return partCreator.plain(`<del>${n.textContent}</del>`);
        case "LI": {
            // An <li> can show up outside any list (listDepth unset), or
            // after every list has been closed (listDepth back at 0).
            // Clamp the count so repeat() is never given a negative
            // value, which would throw a RangeError.
            const indent = " ".repeat(Math.max(0, (state.listDepth || 0) - 1));
            if (n.parentElement.nodeName === "OL") {
                // The markdown parser doesn't do nested indexed lists at all, but this supports it anyway.
                const index = state.listIndex[state.listIndex.length - 1];
                state.listIndex[state.listIndex.length - 1] += 1;
                return partCreator.plain(`${indent}${index}. `);
            } else {
                return partCreator.plain(`${indent}- `);
            }
        }
        case "P": {
            // separate a paragraph from whatever came before it
            if (lastNode) {
                return partCreator.newline();
            }
            break;
        }
        case "OL":
            // remember the number the first item of this list will get
            state.listIndex.push((n as HTMLOListElement).start || 1);
            /* falls through */
        case "UL":
            state.listDepth = (state.listDepth || 0) + 1;
            /* falls through */
        default:
            // don't textify block nodes we'll descend into
            if (!checkDescendInto(n)) {
                return partCreator.plain(n.textContent);
            }
    }
}
|
|
|
|
|
2019-10-13 14:10:11 +03:00
|
|
|
/**
 * Decides whether the children of `node` are to be visited.
 *
 * @param node the node being entered
 * @returns false for PRE, otherwise whatever checkBlockNode reports
 */
function checkDescendInto(node) {
    // a code block is textified in parseCodeBlock
    // as we don't want to preserve markup in it,
    // so no need to descend into it
    if (node.nodeName === "PRE") {
        return false;
    }
    return checkBlockNode(node);
}
|
|
|
|
|
|
|
|
/**
 * Tells whether a node is skipped entirely while parsing.
 *
 * @param n the node to test
 * @returns true for text nodes holding exactly "\n", for MX-REPLY elements,
 *     and for every node that is neither a text node nor an element
 */
function checkIgnored(n) {
    switch (n.nodeType) {
        case Node.TEXT_NODE:
            // Element adds \n text nodes in a lot of places,
            // which should be ignored
            return n.nodeValue === "\n";
        case Node.ELEMENT_NODE:
            return n.nodeName === "MX-REPLY";
        default:
            return true;
    }
}
|
|
|
|
|
2019-08-20 13:34:35 +03:00
|
|
|
// Markdown blockquote marker placed at the start of every line of quoted content.
const QUOTE_LINE_PREFIX = "> ";
|
2019-06-14 13:16:34 +03:00
|
|
|
/**
 * Inserts the quote prefix at the start of every line held in `parts`.
 * The array is modified in place.
 *
 * @param isFirstNode whether `parts` opens the message, i.e. no newline part
 *     precedes its first line
 * @param parts the parts to prefix
 * @param partCreator factory used to build the prefix parts
 */
function prefixQuoteLines(isFirstNode, parts, partCreator) {
    // a newline (to append a > to) wouldn't be added to parts for the first line
    // if there was no content before the BLOCKQUOTE, so handle that
    if (isFirstNode) {
        parts.unshift(partCreator.plain(QUOTE_LINE_PREFIX));
    }
    let cursor = 0;
    while (cursor < parts.length) {
        if (parts[cursor].type === "newline") {
            parts.splice(cursor + 1, 0, partCreator.plain(QUOTE_LINE_PREFIX));
            // step over the newline and the prefix that was just inserted
            cursor += 2;
        } else {
            cursor += 1;
        }
    }
}
|
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
/**
 * Parses an HTML message body into editor parts.
 *
 * The HTML is parsed into a detached document and walked depth first. Text
 * nodes and elements are converted to parts, a newline part is inserted
 * around block nodes, and content that is quoted gets the quote prefix on
 * every line.
 *
 * @param html the HTML to parse
 * @param partCreator factory used to build the parts
 * @param isQuotedMessage whether the whole message is to be quoted
 * @returns the parts in document order
 */
function parseHtmlMessage(html: string, partCreator: PartCreator, isQuotedMessage: boolean) {
    // no nodes from parsing here should be inserted in the document,
    // as scripts in event handlers, etc would be executed then.
    // we're only taking text, so that is fine
    const rootNode = new DOMParser().parseFromString(html, "text/html").body;
    const parts = [];
    let lastNode;
    let inQuote = isQuotedMessage;
    const state: IState = {
        listIndex: [],
    };

    // Whether a BLOCKQUOTE encloses `n` somewhere below the root.
    function hasBlockquoteAncestor(n) {
        for (let p = n.parentNode; p && p !== rootNode; p = p.parentNode) {
            if (p.nodeName === "BLOCKQUOTE") {
                return true;
            }
        }
        return false;
    }

    function onNodeEnter(n) {
        if (checkIgnored(n)) {
            return false;
        }
        if (n.nodeName === "BLOCKQUOTE") {
            inQuote = true;
        }

        const newParts = [];
        // block nodes start on their own line, and so does whatever follows one
        if (lastNode && (checkBlockNode(lastNode) || checkBlockNode(n))) {
            newParts.push(partCreator.newline());
        }

        if (n.nodeType === Node.TEXT_NODE) {
            newParts.push(...parseAtRoomMentions(n.nodeValue, partCreator));
        } else if (n.nodeType === Node.ELEMENT_NODE) {
            const parseResult = parseElement(n, partCreator, lastNode, state);
            if (parseResult) {
                if (Array.isArray(parseResult)) {
                    newParts.push(...parseResult);
                } else {
                    newParts.push(parseResult);
                }
            }
        }

        if (newParts.length && inQuote) {
            const isFirstPart = parts.length === 0;
            prefixQuoteLines(isFirstPart, newParts, partCreator);
        }

        parts.push(...newParts);

        const descend = checkDescendInto(n);
        // when not descending (like for PRE), onNodeLeave won't be called to set lastNode
        // so do that here.
        lastNode = descend ? null : n;
        return descend;
    }

    function onNodeLeave(n) {
        if (checkIgnored(n)) {
            return;
        }
        switch (n.nodeName) {
            case "BLOCKQUOTE":
                // Leaving a BLOCKQUOTE only ends quoting when nothing else
                // still quotes the content: neither the message as a whole
                // nor an enclosing BLOCKQUOTE.
                inQuote = isQuotedMessage || hasBlockquoteAncestor(n);
                break;
            case "OL":
                state.listIndex.pop();
                /* falls through */
            case "UL":
                state.listDepth -= 1;
                break;
        }
        lastNode = n;
    }

    walkDOMDepthFirst(rootNode, onNodeEnter, onNodeLeave);

    return parts;
}
|
|
|
|
|
2020-07-15 11:45:45 +03:00
|
|
|
/**
 * Parses a plain-text message body into editor parts.
 *
 * @param body the message text
 * @param partCreator factory used to build the parts
 * @param isQuotedMessage when truthy, every line is preceded by the quote prefix
 * @returns the parts of each line, with a newline part between lines
 */
export function parsePlainTextMessage(body: string, partCreator: PartCreator, isQuotedMessage?: boolean) {
    // split on any new-line combination not just \n, collapses \r\n
    const lines = body.split(/\r\n|\r|\n/g);
    const lastIndex = lines.length - 1;
    const parts = [];
    for (let idx = 0; idx <= lastIndex; idx += 1) {
        if (isQuotedMessage) {
            parts.push(partCreator.plain(QUOTE_LINE_PREFIX));
        }
        parts.push(...parseAtRoomMentions(lines[idx], partCreator));
        // no line break after the final line
        if (idx !== lastIndex) {
            parts.push(partCreator.newline());
        }
    }
    return parts;
}
|
|
|
|
|
2020-04-15 02:49:08 +03:00
|
|
|
/**
 * Parses the content of an event into editor parts.
 *
 * Content whose `format` is "org.matrix.custom.html" is parsed from
 * `formatted_body`; anything else is parsed from `body` as plain text. A
 * missing body is treated as an empty string. For "m.emote" messages a
 * "/me " part is put in front of the result.
 *
 * @param event the event to read the content from
 * @param partCreator factory used to build the parts
 * @param options `isQuotedMessage`: whether the message is to be quoted (default false)
 */
export function parseEvent(event: MatrixEvent, partCreator: PartCreator, {isQuotedMessage = false} = {}) {
    const content = event.getContent();
    const isHtml = content.format === "org.matrix.custom.html";
    const parts = isHtml
        ? parseHtmlMessage(content.formatted_body || "", partCreator, isQuotedMessage)
        : parsePlainTextMessage(content.body || "", partCreator, isQuotedMessage);
    if (content.msgtype === "m.emote") {
        parts.unshift(partCreator.plain("/me "));
    }
    return parts;
}
|