elk/composables/content.ts

214 lines
6.1 KiB
TypeScript
Raw Normal View History

2022-11-21 10:14:07 +03:00
import type { Emoji } from 'masto'
2022-11-21 00:21:53 +03:00
import type { DefaultTreeAdapterMap } from 'parse5'
import { parseFragment, serialize } from 'parse5'
import type { VNode } from 'vue'
2022-11-24 06:42:03 +03:00
import { Fragment, h, isVNode } from 'vue'
2022-11-21 00:21:53 +03:00
import { RouterLink } from 'vue-router'
2022-11-24 06:42:03 +03:00
import ContentCode from '~/components/content/ContentCode.vue'
2022-11-30 10:08:10 +03:00
import AccountHoverWrapper from '~/components/account/AccountHoverWrapper.vue'
2022-11-21 00:21:53 +03:00
type Node = DefaultTreeAdapterMap['childNode']
type Element = DefaultTreeAdapterMap['element']
2022-11-24 06:42:03 +03:00
/**
 * Rewrite the `href` of mention-style anchors so they point at in-app routes.
 *
 * Only `<a>` elements whose `class` attribute contains `mention` and that carry
 * an `href` attribute are considered:
 * - when the href matches `UserLinkRE`, it is rewritten to `/@user@server` and the
 *   element is returned wrapped in an `AccountHoverWrapper` VNode;
 * - otherwise, when it matches `TagLinkRE`, it is rewritten to `/tags/<name>` in
 *   place and nothing is returned.
 *
 * `UserLinkRE` and `TagLinkRE` are defined outside this file.
 *
 * @param el parsed element; its `href` attribute may be mutated
 * @returns a wrapper VNode for user mentions, `undefined` in every other case
 */
function handleMention(el: Element) {
  if (el.tagName !== 'a')
    return undefined

  const hasMentionClass = el.attrs.some(attr => attr.name === 'class' && attr.value.includes('mention'))
  if (!hasMentionClass)
    return undefined

  const hrefAttr = el.attrs.find(attr => attr.name === 'href')
  if (!hrefAttr)
    return undefined

  const userMatch = hrefAttr.value.match(UserLinkRE)
  if (userMatch) {
    const [, server, username] = userMatch
    // Handles need to ignore server subdomains
    const domain = server.replace(/(.+\.)(.+\..+)/, '$2')
    const handle = `@${username}@${domain}`
    hrefAttr.value = `/${handle}`
    return h(AccountHoverWrapper, { handle, class: 'inline-block' }, () => nodeToVNode(el))
  }

  const tagMatch = hrefAttr.value.match(TagLinkRE)
  if (tagMatch) {
    const [, , name] = tagMatch
    hrefAttr.value = `/tags/${name}`
  }

  return undefined
}
/**
 * Turn a `<pre><code>…</code></pre>` element into a `ContentCode` VNode.
 *
 * The language is read from the first `language-*` token of the `<code>` class
 * attribute. The code is passed to the component URI-encoded.
 *
 * @param el parsed element to inspect
 * @returns a `ContentCode` VNode, or `undefined` when `el` is not a `<pre>`
 *          whose first child is a `<code>` element
 */
function handleCodeBlock(el: Element) {
  if (el.tagName !== 'pre' || el.childNodes[0]?.nodeName !== 'code')
    return undefined

  const codeEl = el.childNodes[0] as Element
  const classes = codeEl.attrs.find(i => i.name === 'class')?.value
  const lang = classes?.split(/\s/g).find(i => i.startsWith('language-'))?.replace('language-', '')
  // Convert every child rather than only the first one: an empty <code> has no
  // first child (which used to throw), and content split over several nodes
  // used to be cut off after the first node.
  const code = codeEl.childNodes.map(n => treeToText(n)).join('')
  return h(ContentCode, { lang, code: encodeURIComponent(code) })
}
/**
 * Run the element through the special-case handlers in order (code blocks
 * first, then mentions) and return the first VNode produced; when neither
 * handler produces one, the element itself is returned.
 */
function handleNode(el: Element) {
  const codeBlock = handleCodeBlock(el)
  if (codeBlock)
    return codeBlock

  const mention = handleMention(el)
  if (mention)
    return mention

  return el
}
/** Entity replacements for characters that are significant in HTML text and quoted attribute values. */
const HTML_TEXT_ESCAPES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
}

/** Entity-encode `text` so it can be embedded in an HTML string without being parsed as markup. */
function escapeHTMLText(text: string): string {
  return text.replace(/[&<>"]/g, char => HTML_TEXT_ESCAPES[char] ?? char)
}

/**
 * Parse raw HTML from Mastodon server to AST,
 * with interop of custom emojis and inline Markdown syntax
 *
 * Processing steps:
 * 1. `:shortcode:` occurrences that are own keys of `customEmojis` become
 *    `<img class="custom-emoji">` tags.
 * 2. Fenced code (``` or ~~~ directly following a `>`) becomes
 *    `<pre><code class="language-…">`.
 * 3. After parsing, inline Markdown (bold, italic, strikethrough, inline code)
 *    found in text nodes outside `<code>`/`<pre>` is converted to elements.
 *
 * @param html raw HTML string
 * @param customEmojis custom emojis keyed by shortcode
 * @returns the parse5 document fragment
 */
export function parseMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) {
  const processed = html
    // custom emojis
    .replace(/:([\w-]+?):/g, (_, name: string) => {
      // Own-property check: shortcodes such as `constructor` must not resolve
      // to members inherited from Object.prototype.
      const emoji = Object.prototype.hasOwnProperty.call(customEmojis, name) ? customEmojis[name] : undefined
      if (emoji)
        return `<img src="${escapeHTMLText(emoji.url)}" alt=":${name}:" class="custom-emoji" data-emoji-id="${name}" />`
      return `:${name}:`
    })
    // handle code blocks
    .replace(/>(```|~~~)([\s\S]+?)\1/g, (_1, _2, raw: string) => {
      const plain = htmlToText(raw)
      const [lang, ...code] = plain.split('\n')
      // `plain` is decoded text, so it has to be escaped again before it is
      // placed back into markup; otherwise `<` in the code would start a tag.
      const classes = lang ? ` class="language-${escapeHTMLText(lang)}"` : ''
      return `><pre><code${classes}>${escapeHTMLText(code.join('\n'))}</code></pre>`
    })

  const tree = parseFragment(processed)

  function walk(node: Node, insideCode = false): Node[] {
    if ('childNodes' in node) {
      const childrenInsideCode = insideCode || node.nodeName === 'code' || node.nodeName === 'pre'
      node.childNodes = node.childNodes.flatMap(n => walk(n, childrenInsideCode))
    }

    // Code content is literal: Markdown markers inside it must stay untouched.
    if (node.nodeName === '#text' && !insideCode) {
      // @ts-expect-error casing
      const text = node.value as string
      // The text node value is entity-decoded, so escape it before it is
      // re-parsed as HTML below; otherwise escaped user text would turn into
      // real elements.
      const escaped = escapeHTMLText(text)
      const converted = escaped
        .replace(/\*\*\*(.*?)\*\*\*/g, '<b><em>$1</em></b>')
        .replace(/\*\*(.*?)\*\*/g, '<b>$1</b>')
        .replace(/\*(.*?)\*/g, '<em>$1</em>')
        .replace(/~~(.*?)~~/g, '<del>$1</del>')
        .replace(/`([^`]+?)`/g, '<code>$1</code>')
      if (converted !== escaped)
        return parseFragment(converted).childNodes
    }

    return [node]
  }

  tree.childNodes = tree.childNodes.flatMap(n => walk(n))
  return tree
}
/**
 * Run `parseMastodonHTML` on the input and serialize the resulting tree back
 * to an HTML string.
 */
export function convertMastodonHTML(html: string, customEmojis: Record<string, Emoji> = {}) {
  return serialize(parseMastodonHTML(html, customEmojis))
}
/**
 * Convert a raw HTML string into a single Fragment VNode whose children are
 * the converted top-level nodes of the parsed content.
 */
export function contentToVNode(
  content: string,
  customEmojis: Record<string, Emoji> = {},
): VNode {
  const { childNodes } = parseMastodonHTML(content, customEmojis)
  const children = childNodes.map(child => treeToVNode(child))
  return h(Fragment, children)
}
2022-11-30 10:08:10 +03:00
/**
 * Convert a parsed node into a VNode without applying the special-case
 * handlers to the node itself (its children still go through `treeToVNode`).
 *
 * - Text nodes are returned as plain strings.
 * - `<a>` elements whose `href` starts with `/` or `.` are rendered as
 *   `RouterLink`: `href` is moved to `to` and `target` is dropped.
 * - Any other node with children is rendered as an element of the same name.
 *
 * NOTE(review): every attribute parsed from the markup is forwarded to `h`
 * unchanged; this presumably relies on the HTML having been sanitized
 * upstream. Confirm.
 *
 * @returns the VNode or string, or `null` for nodes without children that are
 *          not text (e.g. comments)
 */
function nodeToVNode(node: Node): VNode | string | null {
  if (node.nodeName === '#text') {
    // This used to read `input.value`; `input` does not exist in this scope
    // and the directive below hid the error, so it threw at runtime.
    // @ts-expect-error casing
    return node.value as string
  }

  if ('childNodes' in node) {
    const attrs = Object.fromEntries(node.attrs.map(i => [i.name, i.value]))
    if (node.nodeName === 'a' && (attrs.href?.startsWith('/') || attrs.href?.startsWith('.'))) {
      attrs.to = attrs.href
      delete attrs.href
      delete attrs.target
      return h(
        RouterLink as any,
        attrs,
        () => node.childNodes.map(treeToVNode),
      )
    }
    return h(
      node.nodeName,
      attrs,
      node.childNodes.map(treeToVNode),
    )
  }

  return null
}
2022-11-24 06:42:03 +03:00
2022-11-30 10:08:10 +03:00
/**
 * Convert a parsed node into a VNode, giving the special-case handlers
 * (`handleNode`) the first chance to produce the result.
 *
 * Text nodes become plain strings; nodes that are neither text nor have
 * children yield `null`.
 */
function treeToVNode(
  input: Node,
): VNode | string | null {
  if (input.nodeName === '#text') {
    // @ts-expect-error casing
    return input.value as string
  }

  if (!('childNodes' in input))
    return null

  const handled = handleNode(input)
  if (handled == null)
    return null

  // A handler either produced a finished VNode or handed the element back.
  return isVNode(handled) ? handled : nodeToVNode(handled)
}
2022-11-25 19:17:15 +03:00
/**
 * Parse an HTML string and flatten it to text via `treeToText`, trimming
 * leading and trailing whitespace from the concatenated result.
 */
export function htmlToText(html: string) {
  const { childNodes } = parseFragment(html)
  const parts = childNodes.map(child => treeToText(child))
  return parts.join('').trim()
}
2022-11-30 09:50:47 +03:00
/**
 * Flatten a parsed node into text, re-creating Markdown-like markers:
 *
 * - text nodes yield their value, `<br>` yields a newline;
 * - `<p>` and `<pre>` are prefixed with a newline;
 * - `<code>` inside `<pre>` becomes a fenced block (with the language from the
 *   class attribute), any other `<code>` is wrapped in backticks;
 * - `<b>`/`<strong>` → `**`, `<i>`/`<em>` → `*`, `<del>` → `~~`;
 * - `<img>` with a `custom-emoji` class becomes `:<data-emoji-id>:`.
 *
 * @param input parsed node
 * @returns the text representation of the node and its descendants
 */
export function treeToText(input: Node): string {
  let pre = ''
  let body = ''
  let post = ''

  if (input.nodeName === '#text')
    // @ts-expect-error casing
    return input.value

  if (input.nodeName === 'br')
    return '\n'

  if (['p', 'pre'].includes(input.nodeName))
    pre = '\n'

  if (input.nodeName === 'code') {
    if (input.parentNode?.nodeName === 'pre') {
      const classValue = input.attrs.find(attr => attr.name === 'class')?.value
      // Prefer the `language-*` token so unrelated classes do not leak into the
      // fence; without such a token the whole class value is used, as before.
      const languageClass = classValue?.split(/\s+/).find(token => token.startsWith('language-'))
      const lang = (languageClass ?? classValue)?.replace('language-', '')

      pre = `\`\`\`${lang || ''}\n`
      post = '\n```'
    }
    else {
      pre = '`'
      post = '`'
    }
  }
  else if (input.nodeName === 'b' || input.nodeName === 'strong') {
    pre = '**'
    post = '**'
  }
  else if (input.nodeName === 'i' || input.nodeName === 'em') {
    pre = '*'
    post = '*'
  }
  else if (input.nodeName === 'del') {
    pre = '~~'
    post = '~~'
  }

  if ('childNodes' in input)
    body = input.childNodes.map(n => treeToText(n)).join('')

  if (input.nodeName === 'img' && input.attrs.some(attr => attr.name === 'class' && attr.value.includes('custom-emoji')))
    return `:${input.attrs.find(attr => attr.name === 'data-emoji-id')?.value}:`

  return pre + body + post
}