fix: use lightweight bot/scraper html responses. Fixes #3253

This commit is contained in:
Gabe Kangas 2023-08-16 18:19:09 -07:00
parent 78ec6302b9
commit 1e57cff3e0
No known key found for this signature in database
GPG key ID: 4345B2060657F330
7 changed files with 262 additions and 0 deletions

View file

@ -4,13 +4,18 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"net/http" "net/http"
"net/url"
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/owncast/owncast/config"
"github.com/owncast/owncast/core"
"github.com/owncast/owncast/core/data" "github.com/owncast/owncast/core/data"
"github.com/owncast/owncast/models"
"github.com/owncast/owncast/router/middleware" "github.com/owncast/owncast/router/middleware"
"github.com/owncast/owncast/static" "github.com/owncast/owncast/static"
"github.com/owncast/owncast/utils" "github.com/owncast/owncast/utils"
log "github.com/sirupsen/logrus"
) )
// IndexHandler handles the default index route. // IndexHandler handles the default index route.
@ -24,6 +29,13 @@ func IndexHandler(w http.ResponseWriter, r *http.Request) {
return return
} }
// For search engine bots and social scrapers return a special
// server-rendered page.
if utils.IsUserAgentABot(r.UserAgent()) && isIndexRequest {
handleScraperMetadataPage(w, r)
return
}
// Set a cache control max-age header // Set a cache control max-age header
middleware.SetCachingHeaders(w, r) middleware.SetCachingHeaders(w, r)
@ -93,3 +105,79 @@ func renderIndexHtml(w http.ResponseWriter, nonce string) {
http.Error(w, err.Error(), http.StatusInternalServerError) http.Error(w, err.Error(), http.StatusInternalServerError)
} }
} }
// MetadataPage represents a server-rendered web page for bots and web scrapers.
// It is the data passed to the bot metadata template by handleScraperMetadataPage.
type MetadataPage struct {
	// RequestedURL is the absolute URL (scheme, host and path) of the request.
	RequestedURL string
	// Image is the absolute URL of the server logo ("/logo/external").
	Image string
	// Thumbnail is the absolute URL of the stream thumbnail when the stream is
	// online and a thumbnail file exists; otherwise it is the same as Image.
	Thumbnail string
	// TagsString is Tags joined with commas.
	TagsString string
	// Summary is the configured server summary.
	Summary string
	// Name is the configured server name.
	Name string
	// Tags is the list of configured server metadata tags.
	Tags []string
	// SocialHandles is the list of configured social handles.
	SocialHandles []models.SocialHandle
}
// handleScraperMetadataPage returns a basic HTML page with server-rendered
// metadata from the config to give to Opengraph clients and web scrapers
// (bots, web crawlers, etc).
//
// Absolute URLs are built from the scheme of the configured server URL
// (falling back to "http") and the Host of the incoming request. If the
// template cannot be loaded a 500 is returned; if the request host/path cannot
// be turned into a valid URL a 400 is returned instead of continuing with a
// nil URL.
func handleScraperMetadataPage(w http.ResponseWriter, r *http.Request) {
	tmpl, err := static.GetBotMetadataTemplate()
	if err != nil {
		log.Errorln(err)
		w.WriteHeader(http.StatusInternalServerError)
		return
	}

	// Prefer the scheme of the configured public server URL, since the
	// instance may sit behind a TLS-terminating proxy.
	scheme := "http"
	if siteURL := data.GetServerURL(); siteURL != "" {
		if parsed, err := url.Parse(siteURL); err == nil && parsed.Scheme != "" {
			scheme = parsed.Scheme
		}
	}
	origin := scheme + "://" + r.Host

	// r.Host and r.URL.Path are client supplied. A parse failure leaves the
	// *url.URL nil, so bail out rather than dereference it below.
	fullURL, err := url.Parse(origin + r.URL.Path)
	if err != nil {
		log.Errorln(err)
		w.WriteHeader(http.StatusBadRequest)
		return
	}

	imageURL, err := url.Parse(origin + "/logo/external")
	if err != nil {
		log.Errorln(err)
		w.WriteHeader(http.StatusBadRequest)
		return
	}

	status := core.GetStatus()

	// If the thumbnail does not exist or we're offline then just use the logo image.
	thumbnailURL := imageURL.String()
	if status.Online && utils.DoesFileExists(filepath.Join(config.DataDirectory, "tmp", "thumbnail.jpg")) {
		if thumbnail, err := url.Parse(origin + "/thumbnail.jpg"); err != nil {
			log.Errorln(err)
		} else {
			thumbnailURL = thumbnail.String()
		}
	}

	// Fetch the tags once and reuse them for both the list and the joined string.
	tags := data.GetServerMetadataTags()
	metadata := MetadataPage{
		Name:          data.GetServerName(),
		RequestedURL:  fullURL.String(),
		Image:         imageURL.String(),
		Summary:       data.GetServerSummary(),
		Thumbnail:     thumbnailURL,
		TagsString:    strings.Join(tags, ","),
		Tags:          tags,
		SocialHandles: data.GetSocialHandles(),
	}

	// Set a cache header
	middleware.SetCachingHeaders(w, r)

	w.Header().Set("Content-Type", "text/html")
	if err := tmpl.Execute(w, metadata); err != nil {
		log.Errorln(err)
	}
}

1
go.mod
View file

@ -67,6 +67,7 @@ require (
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
github.com/gorilla/css v1.0.0 // indirect github.com/gorilla/css v1.0.0 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/mssola/user_agent v0.6.0
github.com/oschwald/maxminddb-golang v1.11.0 // indirect github.com/oschwald/maxminddb-golang v1.11.0 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect
) )

2
go.sum
View file

@ -85,6 +85,8 @@ github.com/microcosm-cc/bluemonday v1.0.25 h1:4NEwSfiJ+Wva0VxN5B8OwMicaJvD8r9tlJ
github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE= github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mssola/user_agent v0.6.0 h1:uwPR4rtWlCHRFyyP9u2KOV0u8iQXmS7Z7feTrstQwk4=
github.com/mssola/user_agent v0.6.0/go.mod h1:TTPno8LPY3wAIEKRpAtkdMT0f8SE24pLRGPahjCH4uw=
github.com/mvdan/xurls v1.1.0 h1:OpuDelGQ1R1ueQ6sSryzi6P+1RtBpfQHM8fJwlE45ww= github.com/mvdan/xurls v1.1.0 h1:OpuDelGQ1R1ueQ6sSryzi6P+1RtBpfQHM8fJwlE45ww=
github.com/mvdan/xurls v1.1.0/go.mod h1:tQlNn3BED8bE/15hnSL2HLkDeLWpNPAwtw7wkEq44oU= github.com/mvdan/xurls v1.1.0/go.mod h1:tQlNn3BED8bE/15hnSL2HLkDeLWpNPAwtw7wkEq44oU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=

83
static/metadata.html.tmpl vendored Normal file
View file

@ -0,0 +1,83 @@
{{- /*
  Lightweight, script-free page served to bots, search engines and social
  scrapers. It is executed with a MetadataPage value: Name, Summary,
  RequestedURL, Image, Thumbnail, TagsString, Tags and SocialHandles.
*/ -}}
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>{{.Name}}</title>
<meta name="description" content="{{.Summary}}">
<meta property="og:title" content="{{.Name}}">
<meta property="og:site_name" content="{{.Name}}">
<meta property="og:url" content="{{.RequestedURL}}">
<meta property="og:description" content="{{.Summary}}">
<meta property="og:type" content="video.other">
<meta property="video:tag" content="{{.TagsString}}">
<meta property="og:image" content="{{.Thumbnail}}">
<meta property="og:image:url" content="{{.Thumbnail}}">
<meta property="og:image:alt" content="{{.Name}}">
<meta property="og:video" content='{{.RequestedURL}}embed/video' />
<meta property="og:video:secure_url" content='{{.RequestedURL}}embed/video' />
<meta property="og:video:height" content="315" />
<meta property="og:video:width" content="560" />
<meta property="og:video:type" content="text/html" />
<meta property="og:video:actor" content="{{.Name}}" />
<meta property="twitter:title" content="{{.Name}}">
<meta property="twitter:url" content="{{.RequestedURL}}">
<meta property="twitter:description" content="{{.Summary}}">
<meta property="twitter:image" content="{{.Image}}">
<meta property="twitter:card" content="player" />
<meta property="twitter:player" content='{{.RequestedURL}}embed/video' />
<meta property="twitter:player:width" content="560" />
<meta property="twitter:player:height" content="315" />
<link rel="apple-touch-icon" sizes="57x57" href="/img/favicon/apple-icon-57x57.png">
<link rel="apple-touch-icon" sizes="60x60" href="/img/favicon/apple-icon-60x60.png">
<link rel="apple-touch-icon" sizes="72x72" href="/img/favicon/apple-icon-72x72.png">
<link rel="apple-touch-icon" sizes="76x76" href="/img/favicon/apple-icon-76x76.png">
<link rel="apple-touch-icon" sizes="114x114" href="/img/favicon/apple-icon-114x114.png">
<link rel="apple-touch-icon" sizes="120x120" href="/img/favicon/apple-icon-120x120.png">
<link rel="apple-touch-icon" sizes="144x144" href="/img/favicon/apple-icon-144x144.png">
<link rel="apple-touch-icon" sizes="152x152" href="/img/favicon/apple-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/img/favicon/apple-icon-180x180.png">
<link rel="icon" type="image/png" sizes="192x192" href="/img/favicon/android-icon-192x192.png">
<link rel="icon" type="image/png" sizes="32x32" href="/img/favicon/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="96x96" href="/img/favicon/favicon-96x96.png">
<link rel="icon" type="image/png" sizes="16x16" href="/img/favicon/favicon-16x16.png">
<link rel="manifest" href="/manifest.json">
<link rel="authorization_endpoint" href="/api/auth/provider/indieauth">
<meta name="msapplication-TileColor" content="#ffffff">
<meta name="msapplication-TileImage" content="/img/favicon/ms-icon-144x144.png">
<meta name="theme-color" content="#ffffff">
</head>
<body>
<h1>{{.Name}}</h1>
<center>
<img src="{{.Thumbnail}}" width="10%" alt="{{.Name}}" />
</center>
<h3>{{.Summary}}</h3>
{{if .Tags}}
<ul>
{{range .Tags}}
<li>{{.}}</li>
{{end}}
</ul>
{{end}}
<br/>
<h3>Links for {{.Name}}:</h3>
{{if .SocialHandles}}
<ul>
{{range .SocialHandles}}
<li><a href="{{.URL}}">{{.Platform}}</a></li>
{{end}}
</ul>
{{end}}
</body>
</html>

11
static/static.go vendored
View file

@ -76,3 +76,14 @@ func getFileSystemStaticFileOrDefault(path string, defaultData []byte) []byte {
return data return data
} }
//go:embed metadata.html.tmpl
var botMetadataTemplate embed.FS
// GetBotMetadataTemplate will return the bot/scraper metadata template.
func GetBotMetadataTemplate() (*template.Template, error) {
name := "metadata.html.tmpl"
t, err := template.ParseFS(botMetadataTemplate, name)
tmpl := template.Must(t, err)
return tmpl, err
}

View file

@ -0,0 +1,48 @@
const listenForErrors = require('./lib/errors.js').listenForErrors;
// Verifies that clients identifying as bots/scrapers (here: a Mastodon
// user agent) receive the lightweight server-rendered metadata page.
describe('Bot and scraper metadata page', () => {
  // Returns the `content` attribute of the <meta property="..."> tag in the
  // document head, or null when no such tag exists, so a missing tag shows
  // up as a readable expectation failure instead of a TypeError thrown
  // inside the page context.
  async function getMetaTagContent(property) {
    const selector = `meta[property="${property}"]`;
    return page.evaluate((sel) => {
      const tag = document.head.querySelector(sel);
      return tag ? tag.getAttribute('content') : null;
    }, selector);
  }

  beforeAll(async () => {
    await page.setViewport({ width: 1080, height: 720 });
    listenForErrors(browser, page);
    // setUserAgent returns a promise; it must resolve before navigating or
    // the request may go out with the default browser user agent.
    await page.setUserAgent('Mastodon');
    await page.goto('http://localhost:5309');
  });

  afterAll(async () => {
    await page.waitForTimeout(3000);
    await page.screenshot({
      path: 'screenshots/screenshot_bots_share_search_scrapers.png',
      fullPage: true,
    });
  });

  it('should have rendered the simple bot accessible html page', async () => {
    await page.waitForSelector('h1');
    await page.waitForSelector('h3');

    const ogVideo = await getMetaTagContent('og:video');
    expect(ogVideo).toBe('http://localhost:5309/embed/video');

    const ogVideoType = await getMetaTagContent('og:video:type');
    expect(ogVideoType).toBe('text/html');

    // When stream is live the thumbnail is provided as the image.
    const ogImage = await getMetaTagContent('og:image');
    expect(ogImage).toBe('http://localhost:5309/thumbnail.jpg');

    const twitterUrl = await getMetaTagContent('twitter:url');
    expect(twitterUrl).toBe('http://localhost:5309/');

    const twitterImage = await getMetaTagContent('twitter:image');
    expect(twitterImage).toBe('http://localhost:5309/logo/external');
  });
});

View file

@ -16,6 +16,7 @@ import (
"strings" "strings"
"time" "time"
"github.com/mssola/user_agent"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension" "github.com/yuin/goldmark/extension"
@ -120,6 +121,34 @@ func IsUserAgentAPlayer(userAgent string) bool {
return false return false
} }
// IsUserAgentABot returns if a web client user-agent is seen as a bot.
//
// An empty user-agent is never treated as a bot. Otherwise the user-agent is
// matched case-insensitively against a list of known scraper substrings, and
// if none match the decision is delegated to the user_agent package's bot
// detection.
func IsUserAgentABot(userAgent string) bool {
	if userAgent == "" {
		return false
	}

	botStrings := []string{
		"mastodon",
		"pleroma",
		"applebot",
		"whatsapp",
		"matrix",
		"synapse",
		"element",
		"rocket.chat",
		"duckduckbot",
	}

	// Lowercase once up front instead of on every loop iteration.
	lowered := strings.ToLower(userAgent)
	for _, botString := range botStrings {
		if strings.Contains(lowered, botString) {
			return true
		}
	}

	return user_agent.New(userAgent).Bot()
}
// RenderSimpleMarkdown will return HTML without sanitization or specific formatting rules. // RenderSimpleMarkdown will return HTML without sanitization or specific formatting rules.
func RenderSimpleMarkdown(raw string) string { func RenderSimpleMarkdown(raw string) string {
markdown := goldmark.New( markdown := goldmark.New(