mirror of
https://github.com/owncast/owncast.git
synced 2024-11-22 04:40:37 +03:00
fix: use lightweight bot/scraper html responses. Fixes #3253
This commit is contained in:
parent
78ec6302b9
commit
1e57cff3e0
7 changed files with 262 additions and 0 deletions
|
@ -4,13 +4,18 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/owncast/owncast/config"
|
||||
"github.com/owncast/owncast/core"
|
||||
"github.com/owncast/owncast/core/data"
|
||||
"github.com/owncast/owncast/models"
|
||||
"github.com/owncast/owncast/router/middleware"
|
||||
"github.com/owncast/owncast/static"
|
||||
"github.com/owncast/owncast/utils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// IndexHandler handles the default index route.
|
||||
|
@ -24,6 +29,13 @@ func IndexHandler(w http.ResponseWriter, r *http.Request) {
|
|||
return
|
||||
}
|
||||
|
||||
// For search engine bots and social scrapers return a special
|
||||
// server-rendered page.
|
||||
if utils.IsUserAgentABot(r.UserAgent()) && isIndexRequest {
|
||||
handleScraperMetadataPage(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
// Set a cache control max-age header
|
||||
middleware.SetCachingHeaders(w, r)
|
||||
|
||||
|
@ -93,3 +105,79 @@ func renderIndexHtml(w http.ResponseWriter, nonce string) {
|
|||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
}
|
||||
}
|
||||
|
||||
// MetadataPage represents a server-rendered web page for bots and web scrapers.
|
||||
type MetadataPage struct {
|
||||
RequestedURL string
|
||||
Image string
|
||||
Thumbnail string
|
||||
TagsString string
|
||||
Summary string
|
||||
Name string
|
||||
Tags []string
|
||||
SocialHandles []models.SocialHandle
|
||||
}
|
||||
|
||||
// Return a basic HTML page with server-rendered metadata from the config
|
||||
// to give to Opengraph clients and web scrapers (bots, web crawlers, etc).
|
||||
func handleScraperMetadataPage(w http.ResponseWriter, r *http.Request) {
|
||||
tmpl, err := static.GetBotMetadataTemplate()
|
||||
if err != nil {
|
||||
log.Errorln(err)
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
scheme := "http"
|
||||
|
||||
if siteURL := data.GetServerURL(); siteURL != "" {
|
||||
if parsed, err := url.Parse(siteURL); err == nil && parsed.Scheme != "" {
|
||||
scheme = parsed.Scheme
|
||||
}
|
||||
}
|
||||
|
||||
fullURL, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, r.URL.Path))
|
||||
if err != nil {
|
||||
log.Errorln(err)
|
||||
}
|
||||
imageURL, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, "/logo/external"))
|
||||
if err != nil {
|
||||
log.Errorln(err)
|
||||
}
|
||||
|
||||
status := core.GetStatus()
|
||||
|
||||
// If the thumbnail does not exist or we're offline then just use the logo image
|
||||
var thumbnailURL string
|
||||
if status.Online && utils.DoesFileExists(filepath.Join(config.DataDirectory, "tmp", "thumbnail.jpg")) {
|
||||
thumbnail, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, "/thumbnail.jpg"))
|
||||
if err != nil {
|
||||
log.Errorln(err)
|
||||
thumbnailURL = imageURL.String()
|
||||
} else {
|
||||
thumbnailURL = thumbnail.String()
|
||||
}
|
||||
} else {
|
||||
thumbnailURL = imageURL.String()
|
||||
}
|
||||
|
||||
tagsString := strings.Join(data.GetServerMetadataTags(), ",")
|
||||
metadata := MetadataPage{
|
||||
Name: data.GetServerName(),
|
||||
RequestedURL: fullURL.String(),
|
||||
Image: imageURL.String(),
|
||||
Summary: data.GetServerSummary(),
|
||||
Thumbnail: thumbnailURL,
|
||||
TagsString: tagsString,
|
||||
Tags: data.GetServerMetadataTags(),
|
||||
SocialHandles: data.GetSocialHandles(),
|
||||
}
|
||||
|
||||
// Set a cache header
|
||||
middleware.SetCachingHeaders(w, r)
|
||||
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
if err := tmpl.Execute(w, metadata); err != nil {
|
||||
log.Errorln(err)
|
||||
}
|
||||
}
|
||||
|
|
1
go.mod
1
go.mod
|
@ -67,6 +67,7 @@ require (
|
|||
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
|
||||
github.com/gorilla/css v1.0.0 // indirect
|
||||
github.com/jmespath/go-jmespath v0.4.0 // indirect
|
||||
github.com/mssola/user_agent v0.6.0 // indirect
|
||||
github.com/oschwald/maxminddb-golang v1.11.0 // indirect
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
)
|
||||
|
|
2
go.sum
2
go.sum
|
@ -85,6 +85,8 @@ github.com/microcosm-cc/bluemonday v1.0.25 h1:4NEwSfiJ+Wva0VxN5B8OwMicaJvD8r9tlJ
|
|||
github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/mssola/user_agent v0.6.0 h1:uwPR4rtWlCHRFyyP9u2KOV0u8iQXmS7Z7feTrstQwk4=
|
||||
github.com/mssola/user_agent v0.6.0/go.mod h1:TTPno8LPY3wAIEKRpAtkdMT0f8SE24pLRGPahjCH4uw=
|
||||
github.com/mvdan/xurls v1.1.0 h1:OpuDelGQ1R1ueQ6sSryzi6P+1RtBpfQHM8fJwlE45ww=
|
||||
github.com/mvdan/xurls v1.1.0/go.mod h1:tQlNn3BED8bE/15hnSL2HLkDeLWpNPAwtw7wkEq44oU=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
|
|
83
static/metadata.html.tmpl
vendored
Normal file
83
static/metadata.html.tmpl
vendored
Normal file
|
@ -0,0 +1,83 @@
|
|||
{{- /*
  Lightweight page served to bots and scrapers in place of the web app.
  Fields used: Name, Summary, RequestedURL, TagsString, Thumbnail, Image,
  Tags and SocialHandles (each with URL and Platform).
  RequestedURL is concatenated directly with "embed/video" below, so the
  embed links are only well-formed when RequestedURL ends with a slash.
*/ -}}
<!doctype html>

<html lang="en">
  <head>
    <meta charset="utf-8">

    <title>{{.Name}}</title>
    <meta name="description" content="{{.Summary}}">

    <meta property="og:title" content="{{.Name}}">
    <meta property="og:site_name" content="{{.Name}}">
    <meta property="og:url" content="{{.RequestedURL}}">
    <meta property="og:description" content="{{.Summary}}">
    <meta property="og:type" content="video.other">
    <meta property="video:tag" content="{{.TagsString}}">

    <meta property="og:image" content="{{.Thumbnail}}">
    <meta property="og:image:url" content="{{.Thumbnail}}">
    <meta property="og:image:alt" content="{{.Name}}">

    <meta property="og:video" content='{{.RequestedURL}}embed/video' />
    <meta property="og:video:secure_url" content='{{.RequestedURL}}embed/video' />
    <meta property="og:video:height" content="315" />
    <meta property="og:video:width" content="560" />
    <meta property="og:video:type" content="text/html" />
    <meta property="og:video:actor" content="{{.Name}}" />

    <meta property="twitter:title" content="{{.Name}}">
    <meta property="twitter:url" content="{{.RequestedURL}}">
    <meta property="twitter:description" content="{{.Summary}}">
    <meta property="twitter:image" content="{{.Image}}">
    <meta property="twitter:card" content="player" />
    <meta property="twitter:player" content='{{.RequestedURL}}embed/video' />
    <meta property="twitter:player:width" content="560" />
    <meta property="twitter:player:height" content="315" />

    <link rel="apple-touch-icon" sizes="57x57" href="/img/favicon/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/img/favicon/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/img/favicon/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/img/favicon/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/img/favicon/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/img/favicon/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/img/favicon/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/img/favicon/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/img/favicon/apple-icon-180x180.png">
    <link rel="icon" type="image/png" sizes="192x192" href="/img/favicon/android-icon-192x192.png">
    <link rel="icon" type="image/png" sizes="32x32" href="/img/favicon/favicon-32x32.png">
    <link rel="icon" type="image/png" sizes="96x96" href="/img/favicon/favicon-96x96.png">
    <link rel="icon" type="image/png" sizes="16x16" href="/img/favicon/favicon-16x16.png">
    <link rel="manifest" href="/manifest.json">

    <link rel="authorization_endpoint" href="/api/auth/provider/indieauth">

    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/img/favicon/ms-icon-144x144.png">
    <meta name="theme-color" content="#ffffff">

  </head>

  <body>
    <h1>{{.Name}}</h1>

    <center>
      <img src="{{.Thumbnail}}" alt="{{.Name}}" width="10%" />
    </center>

    <h3>{{.Summary}}</h3>

    <ul>
      {{range .Tags}}
        <li>{{.}}</li>
      {{end}}
    </ul>

    <br/>

    <h3>Links for {{.Name}}:</h3>

    <ul>
      {{range .SocialHandles}}
        <li><a href="{{.URL}}">{{.Platform}}</a></li>
      {{end}}
    </ul>

  </body>
</html>
|
11
static/static.go
vendored
11
static/static.go
vendored
|
@ -76,3 +76,14 @@ func getFileSystemStaticFileOrDefault(path string, defaultData []byte) []byte {
|
|||
|
||||
return data
|
||||
}
|
||||
|
||||
//go:embed metadata.html.tmpl
|
||||
var botMetadataTemplate embed.FS
|
||||
|
||||
// GetBotMetadataTemplate will return the bot/scraper metadata template.
|
||||
func GetBotMetadataTemplate() (*template.Template, error) {
|
||||
name := "metadata.html.tmpl"
|
||||
t, err := template.ParseFS(botMetadataTemplate, name)
|
||||
tmpl := template.Must(t, err)
|
||||
return tmpl, err
|
||||
}
|
||||
|
|
48
test/automated/browser/bot-share-search-scrapers.test.js
Normal file
48
test/automated/browser/bot-share-search-scrapers.test.js
Normal file
|
@ -0,0 +1,48 @@
|
|||
const listenForErrors = require('./lib/errors.js').listenForErrors;

// Verifies that a client identifying itself with a bot user agent receives
// the lightweight server-rendered metadata page instead of the web app.
describe('Bot and scraper metadata page', () => {
  // Returns the `content` attribute of the <meta property="..."> tag in the
  // document head, or null when no such tag exists so that the calling
  // expectation fails with a readable diff instead of an evaluation error.
  async function getMetaTagContent(property) {
    const selector = `meta[property="${property}"]`;

    return page.evaluate((selector) => {
      const tag = document.head.querySelector(selector);
      return tag ? tag.getAttribute('content') : null;
    }, selector);
  }

  beforeAll(async () => {
    await page.setViewport({ width: 1080, height: 720 });
    listenForErrors(browser, page);
    // setUserAgent is asynchronous; it must complete before navigating or
    // the request may go out with the default browser user agent.
    await page.setUserAgent('Mastodon');
    await page.goto('http://localhost:5309');
  });

  afterAll(async () => {
    await page.waitForTimeout(3000);
    await page.screenshot({
      path: 'screenshots/screenshot_bots_share_search_scrapers.png',
      fullPage: true,
    });
  });

  it('should have rendered the simple bot accessible html page', async () => {
    await page.waitForSelector('h1');
    await page.waitForSelector('h3');

    const ogVideo = await getMetaTagContent('og:video');
    expect(ogVideo).toBe('http://localhost:5309/embed/video');

    const ogVideoType = await getMetaTagContent('og:video:type');
    expect(ogVideoType).toBe('text/html');

    // When stream is live the thumbnail is provided as the image.
    const ogImage = await getMetaTagContent('og:image');
    expect(ogImage).toBe('http://localhost:5309/thumbnail.jpg');

    const twitterUrl = await getMetaTagContent('twitter:url');
    expect(twitterUrl).toBe('http://localhost:5309/');

    const twitterImage = await getMetaTagContent('twitter:image');
    expect(twitterImage).toBe('http://localhost:5309/logo/external');
  });
});
|
|
@ -16,6 +16,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mssola/user_agent"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/extension"
|
||||
|
@ -120,6 +121,34 @@ func IsUserAgentAPlayer(userAgent string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// IsUserAgentABot returns if a web client user-agent is seen as a bot.
|
||||
func IsUserAgentABot(userAgent string) bool {
|
||||
if userAgent == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
botStrings := []string{
|
||||
"mastodon",
|
||||
"pleroma",
|
||||
"applebot",
|
||||
"whatsapp",
|
||||
"matrix",
|
||||
"synapse",
|
||||
"element",
|
||||
"rocket.chat",
|
||||
"duckduckbot",
|
||||
}
|
||||
|
||||
for _, botString := range botStrings {
|
||||
if strings.Contains(strings.ToLower(userAgent), botString) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
ua := user_agent.New(userAgent)
|
||||
return ua.Bot()
|
||||
}
|
||||
|
||||
// RenderSimpleMarkdown will return HTML without sanitization or specific formatting rules.
|
||||
func RenderSimpleMarkdown(raw string) string {
|
||||
markdown := goldmark.New(
|
||||
|
|
Loading…
Reference in a new issue