[feature] Add healthcheck endpoints /livez and /readyz (#2783)

* [feature] Add healthcheck endpoints `/livez` and `/readyz`

* use select that returns no data
This commit is contained in:
tobi 2024-03-25 18:05:14 +01:00 committed by GitHub
parent 36f79e650c
commit b7b42e832a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 328 additions and 4 deletions

View file

@ -309,6 +309,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
authModule = api.NewAuth(dbService, processor, idp, routerSession, sessionName) // auth/oauth paths authModule = api.NewAuth(dbService, processor, idp, routerSession, sessionName) // auth/oauth paths
clientModule = api.NewClient(dbService, processor) // api client endpoints clientModule = api.NewClient(dbService, processor) // api client endpoints
metricsModule = api.NewMetrics() // Metrics endpoints metricsModule = api.NewMetrics() // Metrics endpoints
healthModule = api.NewHealth(dbService.Ready) // Health check endpoints
fileserverModule = api.NewFileserver(processor) // fileserver endpoints fileserverModule = api.NewFileserver(processor) // fileserver endpoints
wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints
nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint
@ -340,6 +341,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
authModule.Route(router, clLimit, clThrottle, gzip) authModule.Route(router, clLimit, clThrottle, gzip)
clientModule.Route(router, clLimit, clThrottle, gzip) clientModule.Route(router, clLimit, clThrottle, gzip)
metricsModule.Route(router, clLimit, clThrottle, gzip) metricsModule.Route(router, clLimit, clThrottle, gzip)
healthModule.Route(router, clLimit, clThrottle)
fileserverModule.Route(router, fsMainLimit, fsThrottle) fileserverModule.Route(router, fsMainLimit, fsThrottle)
fileserverModule.RouteEmojis(router, instanceAccount.ID, fsEmojiLimit, fsThrottle) fileserverModule.RouteEmojis(router, instanceAccount.ID, fsEmojiLimit, fsThrottle)
wellKnownModule.Route(router, gzip, s2sLimit, s2sThrottle) wellKnownModule.Route(router, gzip, s2sLimit, s2sThrottle)

View file

@ -224,6 +224,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
authModule = api.NewAuth(state.DB, processor, idp, routerSession, sessionName) // auth/oauth paths authModule = api.NewAuth(state.DB, processor, idp, routerSession, sessionName) // auth/oauth paths
clientModule = api.NewClient(state.DB, processor) // api client endpoints clientModule = api.NewClient(state.DB, processor) // api client endpoints
metricsModule = api.NewMetrics() // Metrics endpoints metricsModule = api.NewMetrics() // Metrics endpoints
healthModule = api.NewHealth(state.DB.Ready) // Health check endpoints
fileserverModule = api.NewFileserver(processor) // fileserver endpoints fileserverModule = api.NewFileserver(processor) // fileserver endpoints
wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints wellKnownModule = api.NewWellKnown(processor) // .well-known endpoints
nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint nodeInfoModule = api.NewNodeInfo(processor) // nodeinfo endpoint
@ -235,6 +236,7 @@ var Start action.GTSAction = func(ctx context.Context) error {
authModule.Route(router) authModule.Route(router)
clientModule.Route(router) clientModule.Route(router)
metricsModule.Route(router) metricsModule.Route(router)
healthModule.Route(router)
fileserverModule.Route(router) fileserverModule.Route(router)
fileserverModule.RouteEmojis(router, instanceAccount.ID) fileserverModule.RouteEmojis(router, instanceAccount.ID)
wellKnownModule.Route(router) wellKnownModule.Route(router)

View file

@ -0,0 +1,48 @@
# Health Checks
GoToSocial exposes two health check HTTP endpoints: `/readyz` and `/livez`.
These can be used to check whether GoToSocial is reachable and able to make simple database queries.
`/livez` will always return a 200 OK response with no body, in response to both GET and HEAD requests. This is useful to check if the GoToSocial service is alive.
`/readyz` will return a 200 OK response with no body, in response to both GET and HEAD requests, if GoToSocial is able to run a very simple SELECT query against the configured database backend. If an error occurs while running the SELECT, the error will be logged, and 500 Internal Server Error will be returned, with no body.
You can use the above endpoints to implement health checks in container runtimes / orchestration systems.
For example, in a Docker setup, you could add the following to your docker-compose.yaml:
```yaml
healthcheck:
  test: wget --no-verbose --tries=1 --spider http://localhost:8080/readyz || exit 1
  interval: 120s
  retries: 5
  start_period: 30s
  timeout: 10s
```
With the above health check, Docker will check every two minutes whether the service is available by doing a HEAD request to `/readyz`; failed checks during the first 30 seconds after the container starts (the start period) do not count towards the retry limit. If the check fails five times in a row, the service will be reported as unhealthy. You can use this in whatever orchestration system you are using to force the container to restart.
!!! warning
    When doing database migrations on slow hardware, a migration might take longer than the roughly 10 minutes (five retries at two-minute intervals) afforded by the above health check.
    On such a system, you may want to increase the interval or number of retries of the health check to ensure that you don't stop GoToSocial in the middle of a migration (which is a very bad thing to do!).
!!! tip
    Though the health check endpoints don't reveal any sensitive info, and run only very simple queries, you may want to avoid exposing them to the outside world. You could do this in nginx, for example, by adding the following snippet to your `server` stanza:

    ```nginx
    location /livez {
      return 404;
    }
    location /readyz {
      return 404;
    }
    ```

    This will cause nginx to intercept these requests *before* they are passed to GoToSocial, and just return 404 Not Found.
References:
- [Dockerfile reference](https://docs.docker.com/reference/dockerfile/#healthcheck)
- [Compose file reference](https://docs.docker.com/compose/compose-file/compose-file-v3/#healthcheck)

View file

@ -7878,6 +7878,23 @@ paths:
summary: View instance information. summary: View instance information.
tags: tags:
- instance - instance
/livez:
get:
operationId: liveGet
responses:
"200":
description: OK
summary: Returns code 200 with no body if GoToSocial is "live", ie., able to respond to HTTP requests.
tags:
- health
head:
operationId: liveHead
responses:
"200":
description: OK
summary: Returns code 200 if GoToSocial is "live", ie., able to respond to HTTP requests.
tags:
- health
/nodeinfo/2.0: /nodeinfo/2.0:
get: get:
description: 'See: https://nodeinfo.diaspora.software/schema.html' description: 'See: https://nodeinfo.diaspora.software/schema.html'
@ -7892,6 +7909,27 @@ paths:
summary: Returns a compliant nodeinfo response to node info queries. summary: Returns a compliant nodeinfo response to node info queries.
tags: tags:
- nodeinfo - nodeinfo
/readyz:
get:
description: If GtS is not ready, 500 Internal Error will be returned, and an error will be logged (but not returned to the caller, to avoid leaking internals).
operationId: readyGet
responses:
"200":
description: OK
"500":
description: Not ready. Check logs for error message.
summary: Returns code 200 with no body if GoToSocial is "ready", ie., able to connect to the database backend and do a simple SELECT.
tags:
- health
head:
description: If GtS is not ready, 500 Internal Error will be returned, and an error will be logged (but not returned to the caller, to avoid leaking internals).
operationId: readyHead
responses:
"200":
description: OK
summary: Returns code 200 with no body if GoToSocial is "ready", ie., able to connect to the database backend and do a simple SELECT.
tags:
- health
/users/{username}/collections/featured: /users/{username}/collections/featured:
get: get:
description: |- description: |-

51
internal/api/health.go Normal file
View file

@ -0,0 +1,51 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package api
import (
"context"
"github.com/gin-gonic/gin"
"github.com/superseriousbusiness/gotosocial/internal/api/health"
"github.com/superseriousbusiness/gotosocial/internal/middleware"
"github.com/superseriousbusiness/gotosocial/internal/router"
)
// Health wraps the health check module so that its
// handlers can be attached to a router via Route.
type Health struct {
	// health is the module whose handlers get routed.
	health *health.Module
}
// Route attaches the health check handlers to r using a group
// on the top-level (empty) prefix. The middlewares passed in m
// are registered on the group first, followed by a Cache-Control
// middleware that sets the "no-store" directive.
func (h *Health) Route(r *router.Router, m ...gin.HandlerFunc) {
	// Group lives directly on the top level prefix.
	group := r.AttachGroup("")

	// Caller-supplied middlewares go first.
	group.Use(m...)

	// Health responses must never be cached.
	noStore := middleware.CacheControl(middleware.CacheControlConfig{
		Directives: []string{"no-store"},
	})
	group.Use(noStore)

	h.health.Route(group.Handle)
}
// NewHealth returns a Health whose underlying health
// module is constructed with the given readyF function.
func NewHealth(readyF func(context.Context) error) *Health {
	h := new(Health)
	h.health = health.New(readyF)
	return h
}

View file

@ -0,0 +1,48 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package health
import (
"context"
"net/http"
"github.com/gin-gonic/gin"
)
const (
	// LivePath is the path on which the liveness handlers are registered.
	LivePath = "/livez"
	// ReadyPath is the path on which the readiness handlers are registered.
	ReadyPath = "/readyz"
)
// Module provides the HTTP handlers for the
// health check endpoints LivePath and ReadyPath.
type Module struct {
	// readyF is invoked by the readiness handlers with the
	// request context; a non-nil error yields a 500 response.
	readyF func(context.Context) error
}
// New returns a health Module that will call readyF
// whenever one of its readiness handlers is invoked.
func New(readyF func(context.Context) error) *Module {
	mod := new(Module)
	mod.readyF = readyF
	return mod
}
// Route registers the liveness and readiness handlers, for
// both GET and HEAD, by calling attachHandler once per route.
func (m *Module) Route(attachHandler func(method string, path string, f ...gin.HandlerFunc) gin.IRoutes) {
	// Routes are attached in the order listed here.
	routes := []struct {
		method  string
		path    string
		handler gin.HandlerFunc
	}{
		{http.MethodGet, LivePath, m.LiveGETRequest},
		{http.MethodHead, LivePath, m.LiveHEADRequest},
		{http.MethodGet, ReadyPath, m.ReadyGETRequest},
		{http.MethodHead, ReadyPath, m.ReadyHEADRequest},
	}

	for _, rt := range routes {
		attachHandler(rt.method, rt.path, rt.handler)
	}
}

View file

@ -0,0 +1,54 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package health
import (
"net/http"
"github.com/gin-gonic/gin"
)
// LiveGETRequest swagger:operation GET /livez liveGet
//
// Returns code 200 with no body if GoToSocial is "live", ie., able to respond to HTTP requests.
//
//	---
//	tags:
//	- health
//
//	responses:
//		'200':
//			description: OK
func (m *Module) LiveGETRequest(c *gin.Context) {
	// No checks are performed here: being able to run
	// this handler at all is what signals liveness.
	c.Status(http.StatusOK)
}
// LiveHEADRequest swagger:operation HEAD /livez liveHead
//
// Returns code 200 if GoToSocial is "live", ie., able to respond to HTTP requests.
//
//	---
//	tags:
//	- health
//
//	responses:
//		'200':
//			description: OK
func (m *Module) LiveHEADRequest(c *gin.Context) {
	// Same as the GET variant: only the status
	// code is set, and no checks are performed.
	c.Status(http.StatusOK)
}

View file

@ -0,0 +1,74 @@
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package health
import (
"net/http"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/gin-gonic/gin"
)
// ready calls the module's readyF with the request context and
// sets the response status from the result: 200 OK when readyF
// returns nil, 500 Internal Server Error otherwise. It is shared
// by the GET and HEAD readiness handlers.
func (m *Module) ready(c *gin.Context) {
	err := m.readyF(c.Request.Context())
	if err == nil {
		c.Status(http.StatusOK)
		return
	}

	// Store the error on the gin context so that
	// it gets logged by the logging middleware.
	c.Error(gtserror.NewErrorInternalError(err)) //nolint:errcheck
	c.Status(http.StatusInternalServerError)
}
// ReadyGETRequest swagger:operation GET /readyz readyGet
//
// Returns code 200 with no body if GoToSocial is "ready", ie., able to connect to the database backend and do a simple SELECT.
//
// If GtS is not ready, 500 Internal Error will be returned, and an error will be logged (but not returned to the caller, to avoid leaking internals).
//
//	---
//	tags:
//	- health
//
//	responses:
//		'200':
//			description: OK
//		'500':
//			description: Not ready. Check logs for error message.
func (m *Module) ReadyGETRequest(c *gin.Context) {
	// Status code is decided by the shared ready
	// helper, based on what readyF returns.
	m.ready(c)
}
// ReadyHEADRequest swagger:operation HEAD /readyz readyHead
//
// Returns code 200 with no body if GoToSocial is "ready", ie., able to connect to the database backend and do a simple SELECT.
//
// If GtS is not ready, 500 Internal Error will be returned, and an error will be logged (but not returned to the caller, to avoid leaking internals).
//
//	---
//	tags:
//	- health
//
//	responses:
//		'200':
//			description: OK
//		'500':
//			description: Not ready. Check logs for error message.
func (m *Module) ReadyHEADRequest(c *gin.Context) {
	// HEAD goes through the same ready helper as GET,
	// so it can answer with 500 as well as 200; the
	// annotation above documents both responses.
	m.ready(c)
}

View file

@ -33,8 +33,8 @@ type Basic interface {
// If the database implementation doesn't need to be stopped, this can just return nil. // If the database implementation doesn't need to be stopped, this can just return nil.
Close() error Close() error
// IsHealthy should return nil if the database connection is healthy, or an error if not. // Ready returns nil if the database connection is ready, or an error if not.
IsHealthy(ctx context.Context) error Ready(ctx context.Context) error
// GetByID gets one entry by its id. In a database like postgres, this might be the 'id' field of the entry, // GetByID gets one entry by its id. In a database like postgres, this might be the 'id' field of the entry,
// for other implementations (for example, in-memory) it might just be the key of a map. // for other implementations (for example, in-memory) it might just be the key of a map.

View file

@ -124,8 +124,14 @@ func (b *basicDB) DropTable(ctx context.Context, i interface{}) error {
return err return err
} }
func (b *basicDB) IsHealthy(ctx context.Context) error { func (b *basicDB) Ready(ctx context.Context) error {
return b.db.PingContext(ctx) if _, err := b.db.
NewRaw("SELECT NULL FROM ? LIMIT 0", bun.Ident("instances")).
Exec(ctx); err != nil {
return err
}
return nil
} }
func (b *basicDB) Close() error { func (b *basicDB) Close() error {

View file

@ -108,6 +108,7 @@ nav:
- "advanced/security/index.md" - "advanced/security/index.md"
- "advanced/security/sandboxing.md" - "advanced/security/sandboxing.md"
- "advanced/security/firewall.md" - "advanced/security/firewall.md"
- "advanced/healthchecks.md"
- "advanced/tracing.md" - "advanced/tracing.md"
- "advanced/metrics.md" - "advanced/metrics.md"