Simple hardware metrics collection + alerting (#115)

* Add CPU and RAM usage alerting

* Create basic troubleshooting document to point alerts at

* Limit max number of hardware values collected

* Save metric value with the point in time it was taken
This commit is contained in:
Gabe Kangas 2020-08-27 00:37:32 -07:00 committed by GitHub
parent e791a3c1dc
commit 4c3da2704f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 137 additions and 15 deletions

10
doc/troubleshooting.md Normal file
View file

@ -0,0 +1,10 @@
## CPU and RAM usage alerts
If your hardware is being maxed out then your video may not be processed and delivered fast enough to keep up with the real-time requirements of live video.
Here are some steps you can try taking to resolve this.
1. You may have too many bitrates defined as separate video quality variants. Try removing one.
1. Change to a [faster encoder preset](https://github.com/gabek/owncast/blob/master/doc/encoding.md#encoder-preset) in your configuration. If you're currently using `veryfast`, try `superfast`, for example.
1. Try reducing [the quality of the video you're sending to Owncast from your broadcasting software](https://github.com/gabek/owncast/blob/master/doc/encoding.md#how-you-configure-your-broadcasting-software-matters).
1. If you've gone down to a single bitrate, changed the encoder preset to the fastest, and experimented with different qualities in your broadcasting software, it's possible the server you're running Owncast is just not powerful enough for the task and you might need to try a different environment to run this on.

4
go.mod
View file

@ -3,13 +3,15 @@ module github.com/gabek/owncast
go 1.14
require (
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect
github.com/aws/aws-sdk-go v1.34.0
github.com/kr/pretty v0.2.0 // indirect
github.com/go-ole/go-ole v1.2.4 // indirect
github.com/mattn/go-sqlite3 v1.14.0
github.com/mssola/user_agent v0.5.2
github.com/nareix/joy5 v0.0.0-20200712071056-a55089207c88
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
github.com/radovskyb/watcher v1.0.7
github.com/shirou/gopsutil v2.20.7+incompatible
github.com/sirupsen/logrus v1.6.0
github.com/teris-io/shortid v0.0.0-20171029131806-771a37caa5cf
golang.org/x/net v0.0.0-20200707034311-ab3426394381

20
go.sum
View file

@ -1,19 +1,19 @@
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk=
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/aws/aws-sdk-go v1.32.1 h1:0dy5DkMKNPH9mLWveAWA9ZTiKIEEvJJA6fbe0eCs19k=
github.com/aws/aws-sdk-go v1.32.1/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/aws/aws-sdk-go v1.34.0 h1:brux2dRrlwCF5JhTL7MUT3WUwo9zfDHZZp3+g3Mvlmo=
github.com/aws/aws-sdk-go v1.34.0/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-ole/go-ole v1.2.4 h1:nNBDSCOigTSiarFpYE9J/KtEA1IOW4CNeqT9TQDqCxI=
github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
@ -21,8 +21,6 @@ github.com/mattn/go-sqlite3 v1.14.0 h1:mLyGNKR8+Vv9CAU7PphKa2hkEqxxhn8i32J6FPj1/
github.com/mattn/go-sqlite3 v1.14.0/go.mod h1:JIl7NbARA7phWnGvh0LKTyg7S9BA+6gx71ShQilpsus=
github.com/mssola/user_agent v0.5.2 h1:CZkTUahjL1+OcZ5zv3kZr8QiJ8jy2H08vZIEkBeRbxo=
github.com/mssola/user_agent v0.5.2/go.mod h1:TTPno8LPY3wAIEKRpAtkdMT0f8SE24pLRGPahjCH4uw=
github.com/nareix/joy5 v0.0.0-20200710135721-d57196c8d506 h1:LclgHa9TCVfOXhpmIEyz8wc2ohx0AF/PJu3wLUZZJ8c=
github.com/nareix/joy5 v0.0.0-20200710135721-d57196c8d506/go.mod h1:XmAOs6UJXpNXRwKk+KY/nv5kL6xXYXyellk+A1pTlko=
github.com/nareix/joy5 v0.0.0-20200712071056-a55089207c88 h1:CXq5QLPMcfGEZMx8uBMyLdDiUNV72vlkSiyqg+jf7AI=
github.com/nareix/joy5 v0.0.0-20200712071056-a55089207c88/go.mod h1:XmAOs6UJXpNXRwKk+KY/nv5kL6xXYXyellk+A1pTlko=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
@ -33,6 +31,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/radovskyb/watcher v1.0.7 h1:AYePLih6dpmS32vlHfhCeli8127LzkIgwJGcwwe8tUE=
github.com/radovskyb/watcher v1.0.7/go.mod h1:78okwvY5wPdzcb1UYnip1pvrZNIVEIh/Cm+ZuvsUYIg=
github.com/shirou/gopsutil v2.20.7+incompatible h1:Ymv4OD12d6zm+2yONe39VSmp2XooJe8za7ngOLW/o/w=
github.com/shirou/gopsutil v2.20.7+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/spf13/cobra v0.0.4-0.20190109003409-7547e83b2d85/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
@ -50,8 +50,6 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9 h1:pNX+40auqi2JqRfOP1akLGtYcn15TUbkhwuCO3foqqM=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -60,19 +58,13 @@ golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/p
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200808120158-1030fc2bf1d9 h1:yi1hN8dcqI9l8klZfy4B8mJvFmmAxJEePIQQFNSd7Cs=
golang.org/x/sys v0.0.0-20200808120158-1030fc2bf1d9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -9,6 +9,7 @@ import (
"github.com/gabek/owncast/config"
"github.com/gabek/owncast/core"
"github.com/gabek/owncast/metrics"
"github.com/gabek/owncast/router"
)
@ -55,6 +56,8 @@ func main() {
config.Config.ChatDatabaseFilePath = "chat.db"
}
metrics.Start()
// starts the core
if err := core.Start(); err != nil {
log.Error("failed to start the core package")

41
metrics/alerting.go Normal file
View file

@ -0,0 +1,41 @@
package metrics
import (
log "github.com/sirupsen/logrus"
)
const maxCPUAlertingThresholdPCT = 95
const maxRAMAlertingThresholdPCT = 95
const alertingError = "The %s utilization of %d%% is higher than the alerting threshold of %d%%. This can cause issues with video generation and delivery. Please visit the documentation at https://github.com/gabek/owncast/blob/master/doc/troubleshooting.md to help troubleshoot this issue."
func handleAlerting() {
handleCPUAlerting()
handleRAMAlerting()
}
func handleCPUAlerting() {
if len(Metrics.CPUUtilizations) < 2 {
return
}
avg := recentAverage(Metrics.CPUUtilizations)
if avg > maxCPUAlertingThresholdPCT {
log.Errorf(alertingError, "CPU", avg, maxCPUAlertingThresholdPCT)
}
}
func handleRAMAlerting() {
if len(Metrics.RAMUtilizations) < 2 {
return
}
avg := recentAverage(Metrics.RAMUtilizations)
if avg > maxRAMAlertingThresholdPCT {
log.Errorf(alertingError, "memory", avg, maxRAMAlertingThresholdPCT)
}
}
func recentAverage(values []value) int {
return int((values[len(values)-1].Value + values[len(values)-2].Value) / 2)
}

35
metrics/hardware.go Normal file
View file

@ -0,0 +1,35 @@
package metrics
import (
"time"
"github.com/shirou/gopsutil/cpu"
"github.com/shirou/gopsutil/mem"
)
// Max number of metrics we want to keep.
const maxCollectionValues = 500
func collectCPUUtilization() {
if len(Metrics.CPUUtilizations) > maxCollectionValues {
Metrics.CPUUtilizations = Metrics.CPUUtilizations[1:]
}
v, err := cpu.Percent(0, false)
if err != nil {
panic(err)
}
metricValue := value{time.Now(), int(v[0])}
Metrics.CPUUtilizations = append(Metrics.CPUUtilizations, metricValue)
}
func collectRAMUtilization() {
if len(Metrics.RAMUtilizations) > maxCollectionValues {
Metrics.RAMUtilizations = Metrics.RAMUtilizations[1:]
}
memoryUsage, _ := mem.VirtualMemory()
metricValue := value{time.Now(), int(memoryUsage.UsedPercent)}
Metrics.RAMUtilizations = append(Metrics.RAMUtilizations, metricValue)
}

39
metrics/metrics.go Normal file
View file

@ -0,0 +1,39 @@
package metrics
import (
"time"
)
// How often we poll for updates
const metricsPollingInterval = 15 * time.Second
type value struct {
Time time.Time
Value int
}
type metrics struct {
CPUUtilizations []value
RAMUtilizations []value
}
// Metrics is the shared Metrics instance
var Metrics *metrics
// Start will begin the metrics collection and alerting
func Start() {
Metrics = new(metrics)
for range time.Tick(metricsPollingInterval) {
handlePolling()
}
}
func handlePolling() {
// Collect hardware stats
collectCPUUtilization()
collectRAMUtilization()
// Alerting
handleAlerting()
}