Hardware status admin api (#218)

* Add metrics for disk usage

* Add admin API for hardware metrics

* Fix error message alert
This commit is contained in:
Gabe Kangas 2020-10-02 12:18:08 -07:00 committed by GitHub
parent f4fdc6c951
commit e042c85f88
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 62 additions and 11 deletions

View file

@ -0,0 +1,16 @@
package admin
import (
"encoding/json"
"net/http"
"github.com/gabek/owncast/metrics"
)
// GetHardwareStats writes the shared metrics.Metrics value to the response
// as JSON with a Content-Type of application/json.
//
// If encoding (or writing the encoded payload) fails, the error is reported
// to the client through http.Error instead of being silently discarded.
func GetHardwareStats(w http.ResponseWriter, r *http.Request) {
	// Use a distinct local name so the imported metrics package is not
	// shadowed for the rest of the function.
	stats := metrics.Metrics

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(stats); err != nil {
		// Best effort: if part of the body was already written the status
		// line can no longer change, but the failure is still surfaced.
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}

View file

@ -6,12 +6,14 @@ import (
const maxCPUAlertingThresholdPCT = 95 const maxCPUAlertingThresholdPCT = 95
const maxRAMAlertingThresholdPCT = 95 const maxRAMAlertingThresholdPCT = 95
const maxDiskAlertingThresholdPCT = 95
const alertingError = "The %s utilization of %d%% is higher than the alerting threshold of %d%%. This can cause issues with video generation and delivery. Please visit the documentation at http://owncast.online/docs/troubleshooting/ to help troubleshoot this issue." const alertingError = "The %s utilization of %d%% can cause issues with video generation and delivery. Please visit the documentation at http://owncast.online/docs/troubleshooting/ to help troubleshoot this issue."
func handleAlerting() { func handleAlerting() {
handleCPUAlerting() handleCPUAlerting()
handleRAMAlerting() handleRAMAlerting()
handleDiskAlerting()
} }
func handleCPUAlerting() { func handleCPUAlerting() {
@ -21,7 +23,7 @@ func handleCPUAlerting() {
avg := recentAverage(Metrics.CPUUtilizations) avg := recentAverage(Metrics.CPUUtilizations)
if avg > maxCPUAlertingThresholdPCT { if avg > maxCPUAlertingThresholdPCT {
log.Errorf(alertingError, "CPU", avg, maxCPUAlertingThresholdPCT) log.Errorf(alertingError, "CPU", maxCPUAlertingThresholdPCT)
} }
} }
@ -32,7 +34,19 @@ func handleRAMAlerting() {
avg := recentAverage(Metrics.RAMUtilizations) avg := recentAverage(Metrics.RAMUtilizations)
if avg > maxRAMAlertingThresholdPCT { if avg > maxRAMAlertingThresholdPCT {
log.Errorf(alertingError, "memory", avg, maxRAMAlertingThresholdPCT) log.Errorf(alertingError, "memory", maxRAMAlertingThresholdPCT)
}
}
func handleDiskAlerting() {
if len(Metrics.DiskUtilizations) < 2 {
return
}
avg := recentAverage(Metrics.DiskUtilizations)
if avg > maxDiskAlertingThresholdPCT {
log.Errorf(alertingError, "disk", maxRAMAlertingThresholdPCT)
} }
} }

View file

@ -4,6 +4,7 @@ import (
"time" "time"
"github.com/shirou/gopsutil/cpu" "github.com/shirou/gopsutil/cpu"
"github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/mem" "github.com/shirou/gopsutil/mem"
) )
@ -33,3 +34,15 @@ func collectRAMUtilization() {
metricValue := timestampedValue{time.Now(), int(memoryUsage.UsedPercent)} metricValue := timestampedValue{time.Now(), int(memoryUsage.UsedPercent)}
Metrics.RAMUtilizations = append(Metrics.RAMUtilizations, metricValue) Metrics.RAMUtilizations = append(Metrics.RAMUtilizations, metricValue)
} }
// collectDiskUtilization samples disk usage for the path "./" and appends the
// used percentage, truncated to an int and stamped with the current time, to
// Metrics.DiskUtilizations. When the slice already holds more than
// maxCollectionValues entries, the oldest entry is dropped first.
//
// If disk.Usage reports an error, the sample is skipped and the collected
// history is left untouched.
func collectDiskUtilization() {
	path := "./"
	diskUse, err := disk.Usage(path)
	if err != nil {
		// Previously the error was discarded and diskUse was read anyway,
		// which would dereference an unusable (presumably nil) result.
		return
	}

	if len(Metrics.DiskUtilizations) > maxCollectionValues {
		Metrics.DiskUtilizations = Metrics.DiskUtilizations[1:]
	}

	metricValue := timestampedValue{time.Now(), int(diskUse.UsedPercent)}
	Metrics.DiskUtilizations = append(Metrics.DiskUtilizations, metricValue)
}

View file

@ -5,21 +5,25 @@ import (
) )
// How often we poll for updates // How often we poll for updates
const metricsPollingInterval = 15 * time.Second const metricsPollingInterval = 1 * time.Minute
type metrics struct { // CollectedMetrics stores different collected + timestamped values
CPUUtilizations []timestampedValue type CollectedMetrics struct {
RAMUtilizations []timestampedValue CPUUtilizations []timestampedValue `json:"cpu"`
Viewers []timestampedValue RAMUtilizations []timestampedValue `json:"memory"`
DiskUtilizations []timestampedValue `json:"disk"`
Viewers []timestampedValue `json:"-"`
} }
// Metrics is the shared Metrics instance // Metrics is the shared Metrics instance
var Metrics *metrics var Metrics *CollectedMetrics
// Start will begin the metrics collection and alerting // Start will begin the metrics collection and alerting
func Start() { func Start() {
Metrics = new(metrics) Metrics = new(CollectedMetrics)
startViewerCollectionMetrics() go startViewerCollectionMetrics()
handlePolling()
for range time.Tick(metricsPollingInterval) { for range time.Tick(metricsPollingInterval) {
handlePolling() handlePolling()
@ -30,6 +34,7 @@ func handlePolling() {
// Collect hardware stats // Collect hardware stats
collectCPUUtilization() collectCPUUtilization()
collectRAMUtilization() collectRAMUtilization()
collectDiskUtilization()
// Alerting // Alerting
handleAlerting() handleAlerting()

View file

@ -66,6 +66,9 @@ func Start() error {
// Get viewer count over time // Get viewer count over time
http.HandleFunc("/api/admin/viewersOverTime", middleware.RequireAdminAuth(admin.GetViewersOverTime)) http.HandleFunc("/api/admin/viewersOverTime", middleware.RequireAdminAuth(admin.GetViewersOverTime))
// Get hardware stats
http.HandleFunc("/api/admin/hardwarestats", middleware.RequireAdminAuth(admin.GetHardwareStats))
port := config.Config.GetPublicWebServerPort() port := config.Config.GetPublicWebServerPort()
log.Infof("Web server running on port: %d", port) log.Infof("Web server running on port: %d", port)