Hardware status admin API (#218)

* Add metrics for disk usage

* Add admin API for hardware metrics

* Fix error message alert
This commit is contained in:
Gabe Kangas 2020-10-02 12:18:08 -07:00 committed by GitHub
parent f4fdc6c951
commit e042c85f88
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 62 additions and 11 deletions

View file

@ -0,0 +1,16 @@
package admin
import (
"encoding/json"
"net/http"
"github.com/gabek/owncast/metrics"
)
// GetHardwareStats writes the shared hardware utilization metrics
// (metrics.Metrics) to the response as JSON.
//
// If encoding or writing the payload fails, it attempts to report a
// 500 Internal Server Error instead of silently discarding the error.
func GetHardwareStats(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")

	// Encode the shared instance directly: a local variable named "metrics"
	// would shadow the imported package for the rest of the function.
	if err := json.NewEncoder(w).Encode(metrics.Metrics); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}

View file

@ -6,12 +6,14 @@ import (
// Utilization percentages above which an alert is logged for each resource.
const (
	maxCPUAlertingThresholdPCT  = 95
	maxRAMAlertingThresholdPCT  = 95
	maxDiskAlertingThresholdPCT = 95
)
const alertingError = "The %s utilization of %d%% is higher than the alerting threshold of %d%%. This can cause issues with video generation and delivery. Please visit the documentation at http://owncast.online/docs/troubleshooting/ to help troubleshoot this issue."
const alertingError = "The %s utilization of %d%% can cause issues with video generation and delivery. Please visit the documentation at http://owncast.online/docs/troubleshooting/ to help troubleshoot this issue."
// handleAlerting runs the CPU, RAM and disk alerting checks, in that order.
func handleAlerting() {
	checks := []func(){
		handleCPUAlerting,
		handleRAMAlerting,
		handleDiskAlerting,
	}
	for _, check := range checks {
		check()
	}
}
func handleCPUAlerting() {
@ -21,7 +23,7 @@ func handleCPUAlerting() {
avg := recentAverage(Metrics.CPUUtilizations)
if avg > maxCPUAlertingThresholdPCT {
log.Errorf(alertingError, "CPU", avg, maxCPUAlertingThresholdPCT)
log.Errorf(alertingError, "CPU", maxCPUAlertingThresholdPCT)
}
}
@ -32,7 +34,19 @@ func handleRAMAlerting() {
avg := recentAverage(Metrics.RAMUtilizations)
if avg > maxRAMAlertingThresholdPCT {
log.Errorf(alertingError, "memory", avg, maxRAMAlertingThresholdPCT)
log.Errorf(alertingError, "memory", maxRAMAlertingThresholdPCT)
}
}
// handleDiskAlerting logs an error when the recent average disk utilization
// exceeds maxDiskAlertingThresholdPCT.
//
// Nothing is checked until at least two disk samples have been collected.
func handleDiskAlerting() {
	if len(Metrics.DiskUtilizations) < 2 {
		return
	}

	avg := recentAverage(Metrics.DiskUtilizations)
	if avg > maxDiskAlertingThresholdPCT {
		// The %d slot in the message is the observed utilization, so report
		// the measured average. The RAM threshold constant was previously
		// passed here, which logged a fixed, unrelated value.
		log.Errorf(alertingError, "disk", avg)
	}
}

View file

@ -4,6 +4,7 @@ import (
"time"
"github.com/shirou/gopsutil/cpu"
"github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/mem"
)
@ -33,3 +34,15 @@ func collectRAMUtilization() {
metricValue := timestampedValue{time.Now(), int(memoryUsage.UsedPercent)}
Metrics.RAMUtilizations = append(Metrics.RAMUtilizations, metricValue)
}
// collectDiskUtilization samples the disk usage reported for the path "./"
// and appends it, timestamped, to Metrics.DiskUtilizations.
//
// The oldest sample is dropped once the history holds more than
// maxCollectionValues entries. If the usage lookup fails, the sample is
// skipped and the history is left untouched.
func collectDiskUtilization() {
	path := "./"
	diskUse, err := disk.Usage(path)
	if err != nil || diskUse == nil {
		// Best effort: without this guard a failed lookup leaves diskUse nil
		// and reading diskUse.UsedPercent below would panic the collector.
		return
	}

	if len(Metrics.DiskUtilizations) > maxCollectionValues {
		Metrics.DiskUtilizations = Metrics.DiskUtilizations[1:]
	}

	metricValue := timestampedValue{time.Now(), int(diskUse.UsedPercent)}
	Metrics.DiskUtilizations = append(Metrics.DiskUtilizations, metricValue)
}

View file

@ -5,21 +5,25 @@ import (
)
// How often we poll for updates
const metricsPollingInterval = 15 * time.Second
const metricsPollingInterval = 1 * time.Minute
type metrics struct {
CPUUtilizations []timestampedValue
RAMUtilizations []timestampedValue
Viewers []timestampedValue
// CollectedMetrics stores the different collected, timestamped values.
// The struct tags control how each series appears when the value is
// JSON-encoded.
type CollectedMetrics struct {
// CPUUtilizations is serialized under the "cpu" key.
CPUUtilizations []timestampedValue `json:"cpu"`
// RAMUtilizations is serialized under the "memory" key.
RAMUtilizations []timestampedValue `json:"memory"`
// DiskUtilizations is serialized under the "disk" key.
DiskUtilizations []timestampedValue `json:"disk"`
// Viewers is excluded from JSON output (tag "-").
Viewers []timestampedValue `json:"-"`
}
// Metrics is the shared Metrics instance
var Metrics *metrics
var Metrics *CollectedMetrics
// Start will begin the metrics collection and alerting
func Start() {
Metrics = new(metrics)
startViewerCollectionMetrics()
Metrics = new(CollectedMetrics)
go startViewerCollectionMetrics()
handlePolling()
for range time.Tick(metricsPollingInterval) {
handlePolling()
@ -30,6 +34,7 @@ func handlePolling() {
// Collect hardware stats
collectCPUUtilization()
collectRAMUtilization()
collectDiskUtilization()
// Alerting
handleAlerting()

View file

@ -66,6 +66,9 @@ func Start() error {
// Get viewer count over time
http.HandleFunc("/api/admin/viewersOverTime", middleware.RequireAdminAuth(admin.GetViewersOverTime))
// Get hardware stats
http.HandleFunc("/api/admin/hardwarestats", middleware.RequireAdminAuth(admin.GetHardwareStats))
port := config.Config.GetPublicWebServerPort()
log.Infof("Web server running on port: %d", port)