mirror of
https://github.com/openlibrecommunity/olcrtc.git
synced 2026-05-26 23:19:47 +00:00
270 lines
6.7 KiB
Go
270 lines
6.7 KiB
Go
// Package supervisor runs ordered session profiles with failover.
|
|
package supervisor
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/openlibrecommunity/olcrtc/internal/app/session"
|
|
)
|
|
|
|
// DefaultRetryDelay is used between profile attempts when Config.RetryDelay is unset.
|
|
const DefaultRetryDelay = 2 * time.Second
|
|
|
|
// DefaultHistoryLimit bounds emitted status history when Config.HistoryLimit is unset.
|
|
const DefaultHistoryLimit = 20
|
|
|
|
const (
|
|
// EventProfileStart marks a profile attempt starting.
|
|
EventProfileStart = "profile_start"
|
|
// EventProfileEnd marks a profile attempt ending.
|
|
EventProfileEnd = "profile_end"
|
|
)
|
|
|
|
var (
|
|
// ErrNoProfiles is returned when the supervisor is started without profiles.
|
|
ErrNoProfiles = errors.New("supervisor: no profiles configured")
|
|
// ErrMaxCyclesExceeded is returned after MaxCycles complete profile-list passes.
|
|
ErrMaxCyclesExceeded = errors.New("supervisor: max failover cycles exceeded")
|
|
errProfileCleanEnd = errors.New("profile ended")
|
|
)
|
|
|
|
// Profile is one runnable session configuration in an ordered failover list.
|
|
type Profile struct {
|
|
Name string
|
|
Config session.Config
|
|
}
|
|
|
|
// ProfileStatus summarizes one profile's failover history.
|
|
type ProfileStatus struct {
|
|
Name string
|
|
Starts int
|
|
Failures int
|
|
CleanEnds int
|
|
LastStarted time.Time
|
|
LastEnded time.Time
|
|
LastError string
|
|
}
|
|
|
|
// Event is one bounded failover history entry.
|
|
type Event struct {
|
|
Time time.Time
|
|
Type string
|
|
Profile string
|
|
Cycle int
|
|
Error string
|
|
}
|
|
|
|
// Status is a point-in-time view of the supervisor.
|
|
type Status struct {
|
|
Cycle int
|
|
ActiveProfile string
|
|
ActiveProfileIndex int
|
|
Profiles []ProfileStatus
|
|
History []Event
|
|
LastError string
|
|
}
|
|
|
|
// Runner starts one session profile and blocks until it ends or fails.
|
|
type Runner func(ctx context.Context, cfg session.Config) error
|
|
|
|
// Config controls ordered failover behavior.
|
|
type Config struct {
|
|
Profiles []Profile
|
|
RetryDelay time.Duration
|
|
MaxCycles int
|
|
|
|
OnProfileStart func(profile Profile, cycle int)
|
|
OnProfileEnd func(profile Profile, cycle int, err error)
|
|
OnStatus func(status Status)
|
|
HistoryLimit int
|
|
}
|
|
|
|
// Run starts profiles in order. If a profile exits while ctx is still active,
|
|
// the supervisor waits RetryDelay and advances to the next profile.
|
|
func Run(ctx context.Context, cfg Config, run Runner) error {
|
|
if len(cfg.Profiles) == 0 {
|
|
return ErrNoProfiles
|
|
}
|
|
if cfg.RetryDelay == 0 {
|
|
cfg.RetryDelay = DefaultRetryDelay
|
|
}
|
|
state := newStatusTracker(cfg.Profiles, cfg.HistoryLimit, cfg.OnStatus)
|
|
|
|
var lastErr error
|
|
for cycle := 1; ; cycle++ {
|
|
if err := runCycle(ctx, cfg, run, state, cycle, &lastErr); err != nil {
|
|
return err
|
|
}
|
|
if ctx.Err() != nil {
|
|
return nil //nolint:nilerr // context cancellation is normal supervisor shutdown
|
|
}
|
|
}
|
|
}
|
|
|
|
func runCycle(
|
|
ctx context.Context,
|
|
cfg Config,
|
|
run Runner,
|
|
state *statusTracker,
|
|
cycle int,
|
|
lastErr *error,
|
|
) error {
|
|
for i, profile := range cfg.Profiles {
|
|
if err := runProfile(ctx, cfg, run, state, cycle, i, profile, lastErr); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func runProfile(
|
|
ctx context.Context,
|
|
cfg Config,
|
|
run Runner,
|
|
state *statusTracker,
|
|
cycle int,
|
|
profileIndex int,
|
|
profile Profile,
|
|
lastErr *error,
|
|
) error {
|
|
if ctx.Err() != nil {
|
|
return nil //nolint:nilerr // context cancellation is normal supervisor shutdown
|
|
}
|
|
state.start(profileIndex, cycle)
|
|
if cfg.OnProfileStart != nil {
|
|
cfg.OnProfileStart(profile, cycle)
|
|
}
|
|
|
|
err := run(ctx, profile.Config)
|
|
if ctx.Err() != nil {
|
|
return nil //nolint:nilerr // context cancellation is normal supervisor shutdown
|
|
}
|
|
*lastErr = profileResultError(profile.Name, err)
|
|
state.end(profileIndex, cycle, err)
|
|
if cfg.OnProfileEnd != nil {
|
|
cfg.OnProfileEnd(profile, cycle, err)
|
|
}
|
|
|
|
if cfg.MaxCycles > 0 && cycle >= cfg.MaxCycles && profileIndex == len(cfg.Profiles)-1 {
|
|
return fmt.Errorf("%w after %d cycle(s): %w", ErrMaxCyclesExceeded, cycle, *lastErr)
|
|
}
|
|
if err := waitRetryDelay(ctx, cfg.RetryDelay); err != nil {
|
|
return nil //nolint:nilerr // context cancellation during retry delay is normal shutdown
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func profileResultError(name string, err error) error {
|
|
if err != nil {
|
|
return fmt.Errorf("profile %q: %w", name, err)
|
|
}
|
|
return fmt.Errorf("profile %q: %w", name, errProfileCleanEnd)
|
|
}
|
|
|
|
type statusTracker struct {
|
|
status Status
|
|
notify func(Status)
|
|
historyLimit int
|
|
}
|
|
|
|
func newStatusTracker(profiles []Profile, historyLimit int, notify func(Status)) *statusTracker {
|
|
if historyLimit == 0 {
|
|
historyLimit = DefaultHistoryLimit
|
|
}
|
|
statusProfiles := make([]ProfileStatus, 0, len(profiles))
|
|
for _, profile := range profiles {
|
|
statusProfiles = append(statusProfiles, ProfileStatus{Name: profile.Name})
|
|
}
|
|
return &statusTracker{
|
|
status: Status{
|
|
ActiveProfileIndex: -1,
|
|
Profiles: statusProfiles,
|
|
},
|
|
notify: notify,
|
|
historyLimit: historyLimit,
|
|
}
|
|
}
|
|
|
|
func (t *statusTracker) start(profileIndex, cycle int) {
|
|
now := time.Now()
|
|
profile := &t.status.Profiles[profileIndex]
|
|
profile.Starts++
|
|
profile.LastStarted = now
|
|
t.status.Cycle = cycle
|
|
t.status.ActiveProfile = profile.Name
|
|
t.status.ActiveProfileIndex = profileIndex
|
|
t.appendHistory(Event{
|
|
Time: now,
|
|
Type: EventProfileStart,
|
|
Profile: profile.Name,
|
|
Cycle: cycle,
|
|
})
|
|
t.emit()
|
|
}
|
|
|
|
func (t *statusTracker) end(profileIndex, cycle int, err error) {
|
|
now := time.Now()
|
|
profile := &t.status.Profiles[profileIndex]
|
|
profile.LastEnded = now
|
|
event := Event{
|
|
Time: now,
|
|
Type: EventProfileEnd,
|
|
Profile: profile.Name,
|
|
Cycle: cycle,
|
|
}
|
|
if err != nil {
|
|
profile.Failures++
|
|
profile.LastError = err.Error()
|
|
t.status.LastError = fmt.Sprintf("profile %q: %v", profile.Name, err)
|
|
event.Error = err.Error()
|
|
} else {
|
|
profile.CleanEnds++
|
|
profile.LastError = ""
|
|
t.status.LastError = fmt.Sprintf("profile %q ended", profile.Name)
|
|
}
|
|
t.status.ActiveProfile = ""
|
|
t.status.ActiveProfileIndex = -1
|
|
t.appendHistory(event)
|
|
t.emit()
|
|
}
|
|
|
|
func (t *statusTracker) appendHistory(event Event) {
|
|
if t.historyLimit < 0 {
|
|
return
|
|
}
|
|
t.status.History = append(t.status.History, event)
|
|
if len(t.status.History) > t.historyLimit {
|
|
t.status.History = t.status.History[len(t.status.History)-t.historyLimit:]
|
|
}
|
|
}
|
|
|
|
func (t *statusTracker) emit() {
|
|
if t.notify == nil {
|
|
return
|
|
}
|
|
t.notify(cloneStatus(t.status))
|
|
}
|
|
|
|
func cloneStatus(status Status) Status {
|
|
status.Profiles = append([]ProfileStatus(nil), status.Profiles...)
|
|
status.History = append([]Event(nil), status.History...)
|
|
return status
|
|
}
|
|
|
|
func waitRetryDelay(ctx context.Context, delay time.Duration) error {
|
|
if delay <= 0 {
|
|
return nil
|
|
}
|
|
timer := time.NewTimer(delay)
|
|
defer timer.Stop()
|
|
select {
|
|
case <-ctx.Done():
|
|
return fmt.Errorf("retry delay canceled: %w", ctx.Err())
|
|
case <-timer.C:
|
|
return nil
|
|
}
|
|
}
|