monitor: add ps-style support to current tasks

jtolio · jtolio · commit 6e4a95d4d5d8 · 2014-11-20T15:53:07.000-07:00
Change-Id: I26a0e811081ddfaf8451f11429c138a7008e9dd6
diff --git a/group_disabled.go b/group_disabled.go
@@ -22,6 +22,8 @@ import (
 
 func (g *MonitorGroup) Stats(cb func(name string, val float64)) {}
 
+func (g *MonitorGroup) Running(cb func(name string, current []*TaskCtx)) {} // stub: never calls cb
+
 func (g *MonitorGroup) Datapoints(reset bool, cb func(name string,
 	data [][]float64, total uint64, clipped bool, fraction float64)) {
 }
diff --git a/group_enabled.go b/group_enabled.go
@@ -20,9 +20,9 @@ import (
 	"fmt"
 	"strings"
 
-	"golang.org/x/net/context"
 	"github.com/spacemonkeygo/errors"
 	"github.com/spacemonkeygo/monitor/trace"
+	"golang.org/x/net/context"
 )
 
 // Stats conforms to the Monitor interface. Stats aggregates all statistics
@@ -41,6 +41,22 @@ func (g *MonitorGroup) Stats(cb func(name string, val float64)) {
 	}
 }
 
+// Running calls cb once for each task monitor in this group that has at least
+// one task in flight, passing "<group_name>.<name>" and the running contexts.
+// Names are visited in the order produced by sortedStringKeys.
+func (g *MonitorGroup) Running(cb func(name string, current []*TaskCtx)) {
+	monitors := g.monitors.Snapshot()
+	for _, key := range sortedStringKeys(monitors) {
+		// Entries that are not a *TaskMonitor are skipped.
+		tasks, isTaskMonitor := monitors[key].(*TaskMonitor)
+		if isTaskMonitor {
+			if active := tasks.Running(); len(active) > 0 {
+				cb(fmt.Sprintf("%s.%s", g.group_name, key), active)
+			}
+		}
+	}
+}
+
 // Datapoints conforms to the DataCollection interface. Datapoints aggregates
 // all datasets attached to this group.
 func (g *MonitorGroup) Datapoints(reset bool, cb func(name string,
@@ -276,3 +292,5 @@ func (self *MonitorGroup) TracedTask(ctx *context.Context) func(*error) {
 		}
 	}
 }
+
+var _ RunningTasksCollector = (*MonitorGroup)(nil) // compile-time interface check
diff --git a/http.go b/http.go
@@ -17,12 +17,37 @@ package monitor
 import (
 	"fmt"
 	"net/http"
+	"sort"
+	"strings"
+	"time"
 )
 
+type durationSort []time.Duration // sort.Interface ordering durations ascending
+
+func (ds durationSort) Len() int           { return len(ds) }
+func (ds durationSort) Less(a, b int) bool { return ds[b] > ds[a] }
+func (ds durationSort) Swap(a, b int)      { ds[b], ds[a] = ds[a], ds[b] }
+
 // ServeHTTP dumps all of the MonitorStore's keys and values to the requester.
 // This method allows a MonitorStore to be registered as an HTTP handler.
 func (s *MonitorStore) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 	w.Header().Set("Content-Type", "text/plain")
+
+	if strings.HasSuffix(req.URL.Path, "running") {
+		s.Running(func(name string, current []*TaskCtx) {
+			fmt.Fprintf(w, "%s - %d tasks\n", name, len(current))
+			durs := make([]time.Duration, 0, len(current))
+			for _, task := range current {
+				durs = append(durs, task.ElapsedTime())
+			}
+			sort.Sort(sort.Reverse(durationSort(durs)))
+			for _, dur := range durs {
+				fmt.Fprintf(w, "\t%s\n", dur)
+			}
+		})
+		return
+	}
+
 	s.Stats(func(name string, val float64) {
 		fmt.Fprintf(w, "%s\t%f\n", name, val)
 	})
diff --git a/monitor.go b/monitor.go
@@ -41,6 +41,11 @@ type Monitor interface {
 	Stats(cb func(name string, val float64))
 }
 
+// RunningTasksCollector is implemented by types that can report running tasks.
+type RunningTasksCollector interface {
+	Running(cb func(name string, current []*TaskCtx))
+}
+
 // DataCollection is the basic key/vector interface. Anything that implements
 // the DataCollection interface can be connected to the monitor system for
 // later processing.
@@ -75,6 +80,11 @@ func sortedStringKeys(snapshot map[interface{}]interface{}) []string {
 // Stats calls cb with all the statistics registered on the default store.
 func Stats(cb func(name string, val float64)) { DefaultStore.Stats(cb) }
 
+// Running calls cb with the running tasks, by name, found on the default store.
+func Running(cb func(name string, current []*TaskCtx)) {
+	DefaultStore.Running(cb)
+}
+
 // Datapoints calls cb with all the datasets registered on the default store.
 func Datapoints(reset bool, cb func(name string, data [][]float64, total uint64,
 	clipped bool, fraction float64)) {
diff --git a/store.go b/store.go
@@ -43,6 +43,17 @@ func (s *MonitorStore) Stats(cb func(name string, val float64)) {
 	}
 }
 
+// Running passes cb to each group in the store that is a RunningTasksCollector.
+func (s *MonitorStore) Running(cb func(name string, current []*TaskCtx)) {
+	groups := s.groups.Snapshot()
+	for _, key := range sortedStringKeys(groups) {
+		collector, ok := groups[key].(RunningTasksCollector)
+		if ok {
+			collector.Running(cb)
+		}
+	}
+}
+
 // Datapoints conforms to the DataCollection interface
 func (s *MonitorStore) Datapoints(reset bool, cb func(name string,
 	data [][]float64, total uint64, clipped bool, fraction float64)) {
@@ -83,3 +94,5 @@ func (s *MonitorStore) GetMonitorsNamed(group_name string) *MonitorGroup {
 func (s *MonitorStore) GetMonitors() *MonitorGroup {
 	return s.GetMonitorsNamed(PackageName())
 }
+
+var _ RunningTasksCollector = (*MonitorStore)(nil) // compile-time interface check
diff --git a/tasks.go b/tasks.go
@@ -16,6 +16,9 @@ package monitor
 
 import (
 	"sync"
+	"time"
+
+	"github.com/spacemonkeygo/monotime"
 )
 
 // TaskMonitor is a type for keeping track of tasks. A TaskMonitor will keep
@@ -39,14 +42,26 @@ type TaskMonitor struct {
 	total_timing    *IntValueMonitor
 	errors          map[string]uint64
 	panics          uint64
+	running         map[*TaskCtx]bool
 }
 
 // NewTaskMonitor returns a new TaskMonitor. You probably want to create
 // a TaskMonitor using MonitorGroup.Task instead.
 func NewTaskMonitor() *TaskMonitor {
 	return &TaskMonitor{
-		errors:         make(map[string]uint64),
 		success_timing: NewIntValueMonitor(),
 		error_timing:   NewIntValueMonitor(),
-		total_timing:   NewIntValueMonitor()}
+		total_timing:   NewIntValueMonitor(),
+		errors:         make(map[string]uint64),
+		running:        make(map[*TaskCtx]bool)}
+}
+
+// TaskCtx tracks a running task; ElapsedTime reports how long it has run.
+type TaskCtx struct {
+	start   time.Duration // monotime.Monotonic() at NewContext (enabled build)
+	monitor *TaskMonitor  // monitor passed in by NewContext (enabled build)
+}
+
+func (t *TaskCtx) ElapsedTime() time.Duration {
+	return monotime.Monotonic() - t.start
 }
diff --git a/tasks_disabled.go b/tasks_disabled.go
@@ -21,6 +21,6 @@ func (t *TaskMonitor) Stats(cb func(name string, val float64)) {}
 func (t *TaskMonitor) Start() func(*error)  { return func(*error) {} }
 func (t *TaskMonitor) NewContext() *TaskCtx { return &TaskCtx{} }
 
-type TaskCtx struct{}
-
 func (c *TaskCtx) Finish(err_ref *error, rec interface{}) {}
+
+func (t *TaskMonitor) Running() (rv []*TaskCtx) { return nil } // stub: always returns nil
diff --git a/tasks_enabled.go b/tasks_enabled.go
@@ -19,7 +19,6 @@ package monitor
 import (
 	"fmt"
 	"sort"
-	"time"
 
 	"github.com/spacemonkeygo/errors"
 	"github.com/spacemonkeygo/monotime"
@@ -30,12 +29,6 @@ const (
 	microsecondInNanoseconds = 1000
 )
 
-// TaskCtx keeps track of a task as it is running.
-type TaskCtx struct {
-	start   time.Duration
-	monitor *TaskMonitor
-}
-
 // Start is a helper method for watching a task in a less error-prone way.
 // Managing a task context yourself is tricky to get right - recover only works
 // in deferred methods. Call out of a method that was deferred and it no longer
@@ -48,14 +41,16 @@ func (t *TaskMonitor) Start() func(*error) {
 // NewContext creates a new context that is watching a live task. See Start
 // or MonitorGroup.Task
 func (t *TaskMonitor) NewContext() *TaskCtx {
+	c := &TaskCtx{start: monotime.Monotonic(), monitor: t}
 	t.mtx.Lock()
 	t.current += 1
 	t.total_started += 1
 	if t.current > t.highwater {
 		t.highwater = t.current
 	}
+	t.running[c] = true
 	t.mtx.Unlock()
-	return &TaskCtx{start: monotime.Monotonic(), monitor: t}
+	return c
 }
 
 // Stats conforms to the Monitor interface
@@ -121,7 +116,7 @@ func (t *TaskMonitor) Stats(cb func(name string, val float64)) {
 // Finish will re-panic any recovered panics (provided it wasn't a nil panic)
 // after bookkeeping.
 func (c *TaskCtx) Finish(err_ref *error, rec interface{}) {
-	duration_nanoseconds := int64(monotime.Monotonic() - c.start)
+	duration_nanoseconds := int64(c.ElapsedTime())
 	var error_name string
 	var err error
 	if err_ref != nil {
@@ -151,6 +146,7 @@ func (c *TaskCtx) Finish(err_ref *error, rec interface{}) {
 	c.monitor.mtx.Lock()
 	c.monitor.current -= 1
 	c.monitor.total_completed += 1
+	delete(c.monitor.running, c)
 	if err != nil {
 		c.monitor.errors[error_name] += 1
 		if rec != nil {
@@ -170,3 +166,17 @@ func (c *TaskCtx) Finish(err_ref *error, rec interface{}) {
 		panic(rec)
 	}
 }
+
+// Running copies the set of in-flight tasks into a slice while holding the
+// monitor's lock; the slice is in no particular order. A returned TaskCtx can
+// report its elapsed time, but its task may already have finished by the time
+// (*TaskCtx).ElapsedTime() is called on it.
+func (t *TaskMonitor) Running() (rv []*TaskCtx) {
+	t.mtx.Lock()
+	defer t.mtx.Unlock()
+	rv = make([]*TaskCtx, 0, len(t.running))
+	for task := range t.running {
+		rv = append(rv, task)
+	}
+	return rv
+}

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,8 @@ import (`
`22`	`22`
`23`	`23`	`func (g *MonitorGroup) Stats(cb func(name string, val float64)) {}`
`24`	`24`
	`25`	`+func (g *MonitorGroup) Running(cb func(name string, current []*TaskCtx)) {}`
	`26`	`+`
`25`	`27`	`func (g *MonitorGroup) Datapoints(reset bool, cb func(name string,`
`26`	`28`	`data [][]float64, total uint64, clipped bool, fraction float64)) {`
`27`	`29`	`}`