// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package trace
import (
"container/heap"
"math"
"sort"
"strings"
"time"
)
// MutatorUtil is a change in mutator utilization at a particular
// time. Mutator utilization functions are represented as a
// time-ordered []MutatorUtil.
type MutatorUtil struct {
Time int64
// Util is the mean mutator utilization starting at Time. This
// is in the range [0, 1].
Util float64
}
// UtilFlags controls the behavior of MutatorUtilization.
type UtilFlags int
const (
// UtilSTW means utilization should account for STW events.
UtilSTW UtilFlags = 1 << iota
// UtilBackground means utilization should account for
// background mark workers.
UtilBackground
// UtilAssist means utilization should account for mark
// assists.
UtilAssist
// UtilSweep means utilization should account for sweeping.
UtilSweep
// UtilPerProc means each P should be given a separate
// utilization function. Otherwise, there is a single function
// and each P is given a fraction of the utilization.
UtilPerProc
)
// MutatorUtilization returns a set of mutator utilization functions
// for the given trace. Each function will always end with 0
// utilization. The bounds of each function are implicit in the first
// and last event; outside of these bounds each function is undefined.
//
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
if len(events) == 0 {
return nil
}
type perP struct {
// gc > 0 indicates that GC is active on this P.
gc int
// series the logical series number for this P. This
// is necessary because Ps may be removed and then
// re-added, and then the new P needs a new series.
series int
}
ps := []perP{}
stw := 0
out := [][]MutatorUtil{}
assists := map[uint64]bool{}
block := map[uint64]*Event{}
bgMark := map[uint64]bool{}
for _, ev := range events {
switch ev.Type {
case EvGomaxprocs:
gomaxprocs := int(ev.Args[0])
if len(ps) > gomaxprocs {
if flags&UtilPerProc != 0 {
// End each P's series.
for _, p := range ps[gomaxprocs:] {
out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
}
}
ps = ps[:gomaxprocs]
}
for len(ps) < gomaxprocs {
// Start new P's series.
series := 0
if flags&UtilPerProc != 0 || len(out) == 0 {
series = len(out)
out = append(out, []MutatorUtil{{ev.Ts, 1}})
}
ps = append(ps, perP{series: series})
}
case EvGCSTWStart:
if flags&UtilSTW != 0 {
stw++
}
case EvGCSTWDone:
if flags&UtilSTW != 0 {
stw--
}
case EvGCMarkAssistStart:
if flags&UtilAssist != 0 {
ps[ev.P].gc++
assists[ev.G] = true
}
case EvGCMarkAssistDone:
if flags&UtilAssist != 0 {
ps[ev.P].gc--
delete(assists, ev.G)
}
case EvGCSweepStart:
if flags&UtilSweep != 0 {
ps[ev.P].gc++
}
case EvGCSweepDone:
if flags&UtilSweep != 0 {
ps[ev.P].gc--
}
case EvGoStartLabel:
if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
// Background mark worker.
//
// If we're in per-proc mode, we don't
// count dedicated workers because
// they kick all of the goroutines off
// that P, so don't directly
// contribute to goroutine latency.
if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
bgMark[ev.G] = true
ps[ev.P].gc++
}
}
fallthrough
case EvGoStart:
if assists[ev.G] {
// Unblocked during assist.
ps[ev.P].gc++
}
block[ev.G] = ev.Link
default:
if ev != block[ev.G] {
continue
}
if assists[ev.G] {
// Blocked during assist.
ps[ev.P].gc--
}
if bgMark[ev.G] {
// Background mark worker done.
ps[ev.P].gc--
delete(bgMark, ev.G)
}
delete(block, ev.G)
}
if flags&UtilPerProc == 0 {
// Compute the current average utilization.
if len(ps) == 0 {
continue
}
gcPs := 0
if stw > 0 {
gcPs = len(ps)
} else {
for i := range ps {
if ps[i].gc > 0 {
gcPs++
}
}
}
mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}
// Record the utilization change. (Since
// len(ps) == len(out), we know len(out) > 0.)
out[0] = addUtil(out[0], mu)
} else {
// Check for per-P utilization changes.
for i := range ps {
p := &ps[i]
util := 1.0
if stw > 0 || p.gc > 0 {
util = 0.0
}
out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
}
}
}
// Add final 0 utilization event to any remaining series. This
// is important to mark the end of the trace. The exact value
// shouldn't matter since no window should extend beyond this,
// but using 0 is symmetric with the start of the trace.
mu := MutatorUtil{events[len(events)-1].Ts, 0}
for i := range ps {
out[ps[i].series] = addUtil(out[ps[i].series], mu)
}
return out
}
func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
if len(util) > 0 {
if mu.Util == util[len(util)-1].Util {
// No change.
return util
}
if mu.Time == util[len(util)-1].Time {
// Take the lowest utilization at a time stamp.
if mu.Util < util[len(util)-1].Util {
util[len(util)-1] = mu
}
return util
}
}
return append(util, mu)
}
// totalUtil is total utilization, measured in nanoseconds. This is a
// separate type primarily to distinguish it from mean utilization,
// which is also a float64.
type totalUtil float64
func totalUtilOf(meanUtil float64, dur int64) totalUtil {
return totalUtil(meanUtil * float64(dur))
}
// mean returns the mean utilization over dur.
func (u totalUtil) mean(dur time.Duration) float64 {
return float64(u) / float64(dur)
}
// An MMUCurve is the minimum mutator utilization curve across
// multiple window sizes.
type MMUCurve struct {
series []mmuSeries
}
type mmuSeries struct {
util []MutatorUtil
// sums[j] is the cumulative sum of util[:j].
sums []totalUtil
// bands summarizes util in non-overlapping bands of duration
// bandDur.
bands []mmuBand
// bandDur is the duration of each band.
bandDur int64
}
type mmuBand struct {
// minUtil is the minimum instantaneous mutator utilization in
// this band.
minUtil float64
// cumUtil is the cumulative total mutator utilization between
// time 0 and the left edge of this band.
cumUtil totalUtil
// integrator is the integrator for the left edge of this
// band.
integrator integrator
}
// NewMMUCurve returns an MMU curve for the given mutator utilization
// function.
func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
series := make([]mmuSeries, len(utils))
for i, util := range utils {
series[i] = newMMUSeries(util)
}
return &MMUCurve{series}
}
// bandsPerSeries is the number of bands to divide each series into.
// This is only changed by tests.
var bandsPerSeries = 1000
func newMMUSeries(util []MutatorUtil) mmuSeries {
// Compute cumulative sum.
sums := make([]totalUtil, len(util))
var prev MutatorUtil
var sum totalUtil
for j, u := range util {
sum += totalUtilOf(prev.Util, u.Time-prev.Time)
sums[j] = sum
prev = u
}
// Divide the utilization curve up into equal size
// non-overlapping "bands" and compute a summary for each of
// these bands.
//
// Compute the duration of each band.
numBands := bandsPerSeries
if numBands > len(util) {
// There's no point in having lots of bands if there
// aren't many events.
numBands = len(util)
}
dur := util[len(util)-1].Time - util[0].Time
bandDur := (dur + int64(numBands) - 1) / int64(numBands)
if bandDur < 1 {
bandDur = 1
}
// Compute the bands. There are numBands+1 bands in order to
// record the final cumulative sum.
bands := make([]mmuBand, numBands+1)
s := mmuSeries{util, sums, bands, bandDur}
leftSum := integrator{&s, 0}
for i := range bands {
startTime, endTime := s.bandTime(i)
cumUtil := leftSum.advance(startTime)
predIdx := leftSum.pos
minUtil := 1.0
for i := predIdx; i < len(util) && util[i].Time < endTime; i++ {
minUtil = math.Min(minUtil, util[i].Util)
}
bands[i] = mmuBand{minUtil, cumUtil, leftSum}
}
return s
}
func (s *mmuSeries) bandTime(i int) (start, end int64) {
start = int64(i)*s.bandDur + s.util[0].Time
end = start + s.bandDur
return
}
type bandUtil struct {
// Utilization series index
series int
// Band index
i int
// Lower bound of mutator utilization for all windows
// with a left edge in this band.
utilBound float64
}
type bandUtilHeap []bandUtil
func (h bandUtilHeap) Len() int {
return len(h)
}
func (h bandUtilHeap) Less(i, j int) bool {
return h[i].utilBound < h[j].utilBound
}
func (h bandUtilHeap) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}
func (h *bandUtilHeap) Push(x interface{}) {
*h = append(*h, x.(bandUtil))
}
func (h *bandUtilHeap) Pop() interface{} {
x := (*h)[len(*h)-1]
*h = (*h)[:len(*h)-1]
return x
}
// UtilWindow is a specific window at Time.
type UtilWindow struct {
Time int64
// MutatorUtil is the mean mutator utilization in this window.
MutatorUtil float64
}
type utilHeap []UtilWindow
func (h utilHeap) Len() int {
return len(h)
}
func (h utilHeap) Less(i, j int) bool {
if h[i].MutatorUtil != h[j].MutatorUtil {
return h[i].MutatorUtil > h[j].MutatorUtil
}
return h[i].Time > h[j].Time
}
func (h utilHeap) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}
func (h *utilHeap) Push(x interface{}) {
*h = append(*h, x.(UtilWindow))
}
func (h *utilHeap) Pop() interface{} {
x := (*h)[len(*h)-1]
*h = (*h)[:len(*h)-1]
return x
}
// An accumulator takes a windowed mutator utilization function and
// tracks various statistics for that function.
type accumulator struct {
mmu float64
// bound is the mutator utilization bound where adding any
// mutator utilization above this bound cannot affect the
// accumulated statistics.
bound float64
// Worst N window tracking
nWorst int
wHeap utilHeap
// Mutator utilization distribution tracking
mud *mud
// preciseMass is the distribution mass that must be precise
// before accumulation is stopped.
preciseMass float64
// lastTime and lastMU are the previous point added to the
// windowed mutator utilization function.
lastTime int64
lastMU float64
}
// resetTime declares a discontinuity in the windowed mutator
// utilization function by resetting the current time.
func (acc *accumulator) resetTime() {
// This only matters for distribution collection, since that's
// the only thing that depends on the progression of the
// windowed mutator utilization function.
acc.lastTime = math.MaxInt64
}
// addMU adds a point to the windowed mutator utilization function at
// (time, mu). This must be called for monotonically increasing values
// of time.
//
// It returns true if further calls to addMU would be pointless.
func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool {
if mu < acc.mmu {
acc.mmu = mu
}
acc.bound = acc.mmu
if acc.nWorst == 0 {
// If the minimum has reached zero, it can't go any
// lower, so we can stop early.
return mu == 0
}
// Consider adding this window to the n worst.
if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil {
// This window is lower than the K'th worst window.
//
// Check if there's any overlapping window
// already in the heap and keep whichever is
// worse.
for i, ui := range acc.wHeap {
if time+int64(window) > ui.Time && ui.Time+int64(window) > time {
if ui.MutatorUtil <= mu {
// Keep the first window.
goto keep
} else {
// Replace it with this window.
heap.Remove(&acc.wHeap, i)
break
}
}
}
heap.Push(&acc.wHeap, UtilWindow{time, mu})
if len(acc.wHeap) > acc.nWorst {
heap.Pop(&acc.wHeap)
}
keep:
}
if len(acc.wHeap) < acc.nWorst {
// We don't have N windows yet, so keep accumulating.
acc.bound = 1.0
} else {
// Anything above the least worst window has no effect.
acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil)
}
if acc.mud != nil {
if acc.lastTime != math.MaxInt64 {
// Update distribution.
acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime))
}
acc.lastTime, acc.lastMU = time, mu
if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok {
acc.bound = math.Max(acc.bound, mudBound)
} else {
// We haven't accumulated enough total precise
// mass yet to even reach our goal, so keep
// accumulating.
acc.bound = 1
}
// It's not worth checking percentiles every time, so
// just keep accumulating this band.
return false
}
// If we've found enough 0 utilizations, we can stop immediately.
return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0
}
// MMU returns the minimum mutator utilization for the given time
// window. This is the minimum utilization for all windows of this
// duration across the execution. The returned value is in the range
// [0, 1].
func (c *MMUCurve) MMU(window time.Duration) (mmu float64) {
acc := accumulator{mmu: 1.0, bound: 1.0}
c.mmu(window, &acc)
return acc.mmu
}
// Examples returns n specific examples of the lowest mutator
// utilization for the given window size. The returned windows will be
// disjoint (otherwise there would be a huge number of
// mostly-overlapping windows at the single lowest point). There are
// no guarantees on which set of disjoint windows this returns.
func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) {
acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n}
c.mmu(window, &acc)
sort.Sort(sort.Reverse(acc.wHeap))
return ([]UtilWindow)(acc.wHeap)
}
// MUD returns mutator utilization distribution quantiles for the
// given window size.
//
// The mutator utilization distribution is the distribution of mean
// mutator utilization across all windows of the given window size in
// the trace.
//
// The minimum mutator utilization is the minimum (0th percentile) of
// this distribution. (However, if only the minimum is desired, it's
// more efficient to use the MMU method.)
func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 {
if len(quantiles) == 0 {
return []float64{}
}
// Each unrefined band contributes a known total mass to the
// distribution (bandDur except at the end), but in an unknown
// way. However, we know that all the mass it contributes must
// be at or above its worst-case mean mutator utilization.
//
// Hence, we refine bands until the highest desired
// distribution quantile is less than the next worst-case mean
// mutator utilization. At this point, all further
// contributions to the distribution must be beyond the
// desired quantile and hence cannot affect it.
//
// First, find the highest desired distribution quantile.
maxQ := quantiles[0]
for _, q := range quantiles {
if q > maxQ {
maxQ = q
}
}
// The distribution's mass is in units of time (it's not
// normalized because this would make it more annoying to
// account for future contributions of unrefined bands). The
// total final mass will be the duration of the trace itself
// minus the window size. Using this, we can compute the mass
// corresponding to quantile maxQ.
var duration int64
for _, s := range c.series {
duration1 := s.util[len(s.util)-1].Time - s.util[0].Time
if duration1 >= int64(window) {
duration += duration1 - int64(window)
}
}
qMass := float64(duration) * maxQ
// Accumulate the MUD until we have precise information for
// everything to the left of qMass.
acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)}
acc.mud.setTrackMass(qMass)
c.mmu(window, &acc)
// Evaluate the quantiles on the accumulated MUD.
out := make([]float64, len(quantiles))
for i := range out {
mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i])
if math.IsNaN(mu) {
// There are a few legitimate ways this can
// happen:
//
// 1. If the window is the full trace
// duration, then the windowed MU function is
// only defined at a single point, so the MU
// distribution is not well-defined.
//
// 2. If there are no events, then the MU
// distribution has no mass.
//
// Either way, all of the quantiles will have
// converged toward the MMU at this point.
mu = acc.mmu
}
out[i] = mu
}
return out
}
func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
if window <= 0 {
acc.mmu = 0
return
}
var bandU bandUtilHeap
windows := make([]time.Duration, len(c.series))
for i, s := range c.series {
windows[i] = window
if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
windows[i] = max
}
bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
if bandU == nil {
bandU = bandU1
} else {
bandU = append(bandU, bandU1...)
}
}
// Process bands from lowest utilization bound to highest.
heap.Init(&bandU)
// Refine each band into a precise window and MMU until
// refining the next lowest band can no longer affect the MMU
// or windows.
for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
i := bandU[0].series
c.series[i].bandMMU(bandU[0].i, windows[i], acc)
heap.Pop(&bandU)
}
}
func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
// For each band, compute the worst-possible total mutator
// utilization for all windows that start in that band.
// minBands is the minimum number of bands a window can span
// and maxBands is the maximum number of bands a window can
// span in any alignment.
minBands := int((int64(window) + c.bandDur - 1) / c.bandDur)
maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur)
if window > 1 && maxBands < 2 {
panic("maxBands < 2")
}
tailDur := int64(window) % c.bandDur
nUtil := len(c.bands) - maxBands + 1
if nUtil < 0 {
nUtil = 0
}
bandU := make([]bandUtil, nUtil)
for i := range bandU {
// To compute the worst-case MU, we assume the minimum
// for any bands that are only partially overlapped by
// some window and the mean for any bands that are
// completely covered by all windows.
var util totalUtil
// Find the lowest and second lowest of the partial
// bands.
l := c.bands[i].minUtil
r1 := c.bands[i+minBands-1].minUtil
r2 := c.bands[i+maxBands-1].minUtil
minBand := math.Min(l, math.Min(r1, r2))
// Assume the worst window maximally overlaps the
// worst minimum and then the rest overlaps the second
// worst minimum.
if minBands == 1 {
util += totalUtilOf(minBand, int64(window))
} else {
util += totalUtilOf(minBand, c.bandDur)
midBand := 0.0
switch {
case minBand == l:
midBand = math.Min(r1, r2)
case minBand == r1:
midBand = math.Min(l, r2)
case minBand == r2:
midBand = math.Min(l, r1)
}
util += totalUtilOf(midBand, tailDur)
}
// Add the total mean MU of bands that are completely
// overlapped by all windows.
if minBands > 2 {
util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
}
bandU[i] = bandUtil{series, i, util.mean(window)}
}
return bandU
}
// bandMMU computes the precise minimum mutator utilization for
// windows with a left edge in band bandIdx.
func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
util := c.util
// We think of the mutator utilization over time as the
// box-filtered utilization function, which we call the
// "windowed mutator utilization function". The resulting
// function is continuous and piecewise linear (unless
// window==0, which we handle elsewhere), where the boundaries
// between segments occur when either edge of the window
// encounters a change in the instantaneous mutator
// utilization function. Hence, the minimum of this function
// will always occur when one of the edges of the window
// aligns with a utilization change, so these are the only
// points we need to consider.
//
// We compute the mutator utilization function incrementally
// by tracking the integral from t=0 to the left edge of the
// window and to the right edge of the window.
left := c.bands[bandIdx].integrator
right := left
time, endTime := c.bandTime(bandIdx)
if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime {
endTime = utilEnd
}
acc.resetTime()
for {
// Advance edges to time and time+window.
mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window)
if acc.addMU(time, mu, window) {
break
}
if time == endTime {
break
}
// The maximum slope of the windowed mutator
// utilization function is 1/window, so we can always
// advance the time by at least (mu - mmu) * window
// without dropping below mmu.
minTime := time + int64((mu-acc.bound)*float64(window))
// Advance the window to the next time where either
// the left or right edge of the window encounters a
// change in the utilization curve.
if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 {
time = t1
} else {
time = t2
}
if time < minTime {
time = minTime
}
if time >= endTime {
// For MMUs we could stop here, but for MUDs
// it's important that we span the entire
// band.
time = endTime
}
}
}
// An integrator tracks a position in a utilization function and
// integrates it.
type integrator struct {
u *mmuSeries
// pos is the index in u.util of the current time's non-strict
// predecessor.
pos int
}
// advance returns the integral of the utilization function from 0 to
// time. advance must be called on monotonically increasing values of
// times.
func (in *integrator) advance(time int64) totalUtil {
util, pos := in.u.util, in.pos
// Advance pos until pos+1 is time's strict successor (making
// pos time's non-strict predecessor).
//
// Very often, this will be nearby, so we optimize that case,
// but it may be arbitrarily far away, so we handled that
// efficiently, too.
const maxSeq = 8
if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time {
// Nearby. Use a linear scan.
for pos+1 < len(util) && util[pos+1].Time <= time {
pos++
}
} else {
// Far. Binary search for time's strict successor.
l, r := pos, len(util)
for l < r {
h := int(uint(l+r) >> 1)
if util[h].Time <= time {
l = h + 1
} else {
r = h
}
}
pos = l - 1 // Non-strict predecessor.
}
in.pos = pos
var partial totalUtil
if time != util[pos].Time {
partial = totalUtilOf(util[pos].Util, time-util[pos].Time)
}
return in.u.sums[pos] + partial
}
// next returns the smallest time t' > time of a change in the
// utilization function.
func (in *integrator) next(time int64) int64 {
for _, u := range in.u.util[in.pos:] {
if u.Time > time {
return u.Time
}
}
return 1<<63 - 1
}