From cbfee7092e39e528d40c762a001f7e4e51cd42c4 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Thu, 20 Jun 2024 16:07:57 +0200 Subject: [PATCH] pkg/fuzzer/throttler: avoid repetitive crashes Track the crash rate for the individual syscalls and rate limit the execution of those which happen too often. For determining the most unsafe syscalls, let's keep a sliding window of the last executed progs on every instance. * If a program has been evicted from the sliding window, it's safe enough. * If an instance crashed, all programs in the window are under suspicion. For preventing the execution of banned syscalls, let's add a special (skip) call parameter that is understood by the executor. The parameter is not supposed to leak into the corpus, so add appropriate assertions. --- executor/executor.cc | 58 ++--- pkg/corpus/corpus.go | 14 ++ pkg/csource/csource.go | 4 + pkg/fuzzer/fuzzer.go | 27 ++- pkg/fuzzer/fuzzer_test.go | 26 +++ pkg/fuzzer/queue/queue.go | 8 +- pkg/fuzzer/queue/retry.go | 4 +- pkg/fuzzer/throttler/wrapper.go | 334 +++++++++++++++++++++++++++ pkg/fuzzer/throttler/wrapper_test.go | 115 +++++++++ prog/encoding_test.go | 10 +- prog/encodingexec_test.go | 20 +- prog/prog.go | 1 + syz-manager/http.go | 35 ++- syz-manager/manager.go | 16 +- syz-manager/rpc.go | 14 +- 15 files changed, 629 insertions(+), 57 deletions(-) create mode 100644 pkg/fuzzer/throttler/wrapper.go create mode 100644 pkg/fuzzer/throttler/wrapper_test.go diff --git a/executor/executor.cc b/executor/executor.cc index cb2245c69fa6..8e007a4aefb5 100644 --- a/executor/executor.cc +++ b/executor/executor.cc @@ -1236,34 +1236,38 @@ void execute_call(thread_t* th) th->soft_fail_state = true; } - if (flag_coverage) - cover_reset(&th->cov); - // For pseudo-syscalls and user-space functions NONFAILING can abort before assigning to th->res. - // Arrange for res = -1 and errno = EFAULT result for such case. - th->res = -1; - errno = EFAULT; - NONFAILING(th->res = execute_syscall(call, th->args)); - th->reserrno = errno; - // Our pseudo-syscalls may misbehave. - if ((th->res == -1 && th->reserrno == 0) || call->attrs.ignore_return) - th->reserrno = EINVAL; - // Reset the flag before the first possible fail(). - th->soft_fail_state = false; - - if (flag_coverage) { - cover_collect(&th->cov); - if (th->cov.size >= kCoverSize) - failmsg("too much cover", "thr=%d, cov=%u", th->id, th->cov.size); - } - th->fault_injected = false; + if (th->call_props.skip > 0) { + th->reserrno = ENOSYS; + } else { + if (flag_coverage) + cover_reset(&th->cov); + // For pseudo-syscalls and user-space functions NONFAILING can abort before assigning to th->res. + // Arrange for res = -1 and errno = EFAULT result for such case. + th->res = -1; + errno = EFAULT; + NONFAILING(th->res = execute_syscall(call, th->args)); + th->reserrno = errno; + // Our pseudo-syscalls may misbehave. + if ((th->res == -1 && th->reserrno == 0) || call->attrs.ignore_return) + th->reserrno = EINVAL; + // Reset the flag before the first possible fail(). + th->soft_fail_state = false; + + if (flag_coverage) { + cover_collect(&th->cov); + if (th->cov.size >= kCoverSize) + failmsg("too much cover", "thr=%d, cov=%u", th->id, th->cov.size); + } + th->fault_injected = false; - if (th->call_props.fail_nth > 0) - th->fault_injected = fault_injected(fail_fd); + if (th->call_props.fail_nth > 0) + th->fault_injected = fault_injected(fail_fd); - // If required, run the syscall some more times. - // But let's still return res, errno and coverage from the first execution. - for (int i = 0; i < th->call_props.rerun; i++) - NONFAILING(execute_syscall(call, th->args)); + // If required, run the syscall some more times. + // But let's still return res, errno and coverage from the first execution. + for (int i = 0; i < th->call_props.rerun; i++) + NONFAILING(execute_syscall(call, th->args)); + } debug("#%d [%llums] <- %s=0x%llx", th->id, current_time_ms() - start_time_ms, call->name, (uint64)th->res); @@ -1275,6 +1279,8 @@ void execute_call(thread_t* th) debug(" fault=%d", th->fault_injected); if (th->call_props.rerun > 0) debug(" rerun=%d", th->call_props.rerun); + if (th->call_props.skip > 0) + debug(" skipped"); debug("\n"); } diff --git a/pkg/corpus/corpus.go b/pkg/corpus/corpus.go index 5b2b9983f458..e6dce8f1d818 100644 --- a/pkg/corpus/corpus.go +++ b/pkg/corpus/corpus.go @@ -90,6 +90,8 @@ type NewItemEvent struct { } func (corpus *Corpus) Save(inp NewInput) { + validateCorpusProg(inp.Prog) + progData := inp.Prog.Serialize() sig := hash.String(progData) @@ -150,6 +152,18 @@ func (corpus *Corpus) Save(inp NewInput) { } } } + +func validateCorpusProg(p *prog.Prog) { + for _, call := range p.Calls { + if call.Props.Async { + panic("attempting to save a Async=true prog to corpus") + } + if call.Props.Skip { + panic("attempting to save a Skip=true prog to corpus") + } + } +} + func (corpus *Corpus) Signal() signal.Signal { corpus.mu.RLock() defer corpus.mu.RUnlock() diff --git a/pkg/csource/csource.go b/pkg/csource/csource.go index 198ddf7df4a5..b40c168eac02 100644 --- a/pkg/csource/csource.go +++ b/pkg/csource/csource.go @@ -253,6 +253,10 @@ func (ctx *context) generateCalls(p prog.ExecProg, trace bool) ([]string, []uint var calls []string csumSeq := 0 for ci, call := range p.Calls { + if call.Props.Skip { + continue + } + w := new(bytes.Buffer) // Copyin. for _, copyin := range call.Copyin { diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go index 92d8b0f8d2e8..4ee8b7123f41 100644 --- a/pkg/fuzzer/fuzzer.go +++ b/pkg/fuzzer/fuzzer.go @@ -131,12 +131,15 @@ func (fuzzer *Fuzzer) processResult(req *queue.Request, res *queue.Result, flags fuzzer.triageProgCall(req.Prog, res.Info.Extra, -1, &triage) if len(triage) != 0 { + p := req.Prog.Clone() + triage = removeSkippedCalls(p, triage) + queue, stat := fuzzer.triageQueue, fuzzer.statJobsTriage if flags&progCandidate > 0 { queue, stat = fuzzer.triageCandidateQueue, fuzzer.statJobsTriageCandidate } fuzzer.startJob(stat, &triageJob{ - p: req.Prog.Clone(), + p: p, flags: flags, queue: queue.Append(), calls: triage, @@ -150,7 +153,7 @@ func (fuzzer *Fuzzer) processResult(req *queue.Request, res *queue.Result, flags // Corpus candidates may have flaky coverage, so we give them a second chance. maxCandidateAttempts := 3 - if req.Risky() { + if req.Risky { maxCandidateAttempts = 2 } if len(triage) == 0 && flags&ProgFromCorpus != 0 && attempt < maxCandidateAttempts { @@ -163,6 +166,26 @@ func (fuzzer *Fuzzer) processResult(req *queue.Request, res *queue.Result, flags return true } +func removeSkippedCalls(p *prog.Prog, triage map[int]*triageCall) map[int]*triageCall { + ret := map[int]*triageCall{} + if info := triage[-1]; info != nil { + ret[-1] = info + } + oldPos := 0 + for i := 0; i < len(p.Calls); oldPos++ { + if p.Calls[i].Props.Skip { + p.RemoveCall(i) + continue + } + info := triage[oldPos] + if info != nil { + ret[i] = info + } + i++ + } + return ret +} + type Config struct { Debug bool Corpus *corpus.Corpus diff --git a/pkg/fuzzer/fuzzer_test.go b/pkg/fuzzer/fuzzer_test.go index 206469fda648..3e36a3cb48a9 100644 --- a/pkg/fuzzer/fuzzer_test.go +++ b/pkg/fuzzer/fuzzer_test.go @@ -181,6 +181,32 @@ func TestRotate(t *testing.T) { assert.Equal(t, 700, minus.Len()) } +func TestRemoveSkippedCalls(t *testing.T) { + target, err := prog.GetTarget(targets.TestOS, targets.TestArch64Fuzz) + if err != nil { + t.Fatal(err) + } + p, err := target.Deserialize([]byte(` +serialize0(&AUTO) (skip) +serialize1(&AUTO) +serialize2(&AUTO) (skip) +serialize3(&AUTO) +`), prog.NonStrict) + if err != nil { + t.Fatal(err) + } + mm := map[int]*triageCall{ + 1: {}, + } + newMm := removeSkippedCalls(p, mm) + assert.Len(t, newMm, 1) + assert.NotNil(t, newMm[0]) + + assert.Len(t, p.Calls, 2) + assert.Equal(t, "serialize1", p.Calls[0].Meta.Name) + assert.Equal(t, "serialize3", p.Calls[1].Meta.Name) +} + // Based on the example from Go documentation. var crc32q = crc32.MakeTable(0xD5828281) diff --git a/pkg/fuzzer/queue/queue.go b/pkg/fuzzer/queue/queue.go index 46df9d234b7a..a9a8f9d51e01 100644 --- a/pkg/fuzzer/queue/queue.go +++ b/pkg/fuzzer/queue/queue.go @@ -42,6 +42,9 @@ type Request struct { // Important requests will be retried even from crashed VMs. Important bool + // Risky requests will not be retried, even if they are important. + Risky bool + // The callback will be called on request completion in the LIFO order. // If it returns false, all further processing will be stopped. // It allows wrappers to intercept Done() requests. @@ -95,11 +98,6 @@ func (r *Request) Wait(ctx context.Context) *Result { } } -// Risky() returns true if there's a substantial risk of the input crashing the VM. -func (r *Request) Risky() bool { - return r.onceCrashed -} - func (r *Request) Validate() error { collectSignal := r.ExecOpts.ExecFlags&flatrpc.ExecFlagCollectSignal > 0 if len(r.ReturnAllSignal) != 0 && !collectSignal { diff --git a/pkg/fuzzer/queue/retry.go b/pkg/fuzzer/queue/retry.go index c59a2c0481a1..416ad8f97da6 100644 --- a/pkg/fuzzer/queue/retry.go +++ b/pkg/fuzzer/queue/retry.go @@ -34,8 +34,8 @@ func (r *retryer) done(req *Request, res *Result) bool { return false } // Retry important requests from crashed VMs once. - if res.Status == Crashed && req.Important && !req.onceCrashed { - req.onceCrashed = true + if res.Status == Crashed && req.Important && !req.Risky { + req.Risky = true r.pq.Submit(req) return false } diff --git a/pkg/fuzzer/throttler/wrapper.go b/pkg/fuzzer/throttler/wrapper.go new file mode 100644 index 000000000000..3b6b91000aa1 --- /dev/null +++ b/pkg/fuzzer/throttler/wrapper.go @@ -0,0 +1,334 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +// The throttler package implements the functionality to rate limit the repetitive kernel crashes. + +package throttler + +import ( + "sort" + "sync" + "sync/atomic" + + "github.com/google/syzkaller/pkg/fuzzer/queue" + "github.com/google/syzkaller/pkg/stats" + "github.com/google/syzkaller/prog" + _ "github.com/google/syzkaller/sys/targets" +) + +type WrapperObject struct { + base queue.Source + syscalls []perSyscall + + disableThreshold float64 + targetCrashRate float64 + maxDisabled int + windowSize int + newPolicyExecs int + newPolicyCrashes int + + statRiskyExecs *stats.Val + statDeniedExecs *stats.Val + statImportantExecs *stats.Val + statThrottledCalls *stats.Val + + mu sync.Mutex + + // Policy is "syscall" -> "execute it no more often than once in N executions". + policy map[int]int + policyExecs int + policyCrashes int + + counter int64 // number of executed programs. + nextUnsafe int64 +} + +type perSyscall struct { + crashed atomic.Int64 + denied atomic.Int64 + lastExec int64 + + rate weightedAvg +} + +func Wrapper(baseSource queue.Source, syscalls []*prog.Syscall) *WrapperObject { + maxID := 0 + for _, call := range syscalls { + maxID = max(maxID, call.ID) + } + + ret := &WrapperObject{ + base: baseSource, + + // Start disabling syscalls after the risk exceeds 3%. + disableThreshold: 0.03, + // Disable no more than 10% of syscalls. + maxDisabled: max(1, len(syscalls)/10), + // Sample dangerous calls to the 0.1% crash risk. + targetCrashRate: 0.0005, + windowSize: 20, + // Regenerate the policy every 10k requests or every 10 new crashes. + newPolicyExecs: 10000, + newPolicyCrashes: 10, + + syscalls: make([]perSyscall, maxID+1), + statDeniedExecs: stats.Create("denied execs", "Risky calls denied from execution", + stats.Rate{}, stats.StackedGraph("throttle")), + statRiskyExecs: stats.Create("risky execs", "Risky calls executed (exploration)", + stats.Rate{}, stats.StackedGraph("throttle")), + statImportantExecs: stats.Create("important risky execs", "Risky calls in important execs", + stats.Rate{}, stats.StackedGraph("throttle")), + } + ret.statThrottledCalls = stats.Create("throttled syscalls", + "The number of syscalls blocked by the current policy", + stats.Graph("throttled syscalls"), + func() int { + ret.mu.Lock() + defer ret.mu.Unlock() + return len(ret.policy) + }) + return ret +} + +func (w *WrapperObject) Next() *queue.Request { + req := w.base.Next() + if req == nil { + return nil + } + if req.Prog == nil { + // Ignore binaries executions. + return req + } + + w.updatePolicy() + + w.mu.Lock() + defer w.mu.Unlock() + + w.policyExecs++ + w.counter++ + + for _, call := range req.Prog.Calls { + id := call.Meta.ID + onceIn := int64(w.policy[id]) + if onceIn == 0 { + continue + } + + req.Risky = true + if req.Important { + // Don't patch important requests. + w.statImportantExecs.Add(1) + continue + } + + mayRisk := w.nextUnsafe < w.counter + + // Demand that: + // 1. In general, we execute an unsafe program no more often than once per the window. + // 2. The individual rate limits are obeyed. + + info := &w.syscalls[id] + if mayRisk && (info.lastExec == w.counter || w.counter-info.lastExec > onceIn) { + w.nextUnsafe = max(w.nextUnsafe, w.counter+int64(w.windowSize+1)) + info.lastExec = w.counter + w.statRiskyExecs.Add(1) + } else { + call.Props.Skip = true + w.syscalls[id].denied.Add(1) + w.statDeniedExecs.Add(1) + } + } + return req +} + +func (w *WrapperObject) updatePolicy() { + w.mu.Lock() + update := false + if w.policyCrashes > w.newPolicyCrashes { + update = true + w.policyCrashes = 0 + } + if w.policyExecs > w.newPolicyExecs { + update = true + w.policyExecs = 0 + } + w.mu.Unlock() + if update { + policy := w.generatePolicy() + w.mu.Lock() + w.policy = policy + w.mu.Unlock() + } +} + +func (w *WrapperObject) generatePolicy() map[int]int { + type record struct { + id int + prob float64 + } + rates := []record{} + for id := range w.syscalls { + info := &w.syscalls[id] + crashed := float64(info.crashed.Load()) + if crashed < 5 { + // We consider everything below as still too noisy. + continue + } + // Only consider the syscalls that cause a high enough crash rate. + prob := info.rate.Get() + if prob > w.disableThreshold { + rates = append(rates, record{id, prob}) + } + } + sort.Slice(rates, func(i, j int) bool { + return rates[i].prob > rates[j].prob + }) + + newPolicy := map[int]int{} + for _, obj := range rates[:min(len(rates), w.maxDisabled)] { + newPolicy[obj.id] = max(w.windowSize, int(obj.prob/w.targetCrashRate)) + } + return newPolicy +} + +func (w *WrapperObject) recordCalls(calls []*prog.Syscall, crashed bool) { + if len(calls) == 0 { + return + } + + w.mu.Lock() + policy := w.policy + if crashed { + w.policyCrashes++ + } + w.mu.Unlock() + + onlyDisabled := false + if crashed { + // If we crashed and there have been already executed disabled calls, + // only update the information for them. + known := 0 + for _, call := range calls { + if policy[call.ID] > 0 { + onlyDisabled = true + known++ + } + } + } + for _, call := range calls { + if onlyDisabled && policy[call.ID] == 0 { + continue + } + info := &w.syscalls[call.ID] + if crashed { + info.rate.Save(1.0) + info.crashed.Add(1) + } else { + info.rate.Save(0) + } + } +} + +func (w *WrapperObject) InstanceMonitor() InstanceMonitor { + return &callTracker{ + window: make([][]*prog.Syscall, w.windowSize), + record: w.recordCalls, + } +} + +type CallInfo struct { + Crashed int64 + Denied int64 + CrashRate float64 + Throttled bool +} + +func (w *WrapperObject) Info(call *prog.Syscall) CallInfo { + w.mu.Lock() + defer w.mu.Unlock() + data := &w.syscalls[call.ID] + return CallInfo{ + Crashed: data.crashed.Load(), + Denied: data.denied.Load(), + CrashRate: data.rate.Get(), + Throttled: w.policy[call.ID] > 0, + } +} + +type InstanceMonitor interface { + // Record() is assumed to get called once an instance starts executing a program. + Record(req *queue.Request) + // Shutdown() is expected to be called as soon as possible after the VM has crashed. + Shutdown(crashed bool) +} + +// callTracker is a sliding window of the last executed syscalls. +// Due to delayed crashed, we only consider a program to be unharmful after +// the whole window has passed afterwards. +type callTracker struct { + mu sync.Mutex + pos int + window [][]*prog.Syscall + record func(calls []*prog.Syscall, crashed bool) +} + +func (t *callTracker) Record(req *queue.Request) { + if req.Prog == nil { + return + } + calls := make([]*prog.Syscall, 0, len(req.Prog.Calls)) + for _, info := range req.Prog.Calls { + if !info.Props.Skip { + calls = append(calls, info.Meta) + } + } + var prev []*prog.Syscall + t.mu.Lock() + prev = t.window[t.pos] + t.window[t.pos] = calls + t.pos = (t.pos + 1) % len(t.window) + t.mu.Unlock() + + t.record(prev, false) +} + +func (t *callTracker) Shutdown(crashed bool) { + if !crashed { + return + } + m := map[*prog.Syscall]struct{}{} + for _, calls := range t.window { + for _, call := range calls { + m[call] = struct{}{} + } + } + var merged []*prog.Syscall + for call := range m { + merged = append(merged, call) + } + t.record(merged, true) +} + +// Until we need to configure it, let it stay const. +const weightedAvgStep = 0.005 + +type weightedAvg struct { + mu sync.Mutex + total int64 + val float64 +} + +func (wa *weightedAvg) Save(val float64) { + wa.mu.Lock() + defer wa.mu.Unlock() + wa.total++ + step := max(weightedAvgStep, 1.0/float64(wa.total)) + wa.val += (val - wa.val) * step +} + +func (wa *weightedAvg) Get() float64 { + wa.mu.Lock() + defer wa.mu.Unlock() + return wa.val +} diff --git a/pkg/fuzzer/throttler/wrapper_test.go b/pkg/fuzzer/throttler/wrapper_test.go new file mode 100644 index 000000000000..1ea02cb0f161 --- /dev/null +++ b/pkg/fuzzer/throttler/wrapper_test.go @@ -0,0 +1,115 @@ +// Copyright 2024 syzkaller project authors. All rights reserved. +// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. + +package throttler + +import ( + "math/rand" + "testing" + + "github.com/google/syzkaller/pkg/fuzzer/queue" + "github.com/google/syzkaller/pkg/testutil" + "github.com/google/syzkaller/prog" + _ "github.com/google/syzkaller/sys" + "github.com/google/syzkaller/sys/targets" + "github.com/stretchr/testify/assert" +) + +func TestThrottler(t *testing.T) { + target, err := prog.GetTarget(targets.TestOS, targets.TestArch64) + if err != nil { + t.Fatal(err) + } + + rs := testutil.RandSource(t) + + var calls []*prog.Syscall + bad := map[*prog.Syscall]bool{} + for i, call := range target.Syscalls { + calls = append(calls, call) + if i <= 2 { + bad[call] = true + } + if i == 40 { + break + } + } + + rnd := rand.New(rs) + + // Generate random progs. + limit := 100000 + base := queue.Callback(func() *queue.Request { + limit-- + if limit < 0 { + return nil + } + return &queue.Request{ + Prog: &prog.Prog{ + Calls: []*prog.Call{ + { + Meta: calls[rnd.Intn(len(calls))], + }, + }, + }, + } + }) + + wrapper := Wrapper(base, calls) + crashes := 0 +outer: + for { + monitor := wrapper.InstanceMonitor() + crashIn := 10000 + count := 0 + for { + count++ + req := wrapper.Next() + if req == nil { + break outer + } + monitor.Record(req) + if crashIn == 0 { + monitor.Shutdown(true) + crashes++ + t.Logf("crashed after %d progs", count) + break + } + crashIn-- + call := req.Prog.Calls[0] + // Fail bad calls with a 33% probability, but delay it. + if !call.Props.Skip && bad[call.Meta] && rnd.Intn(3) == 0 { + crashIn = min(crashIn, rnd.Intn(10)) + } + } + } + + t.Logf("total crashes: %v", crashes) + t.Logf("total risky calls run: %v", wrapper.statRiskyExecs.Val()) + totalDenied := wrapper.statDeniedExecs.Val() + t.Logf("total denied calls: %v", totalDenied) + + var badDenials int64 + for _, call := range calls { + info := wrapper.Info(call) + typ := "good" + if bad[call] { + typ = "bad" + badDenials += info.Denied + } + if info.Throttled { + typ += " [IN POLICY]" + } + assert.Equal(t, bad[call], info.Throttled, "all & only bad calls must be throttled") + t.Logf("[%d] %s call %s: %d crashed (%.2f%%), %d denied", + call.ID, typ, call.Name, info.Crashed, + info.CrashRate*100.0, info.Denied) + } + + // There can be 100000*(1/3)*(3/40) > 2.5k crashes if the algorithm did not work. + // Let's consider it a success if it finished with less than 333. + assert.Less(t, crashes, 333) + + // Also, demand that >2/3 of all execution denials happened on the 3 bad calls. + assert.Greater(t, badDenials, int64(totalDenied*2/3)) +} diff --git a/prog/encoding_test.go b/prog/encoding_test.go index 24c479948b39..b4e42a07bcc1 100644 --- a/prog/encoding_test.go +++ b/prog/encoding_test.go @@ -471,7 +471,7 @@ func TestSerializeCallProps(t *testing.T) { }, { "serialize0(0x0) (fail_nth: 5)\n", - []CallProps{{5, false, 0}}, + []CallProps{{5, false, 0, false}}, }, { "serialize0(0x0) (fail_nth)\n", @@ -483,11 +483,15 @@ func TestSerializeCallProps(t *testing.T) { }, { "serialize0(0x0) (async)\n", - []CallProps{{0, true, 0}}, + []CallProps{{0, true, 0, false}}, }, { "serialize0(0x0) (async, rerun: 10)\n", - []CallProps{{0, true, 10}}, + []CallProps{{0, true, 10, false}}, + }, + { + "serialize0(0x0) (skip)\n", + []CallProps{{0, false, 0, true}}, }, } diff --git a/prog/encodingexec_test.go b/prog/encodingexec_test.go index f968b6cf2e7f..e044d32d810c 100644 --- a/prog/encodingexec_test.go +++ b/prog/encodingexec_test.go @@ -578,13 +578,16 @@ func TestSerializeForExec(t *testing.T) { `test() (fail_nth: 3) test() (fail_nth: 4) test() (async, rerun: 10) +test() (skip) `, []any{ - execInstrSetProps, 3, 0, 0, + execInstrSetProps, 3, 0, 0, 0, callID("test"), ExecNoCopyout, 0, - execInstrSetProps, 4, 0, 0, + execInstrSetProps, 4, 0, 0, 0, callID("test"), ExecNoCopyout, 0, - execInstrSetProps, 0, 1, 10, + execInstrSetProps, 0, 1, 10, 0, + callID("test"), ExecNoCopyout, 0, + execInstrSetProps, 0, 0, 0, 1, callID("test"), ExecNoCopyout, 0, execInstrEOF, }, @@ -593,17 +596,22 @@ test() (async, rerun: 10) { Meta: target.SyscallMap["test"], Index: ExecNoCopyout, - Props: CallProps{3, false, 0}, + Props: CallProps{3, false, 0, false}, + }, + { + Meta: target.SyscallMap["test"], + Index: ExecNoCopyout, + Props: CallProps{4, false, 0, false}, }, { Meta: target.SyscallMap["test"], Index: ExecNoCopyout, - Props: CallProps{4, false, 0}, + Props: CallProps{0, true, 10, false}, }, { Meta: target.SyscallMap["test"], Index: ExecNoCopyout, - Props: CallProps{0, true, 10}, + Props: CallProps{0, false, 0, true}, }, }, }, diff --git a/prog/prog.go b/prog/prog.go index 6fabaa680514..c6add105fe45 100644 --- a/prog/prog.go +++ b/prog/prog.go @@ -35,6 +35,7 @@ type CallProps struct { FailNth int `key:"fail_nth"` Async bool `key:"async"` Rerun int `key:"rerun"` + Skip bool `key:"skip"` } type Call struct { diff --git a/syz-manager/http.go b/syz-manager/http.go index ea57ba3be02b..3cf211e44d65 100644 --- a/syz-manager/http.go +++ b/syz-manager/http.go @@ -135,16 +135,30 @@ func (mgr *Manager) httpSyscalls(w http.ResponseWriter, r *http.Request) { data := &UISyscallsData{ Name: mgr.cfg.Name, } + throttler := mgr.throttler.Load() for c, cc := range mgr.collectSyscallInfo() { + syscall, ok := mgr.target.SyscallMap[c] var syscallID *int - if syscall, ok := mgr.target.SyscallMap[c]; ok { + if ok { syscallID = &syscall.ID } + + policy := "" + if throttler != nil && syscall != nil { + info := throttler.Info(syscall) + denied := "" + if info.Throttled { + denied = "[THROTTLED] " + } + policy = fmt.Sprintf("%s%d denied, %.2f%% crash rate, %d crashed", + denied, info.Denied, info.CrashRate*100.0, info.Crashed) + } data.Calls = append(data.Calls, UICallType{ - Name: c, - ID: syscallID, - Inputs: cc.Count, - Cover: len(cc.Cover), + Name: c, + ID: syscallID, + Inputs: cc.Count, + Cover: len(cc.Cover), + PolicyInfo: policy, }) } sort.Slice(data.Calls, func(i, j int) bool { @@ -761,10 +775,11 @@ type UIStat struct { } type UICallType struct { - Name string - ID *int - Inputs int - Cover int + Name string + ID *int + Inputs int + Cover int + PolicyInfo string } type UICorpus struct { @@ -862,6 +877,7 @@ var syscallsTemplate = pages.Create(` Inputs Coverage Prio + Policy Info {{range $c := $.Calls}} @@ -869,6 +885,7 @@ var syscallsTemplate = pages.Create(` {{$c.Inputs}} {{$c.Cover}} prio + {{$c.PolicyInfo}} {{end}} diff --git a/syz-manager/manager.go b/syz-manager/manager.go index 2aef4047522c..01950b622b72 100644 --- a/syz-manager/manager.go +++ b/syz-manager/manager.go @@ -28,6 +28,7 @@ import ( "github.com/google/syzkaller/pkg/flatrpc" "github.com/google/syzkaller/pkg/fuzzer" "github.com/google/syzkaller/pkg/fuzzer/queue" + "github.com/google/syzkaller/pkg/fuzzer/throttler" "github.com/google/syzkaller/pkg/gce" "github.com/google/syzkaller/pkg/hash" "github.com/google/syzkaller/pkg/instance" @@ -87,6 +88,7 @@ type Manager struct { mu sync.Mutex fuzzer atomic.Pointer[fuzzer.Fuzzer] + throttler atomic.Pointer[throttler.WrapperObject] phase int targetEnabledSyscalls map[*prog.Syscall]bool @@ -785,7 +787,11 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) { // Use unique instance names to prevent name collisions in case of untimely RPC messages. instanceName := fmt.Sprintf("vm-%d", mgr.nextInstanceID.Add(1)) injectExec := make(chan bool, 10) - mgr.serv.createInstance(instanceName, injectExec) + var monitor throttler.InstanceMonitor + if obj := mgr.throttler.Load(); obj != nil { + monitor = obj.InstanceMonitor() + } + mgr.serv.createInstance(instanceName, injectExec, monitor) rep, vmInfo, err := mgr.runInstanceInner(index, instanceName, injectExec) lastExec, machineInfo := mgr.serv.shutdownInstance(instanceName, rep != nil) @@ -1493,7 +1499,13 @@ func (mgr *Manager) machineChecked(features flatrpc.Feature, enabledSyscalls map go mgr.dashboardReproTasks() } } - return fuzzerObj + var syscalls []*prog.Syscall + for call := range enabledSyscalls { + syscalls = append(syscalls, call) + } + throttler := throttler.Wrapper(fuzzerObj, syscalls) + mgr.throttler.Store(throttler) + return throttler } else if mgr.mode == ModeCorpusRun { return &corpusRunner{ candidates: corpus, diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go index 422a6be007f7..e89ef85d41b0 100644 --- a/syz-manager/rpc.go +++ b/syz-manager/rpc.go @@ -19,6 +19,7 @@ import ( "github.com/google/syzkaller/pkg/cover/backend" "github.com/google/syzkaller/pkg/flatrpc" "github.com/google/syzkaller/pkg/fuzzer/queue" + "github.com/google/syzkaller/pkg/fuzzer/throttler" "github.com/google/syzkaller/pkg/ipc" "github.com/google/syzkaller/pkg/log" "github.com/google/syzkaller/pkg/mgrconfig" @@ -75,6 +76,7 @@ type Runner struct { executing map[int64]bool lastExec *LastExecuting rnd *rand.Rand + monitor throttler.InstanceMonitor } type BugFrames struct { @@ -139,7 +141,7 @@ func (serv *RPCServer) handleConn(conn *flatrpc.Conn) { if serv.cfg.VMLess { // There is no VM loop, so minic what it would do. - serv.createInstance(name, nil) + serv.createInstance(name, nil, nil) defer func() { serv.stopFuzzing(name) serv.shutdownInstance(name, false) @@ -387,7 +389,11 @@ func (serv *RPCServer) handleExecutingMessage(runner *Runner, msg *flatrpc.Execu } else { serv.statExecRetries.Add(1) } + // TODO: LastExecuting may be better integrated with the InstanceMonitor interface. runner.lastExec.Note(proc, req.Prog.Serialize(), osutil.MonotonicNano()) + if runner.monitor != nil { + runner.monitor.Record(req) + } select { case runner.injectExec <- true: default: @@ -527,13 +533,14 @@ func validateRequest(req *queue.Request) error { return nil } -func (serv *RPCServer) createInstance(name string, injectExec chan<- bool) { +func (serv *RPCServer) createInstance(name string, injectExec chan<- bool, monitor throttler.InstanceMonitor) { runner := &Runner{ injectExec: injectExec, finished: make(chan bool), requests: make(map[int64]*queue.Request), executing: make(map[int64]bool), lastExec: MakeLastExecuting(serv.cfg.Procs, 6), + monitor: monitor, rnd: rand.New(rand.NewSource(time.Now().UnixNano())), } serv.mu.Lock() @@ -574,6 +581,9 @@ func (serv *RPCServer) shutdownInstance(name string, crashed bool) ([]ExecRecord } req.Done(&queue.Result{Status: status}) } + if runner.monitor != nil { + runner.monitor.Shutdown(crashed) + } return runner.lastExec.Collect(), runner.machineInfo }