Skip to content

Commit

Permalink
pkg/fuzzer/throttler: avoid repetitive crashes
Browse files Browse the repository at this point in the history
Track the crash rate of individual syscalls and rate limit the
execution of those that crash too often.

For determining the most unsafe syscalls, let's keep a sliding window of
the last executed progs on every instance.
* If a program has been evicted from the sliding window, it's safe
  enough.
* If an instance crashed, all programs in the window are under
  suspicion.

For preventing the execution of banned syscalls, let's add a special
(skip) call parameter that is understood by the executor.

The parameter is not supposed to leak into the corpus, so add
appropriate assertions.
  • Loading branch information
a-nogikh committed Jun 21, 2024
1 parent edc5149 commit cbfee70
Show file tree
Hide file tree
Showing 15 changed files with 629 additions and 57 deletions.
58 changes: 32 additions & 26 deletions executor/executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1236,34 +1236,38 @@ void execute_call(thread_t* th)
th->soft_fail_state = true;
}

if (flag_coverage)
cover_reset(&th->cov);
// For pseudo-syscalls and user-space functions NONFAILING can abort before assigning to th->res.
// Arrange for res = -1 and errno = EFAULT result for such case.
th->res = -1;
errno = EFAULT;
NONFAILING(th->res = execute_syscall(call, th->args));
th->reserrno = errno;
// Our pseudo-syscalls may misbehave.
if ((th->res == -1 && th->reserrno == 0) || call->attrs.ignore_return)
th->reserrno = EINVAL;
// Reset the flag before the first possible fail().
th->soft_fail_state = false;

if (flag_coverage) {
cover_collect(&th->cov);
if (th->cov.size >= kCoverSize)
failmsg("too much cover", "thr=%d, cov=%u", th->id, th->cov.size);
}
th->fault_injected = false;
if (th->call_props.skip > 0) {
th->reserrno = ENOSYS;
} else {
if (flag_coverage)
cover_reset(&th->cov);
// For pseudo-syscalls and user-space functions NONFAILING can abort before assigning to th->res.
// Arrange for res = -1 and errno = EFAULT result for such case.
th->res = -1;
errno = EFAULT;
NONFAILING(th->res = execute_syscall(call, th->args));
th->reserrno = errno;
// Our pseudo-syscalls may misbehave.
if ((th->res == -1 && th->reserrno == 0) || call->attrs.ignore_return)
th->reserrno = EINVAL;
// Reset the flag before the first possible fail().
th->soft_fail_state = false;

if (flag_coverage) {
cover_collect(&th->cov);
if (th->cov.size >= kCoverSize)
failmsg("too much cover", "thr=%d, cov=%u", th->id, th->cov.size);
}
th->fault_injected = false;

if (th->call_props.fail_nth > 0)
th->fault_injected = fault_injected(fail_fd);
if (th->call_props.fail_nth > 0)
th->fault_injected = fault_injected(fail_fd);

// If required, run the syscall some more times.
// But let's still return res, errno and coverage from the first execution.
for (int i = 0; i < th->call_props.rerun; i++)
NONFAILING(execute_syscall(call, th->args));
// If required, run the syscall some more times.
// But let's still return res, errno and coverage from the first execution.
for (int i = 0; i < th->call_props.rerun; i++)
NONFAILING(execute_syscall(call, th->args));
}

debug("#%d [%llums] <- %s=0x%llx",
th->id, current_time_ms() - start_time_ms, call->name, (uint64)th->res);
Expand All @@ -1275,6 +1279,8 @@ void execute_call(thread_t* th)
debug(" fault=%d", th->fault_injected);
if (th->call_props.rerun > 0)
debug(" rerun=%d", th->call_props.rerun);
if (th->call_props.skip > 0)
debug(" skipped");
debug("\n");
}

Expand Down
14 changes: 14 additions & 0 deletions pkg/corpus/corpus.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ type NewItemEvent struct {
}

func (corpus *Corpus) Save(inp NewInput) {
validateCorpusProg(inp.Prog)

progData := inp.Prog.Serialize()
sig := hash.String(progData)

Expand Down Expand Up @@ -150,6 +152,18 @@ func (corpus *Corpus) Save(inp NewInput) {
}
}
}

// validateCorpusProg panics if any call in p has its Async or Skip property
// set. Save invokes it before serializing an input.
func validateCorpusProg(p *prog.Prog) {
	for _, c := range p.Calls {
		props := c.Props
		// Async is checked first so that a call with both properties set
		// reports the Async message.
		switch {
		case props.Async:
			panic("attempting to save a Async=true prog to corpus")
		case props.Skip:
			panic("attempting to save a Skip=true prog to corpus")
		}
	}
}

func (corpus *Corpus) Signal() signal.Signal {
corpus.mu.RLock()
defer corpus.mu.RUnlock()
Expand Down
4 changes: 4 additions & 0 deletions pkg/csource/csource.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ func (ctx *context) generateCalls(p prog.ExecProg, trace bool) ([]string, []uint
var calls []string
csumSeq := 0
for ci, call := range p.Calls {
if call.Props.Skip {
continue
}

w := new(bytes.Buffer)
// Copyin.
for _, copyin := range call.Copyin {
Expand Down
27 changes: 25 additions & 2 deletions pkg/fuzzer/fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,15 @@ func (fuzzer *Fuzzer) processResult(req *queue.Request, res *queue.Result, flags
fuzzer.triageProgCall(req.Prog, res.Info.Extra, -1, &triage)

if len(triage) != 0 {
p := req.Prog.Clone()
triage = removeSkippedCalls(p, triage)

queue, stat := fuzzer.triageQueue, fuzzer.statJobsTriage
if flags&progCandidate > 0 {
queue, stat = fuzzer.triageCandidateQueue, fuzzer.statJobsTriageCandidate
}
fuzzer.startJob(stat, &triageJob{
p: req.Prog.Clone(),
p: p,
flags: flags,
queue: queue.Append(),
calls: triage,
Expand All @@ -150,7 +153,7 @@ func (fuzzer *Fuzzer) processResult(req *queue.Request, res *queue.Result, flags

// Corpus candidates may have flaky coverage, so we give them a second chance.
maxCandidateAttempts := 3
if req.Risky() {
if req.Risky {
maxCandidateAttempts = 2
}
if len(triage) == 0 && flags&ProgFromCorpus != 0 && attempt < maxCandidateAttempts {
Expand All @@ -163,6 +166,26 @@ func (fuzzer *Fuzzer) processResult(req *queue.Request, res *queue.Result, flags
return true
}

// removeSkippedCalls removes every call marked with Props.Skip from p (in
// place) and returns a new triage map whose keys are shifted to match the
// positions of the remaining calls. The entry stored under key -1 (the one
// processResult fills from res.Info.Extra) is carried over unchanged.
// Nil entries and entries that belonged to removed calls are dropped.
func removeSkippedCalls(p *prog.Prog, triage map[int]*triageCall) map[int]*triageCall {
	remapped := make(map[int]*triageCall)
	if extra := triage[-1]; extra != nil {
		remapped[-1] = extra
	}
	// origPos walks the call indices of the program as it was passed in;
	// newPos is the index the current call has after the removals so far.
	newPos := 0
	for origPos := 0; newPos < len(p.Calls); origPos++ {
		if p.Calls[newPos].Props.Skip {
			// The next call shifts into newPos, so only origPos advances.
			p.RemoveCall(newPos)
			continue
		}
		if info := triage[origPos]; info != nil {
			remapped[newPos] = info
		}
		newPos++
	}
	return remapped
}

type Config struct {
Debug bool
Corpus *corpus.Corpus
Expand Down
26 changes: 26 additions & 0 deletions pkg/fuzzer/fuzzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,32 @@ func TestRotate(t *testing.T) {
assert.Equal(t, 700, minus.Len())
}

// TestRemoveSkippedCalls checks that removeSkippedCalls drops the calls marked
// (skip) from the program and re-keys the triage map to the new call indices.
func TestRemoveSkippedCalls(t *testing.T) {
	target, err := prog.GetTarget(targets.TestOS, targets.TestArch64Fuzz)
	if err != nil {
		t.Fatal(err)
	}
	// Calls 0 and 2 are skipped, so calls 1 and 3 must end up at 0 and 1.
	const progText = `
serialize0(&AUTO) (skip)
serialize1(&AUTO)
serialize2(&AUTO) (skip)
serialize3(&AUTO)
`
	p, err := target.Deserialize([]byte(progText), prog.NonStrict)
	if err != nil {
		t.Fatal(err)
	}

	// Only original call #1 carries triage info.
	triage := map[int]*triageCall{1: {}}
	remapped := removeSkippedCalls(p, triage)

	// The single entry must have moved from key 1 to key 0.
	assert.Len(t, remapped, 1)
	assert.NotNil(t, remapped[0])

	// The program itself must only keep the non-skipped calls, in order.
	assert.Len(t, p.Calls, 2)
	assert.Equal(t, "serialize1", p.Calls[0].Meta.Name)
	assert.Equal(t, "serialize3", p.Calls[1].Meta.Name)
}

// Based on the example from Go documentation.
var crc32q = crc32.MakeTable(0xD5828281)

Expand Down
8 changes: 3 additions & 5 deletions pkg/fuzzer/queue/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ type Request struct {
// Important requests will be retried even from crashed VMs.
Important bool

// Risky requests will not be retried, even if they are important.
Risky bool

// The callback will be called on request completion in the LIFO order.
// If it returns false, all further processing will be stopped.
// It allows wrappers to intercept Done() requests.
Expand Down Expand Up @@ -95,11 +98,6 @@ func (r *Request) Wait(ctx context.Context) *Result {
}
}

// Risky returns true if there's a substantial risk of the input crashing the VM.
// It reports the request's onceCrashed flag, which the retryer sets when it
// resubmits an important request whose result status was Crashed.
func (r *Request) Risky() bool {
return r.onceCrashed
}
func (r *Request) Validate() error {
collectSignal := r.ExecOpts.ExecFlags&flatrpc.ExecFlagCollectSignal > 0
if len(r.ReturnAllSignal) != 0 && !collectSignal {
Expand Down
4 changes: 2 additions & 2 deletions pkg/fuzzer/queue/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ func (r *retryer) done(req *Request, res *Result) bool {
return false
}
// Retry important requests from crashed VMs once.
if res.Status == Crashed && req.Important && !req.onceCrashed {
req.onceCrashed = true
if res.Status == Crashed && req.Important && !req.Risky {
req.Risky = true
r.pq.Submit(req)
return false
}
Expand Down
Loading

0 comments on commit cbfee70

Please sign in to comment.