Skip to content

Commit

Permalink
Implement breakpoint() using PTX brkpt instruction
Browse files Browse the repository at this point in the history
See https://docs.nvidia.com/cuda/parallel-thread-execution/#miscellaneous-instructions-brkpt

This is implemented using the low-level API because an overload with the
high-level API causes the breakpoint to look like a function call that
the code has entered, which is a bit confusing for the user.
  • Loading branch information
gmarkall committed Dec 18, 2024
1 parent f57706f commit 2510732
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
5 changes: 5 additions & 0 deletions numba_cuda/numba/cuda/cudadecl.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,11 @@ def generic(self, args, kws):
return signature(dty, ary, idx, dty, dty)


@register_global(breakpoint)
class Cuda_breakpoint(ConcreteTemplate):
    # Typing template for the builtin ``breakpoint`` when it is called from
    # device code. The single supported case takes no arguments and has a
    # return type of ``none``.
    cases = [
        signature(types.none),
    ]


@register
class Cuda_nanosleep(ConcreteTemplate):
key = cuda.nanosleep
Expand Down
8 changes: 8 additions & 0 deletions numba_cuda/numba/cuda/cudaimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,14 @@ def ptx_atomic_cas(context, builder, sig, args):

# -----------------------------------------------------------------------------


@lower(breakpoint)
def ptx_brkpt(context, builder, sig, args):
    """Lower a call to ``breakpoint()`` to the PTX ``brkpt`` instruction.

    The instruction is emitted as an inline assembly call that takes no
    operands and produces no value. This lowering is written against the
    low-level API rather than as a high-level overload, because an overload
    makes the breakpoint look like a function call that the code has
    entered, which is confusing when debugging.

    ``context``, ``sig`` and ``args`` are not used here; nothing is returned.
    """
    # A function type of the form ``void()``: no arguments, no result.
    void_fnty = ir.FunctionType(ir.VoidType(), [])
    # No constraint string is needed as there are no operands. The asm is
    # flagged as having side effects - presumably so that it is not removed
    # by optimization, since its result is never used.
    brkpt_asm = ir.InlineAsm(void_fnty, "brkpt;", '', side_effect=True)
    builder.call(brkpt_asm, ())


@lower(stubs.nanosleep, types.uint32)
def ptx_nanosleep(context, builder, sig, args):
nanosleep = ir.InlineAsm(ir.FunctionType(ir.VoidType(), [ir.IntType(32)]),
Expand Down

0 comments on commit 2510732

Please sign in to comment.