enable specifying grid/block params for gpu execution
hidetatz committed Jan 27, 2025
1 parent 8881d4c commit 666cdee
Showing 2 changed files with 150 additions and 49 deletions.
178 changes: 131 additions & 47 deletions tensor2/backend.go
@@ -43,7 +43,7 @@ func initBackend() {
  * Generate a sequence of IR from tensor AST
  */
 
-func generateIR(t *Tensor) (irs []*instruction, err error) {
+func generateIR(t *Tensor, gpu bool) (irs []*instruction, err error) {
     defer func() {
         if r := recover(); r != nil {
             err = fmt.Errorf("%v", r.(string))
@@ -84,18 +84,37 @@ func generateIR(t *Tensor) (irs []*instruction, err error) {
     /*
      * define kernel
      */
-    paramIdx := pushK(inst(&mnKernParam{typ: t_int}))
-    paramx := pushK(inst(&mnKernParam{typ: t_floats}))
-    paramresult := pushK(inst(&mnKernParam{typ: t_floats}))
-    kern := pushK(inst(&mnKernel{params: []instid{paramIdx, paramx, paramresult}}))
-    target := pushK(inst(&mnInit{from: paramx, idx: paramIdx}))
-    var op alu1op
-    if t.op == op_recip {
-        op = alu1_recip
-    }
-    alu1 := pushK(inst(&mnALU1{val: target, op: op}))
-    pushK(inst(&mnAssign{left: paramresult, lidx: paramIdx, right: alu1}))
-    pushK(inst(&mnEndKernel{}))
+
+    var kern instid
+
+    if gpu {
+        paramx := pushK(inst(&mnKernParam{typ: t_floats}))
+        paramresult := pushK(inst(&mnKernParam{typ: t_floats}))
+        kern = pushK(inst(&mnKernel{params: []instid{paramx, paramresult}}))
+        idx := pushK(inst(&mnThreadPosition{dimensions: 1}))
+        target := pushK(inst(&mnInit{from: paramx, idx: idx}))
+        var op alu1op
+        if t.op == op_recip {
+            op = alu1_recip
+        }
+        alu1 := pushK(inst(&mnALU1{val: target, op: op}))
+        pushK(inst(&mnAssign{left: paramresult, lidx: idx, right: alu1}))
+        pushK(inst(&mnEndKernel{}))
+
+    } else {
+        paramIdx := pushK(inst(&mnKernParam{typ: t_int}))
+        paramx := pushK(inst(&mnKernParam{typ: t_floats}))
+        paramresult := pushK(inst(&mnKernParam{typ: t_floats}))
+        kern = pushK(inst(&mnKernel{params: []instid{paramIdx, paramx, paramresult}}))
+        target := pushK(inst(&mnInit{from: paramx, idx: paramIdx}))
+        var op alu1op
+        if t.op == op_recip {
+            op = alu1_recip
+        }
+        alu1 := pushK(inst(&mnALU1{val: target, op: op}))
+        pushK(inst(&mnAssign{left: paramresult, lidx: paramIdx, right: alu1}))
+        pushK(inst(&mnEndKernel{}))
+    }
 
     /*
      * call kernel from entry
@@ -104,9 +104,19 @@
     // define result to store
     result := pushE(inst(&mnDecl{typ: t_floats, length: size}))
 
-    // start loop and invokes kernel
-    loop := pushE(inst(&mnLoop{countImm: size}))
-    pushE(inst(&mnInvokeKernel{kernel: kern, args: []instid{loop, inputid, result}}))
+    if gpu {
+        pushE(inst(&mnInvokeKernel{
+            kernel:         kern,
+            parallelLevel1: &kernelParallelizationParam{x: 1},
+            parallelLevel2: &kernelParallelizationParam{x: size},
+            args:           []instid{inputid, result},
+        }))
+    } else {
+        // start loop and invokes kernel
+        loop := pushE(inst(&mnLoop{countImm: size}))
+        pushE(inst(&mnInvokeKernel{kernel: kern, args: []instid{loop, inputid, result}}))
+    }
+
 
     pushE(inst(&mnEndLoop{}))
 
     return result
Expand All @@ -125,42 +154,84 @@ func generateIR(t *Tensor) (irs []*instruction, err error) {
* define kernel
*/

paramIdx := pushK(inst(&mnKernParam{typ: t_int}))
paraml := pushK(inst(&mnKernParam{typ: t_floats}))
paramr := pushK(inst(&mnKernParam{typ: t_floats}))
paramresult := pushK(inst(&mnKernParam{typ: t_floats}))
kern := pushK(inst(&mnKernel{params: []instid{paramIdx, paraml, paramr, paramresult}}))
var kern instid

// assume vector
// todo: support 2 or more dimensions
if gpu {
paraml := pushK(inst(&mnKernParam{typ: t_floats}))
paramr := pushK(inst(&mnKernParam{typ: t_floats}))
paramresult := pushK(inst(&mnKernParam{typ: t_floats}))
kern = pushK(inst(&mnKernel{params: []instid{paraml, paramr, paramresult}}))

// compute stride, considering broadcast
lstride := pushK(inst(&mnInitImm{typ: t_int, val: l.dim.strides[0]}))
rstride := pushK(inst(&mnInitImm{typ: t_int, val: r.dim.strides[0]}))
// assume vector
// todo: support 2 or more dimensions

// define index
lidx := pushK(inst(&mnALU2{left: paramIdx, op: alu2_mul, right: lstride}))
ridx := pushK(inst(&mnALU2{left: paramIdx, op: alu2_mul, right: rstride}))
idx := pushK(inst(&mnThreadPosition{dimensions: 1}))

// load value to be computed from left and right
loadl := pushK(inst(&mnInit{from: paraml, idx: lidx}))
loadr := pushK(inst(&mnInit{from: paramr, idx: ridx}))
// compute stride, considering broadcast
lstride := pushK(inst(&mnInitImm{typ: t_int, val: l.dim.strides[0]}))
rstride := pushK(inst(&mnInitImm{typ: t_int, val: r.dim.strides[0]}))

var op alu2op
if t.op == op_add {
op = alu2_add
} else {
op = alu2_mul
}
// define index
lidx := pushK(inst(&mnALU2{left: idx, op: alu2_mul, right: lstride}))
ridx := pushK(inst(&mnALU2{left: idx, op: alu2_mul, right: rstride}))

// do compute
alu2 := pushK(inst(&mnALU2{left: loadl, op: op, right: loadr}))
// load value to be computed from left and right
loadl := pushK(inst(&mnInit{from: paraml, idx: lidx}))
loadr := pushK(inst(&mnInit{from: paramr, idx: ridx}))

// assign computed to result
pushK(inst(&mnAssign{left: paramresult, lidx: paramIdx, right: alu2}))
var op alu2op
if t.op == op_add {
op = alu2_add
} else {
op = alu2_mul
}

// finish kernel
pushK(inst(&mnEndKernel{}))
// do compute
alu2 := pushK(inst(&mnALU2{left: loadl, op: op, right: loadr}))

// assign computed to result
pushK(inst(&mnAssign{left: paramresult, lidx: idx, right: alu2}))

// finish kernel
pushK(inst(&mnEndKernel{}))
} else {
paramIdx := pushK(inst(&mnKernParam{typ: t_int}))
paraml := pushK(inst(&mnKernParam{typ: t_floats}))
paramr := pushK(inst(&mnKernParam{typ: t_floats}))
paramresult := pushK(inst(&mnKernParam{typ: t_floats}))
kern = pushK(inst(&mnKernel{params: []instid{paramIdx, paraml, paramr, paramresult}}))

// assume vector
// todo: support 2 or more dimensions

// compute stride, considering broadcast
lstride := pushK(inst(&mnInitImm{typ: t_int, val: l.dim.strides[0]}))
rstride := pushK(inst(&mnInitImm{typ: t_int, val: r.dim.strides[0]}))

// define index
lidx := pushK(inst(&mnALU2{left: paramIdx, op: alu2_mul, right: lstride}))
ridx := pushK(inst(&mnALU2{left: paramIdx, op: alu2_mul, right: rstride}))

// load value to be computed from left and right
loadl := pushK(inst(&mnInit{from: paraml, idx: lidx}))
loadr := pushK(inst(&mnInit{from: paramr, idx: ridx}))

var op alu2op
if t.op == op_add {
op = alu2_add
} else {
op = alu2_mul
}

// do compute
alu2 := pushK(inst(&mnALU2{left: loadl, op: op, right: loadr}))

// assign computed to result
pushK(inst(&mnAssign{left: paramresult, lidx: paramIdx, right: alu2}))

// finish kernel
pushK(inst(&mnEndKernel{}))
}

/*
* call kernel from entry
Expand All @@ -169,9 +240,19 @@ func generateIR(t *Tensor) (irs []*instruction, err error) {
// define result to store
result := pushE(inst(&mnDecl{typ: t_floats, length: sizel}))

// start loop and invoke kernel
loop := pushE(inst(&mnLoop{countImm: sizel}))
pushE(inst(&mnInvokeKernel{kernel: kern, args: []instid{loop, lid, rid, result}}))
if gpu {
pushE(inst(&mnInvokeKernel{
kernel: kern,
parallelLevel1: &kernelParallelizationParam{x: 1},
parallelLevel2: &kernelParallelizationParam{x: sizel},
args: []instid{lid, rid, result},
}))
} else {
// start loop and invoke kernel
loop := pushE(inst(&mnLoop{countImm: sizel}))
pushE(inst(&mnInvokeKernel{kernel: kern, args: []instid{loop, lid, rid, result}}))
}

pushE(inst(&mnEndLoop{}))

return result
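
Note on the indexing scheme above: the binary kernels address each operand as index × stride ("compute stride, considering broadcast"), presumably with dim.strides[0] set to 0 for a broadcast operand so every position reads its single value. A minimal runnable Go sketch of that scheme follows; addKernel and the stride-0 convention are illustrative assumptions, not code from this repository.

package main

import "fmt"

// addKernel mirrors the generated kernel body: position i loads
// l[i*lstride] and r[i*rstride], adds them, and stores the result.
func addKernel(i int, l, r, result []float32, lstride, rstride int) {
    result[i] = l[i*lstride] + r[i*rstride]
}

func main() {
    l := []float32{1, 2, 3}
    r := []float32{10} // broadcast operand: stride 0 keeps every read at r[0]
    result := make([]float32, len(l))
    for i := range result {
        addKernel(i, l, r, result, 1, 0)
    }
    fmt.Println(result) // [11 12 13]
}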
@@ -433,19 +514,22 @@ func compute(t *Tensor) ([]float32, error) {
     var (
         renderer renderer
         executor executor
+        gpu      bool
     )
 
     switch backend {
     case be_golang:
         renderer = &cLikeRenderer{lang: &gorenderer{}}
         executor = &goexecutor{}
+        gpu = false
 
     case be_cuda:
         // renderer = &cLikeRenderer{lang: &cudarenderer{}}
         // executor = &cudaexecutor{}
         // gpu = true
     }
 
-    irs, err := generateIR(t)
+    irs, err := generateIR(t, gpu)
     if err != nil {
         return nil, err
     }
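
For reference, a runnable Go sketch of the two execution shapes generateIR now emits, with goroutines standing in for GPU threads. recipKernel and the goroutine simulation are illustrative assumptions; the point is where the index comes from: a kernel parameter driven by mnLoop on the CPU path, versus the thread's own position (mnThreadPosition) on the GPU path, with parallelLevel1{x: 1} / parallelLevel2{x: size} covering the same iteration space.

package main

import (
    "fmt"
    "sync"
)

// recipKernel computes one output element, like the generated kernel body.
func recipKernel(idx int, x, result []float32) {
    result[idx] = 1 / x[idx]
}

func main() {
    x := []float32{1, 2, 4}
    size := len(x)
    result := make([]float32, size)

    // CPU path: mnLoop drives the kernel; the index is a kernel parameter.
    for i := 0; i < size; i++ {
        recipKernel(i, x, result)
    }

    // GPU path, simulated: every "thread" runs concurrently and derives its
    // index from its own position. parallelLevel1{x: 1} / parallelLevel2{x: size}
    // corresponds to one group of size threads along x.
    var wg sync.WaitGroup
    for i := 0; i < size; i++ {
        wg.Add(1)
        go func(idx int) {
            defer wg.Done()
            recipKernel(idx, x, result)
        }(i)
    }
    wg.Wait()

    fmt.Println(result) // [1 0.5 0.25]
}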
21 changes: 19 additions & 2 deletions tensor2/ir.go
@@ -113,12 +113,18 @@ func (m *mnEndKernel) String() string {
     return "{endkernel}"
 }
 
+type kernelParallelizationParam struct {
+    x, y, z int
+}
+
 // invokes kernel function.
 type mnInvokeKernel struct {
     mnemonic
 
-    kernel instid
-    args   []instid
+    kernel         instid
+    parallelLevel1 *kernelParallelizationParam
+    parallelLevel2 *kernelParallelizationParam
+    args           []instid
 }
 
 func (m *mnInvokeKernel) String() string {
@@ -304,3 +310,14 @@ type mnALU2 struct {
 func (m *mnALU2) String() string {
     return fmt.Sprintf("{alu2 %v %v %v}", m.left, m.op, m.right)
 }
+
+// for GPU
+type mnThreadPosition struct {
+    mnemonic
+
+    dimensions int
+}
+
+func (m *mnThreadPosition) String() string {
+    return fmt.Sprintf("{thread_position (dim%v)}", m.dimensions)
+}
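
A sketch of how a GPU renderer might lower these mnemonics, reading parallelLevel1 as the grid, parallelLevel2 as the block, and mnThreadPosition{dimensions: 1} as the flattened thread index. The cuda renderer is still commented out in this commit, so the CUDA-style launch syntax below is an assumption rather than the repository's actual output.

package main

import (
    "fmt"
    "strings"
)

type kernelParallelizationParam struct {
    x, y, z int
}

// dim3 renders a parallelization param as a CUDA-style dim3 literal,
// treating unset (zero) dimensions as 1.
func dim3(p *kernelParallelizationParam) string {
    or1 := func(v int) int {
        if v < 1 {
            return 1
        }
        return v
    }
    return fmt.Sprintf("dim3(%d, %d, %d)", or1(p.x), or1(p.y), or1(p.z))
}

// renderInvoke emits kernel<<<grid, block>>>(args...), the launch form an
// mnInvokeKernel could map to; mnThreadPosition{dimensions: 1} would then
// become blockIdx.x*blockDim.x + threadIdx.x inside the kernel.
func renderInvoke(kernel string, grid, block *kernelParallelizationParam, args []string) string {
    return fmt.Sprintf("%s<<<%s, %s>>>(%s);", kernel, dim3(grid), dim3(block), strings.Join(args, ", "))
}

func main() {
    // mirrors the unary case above: grid {x: 1}, block {x: size}
    fmt.Println(renderInvoke(
        "kernel0",
        &kernelParallelizationParam{x: 1},
        &kernelParallelizationParam{x: 8},
        []string{"x", "result"},
    ))
    // kernel0<<<dim3(1, 1, 1), dim3(8, 1, 1)>>>(x, result);
}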
