Adding data copy between host and device #323

Open · wants to merge 8 commits into base: master
3 changes: 2 additions & 1 deletion include/taco/format.h
@@ -95,7 +95,7 @@ class ModeFormat {
/// Properties of a mode format
enum Property {
FULL, NOT_FULL, ORDERED, NOT_ORDERED, UNIQUE, NOT_UNIQUE, BRANCHLESS,
NOT_BRANCHLESS, COMPACT, NOT_COMPACT
NOT_BRANCHLESS, COMPACT, NOT_COMPACT, ZEROLESS, NOT_ZEROLESS
};

/// Instantiates an undefined mode format
@@ -126,6 +126,7 @@ class ModeFormat {
bool isUnique() const;
bool isBranchless() const;
bool isCompact() const;
bool isZeroless() const;

/// Returns true if a mode format has a specific capability, false otherwise
bool hasCoordValIter() const;
1 change: 1 addition & 0 deletions include/taco/lower/iterator.h
@@ -69,6 +69,7 @@ class Iterator : public util::Comparable<Iterator> {
bool isUnique() const;
bool isBranchless() const;
bool isCompact() const;
bool isZeroless() const;

/// Capabilities supported by levels being iterated.
bool hasCoordIter() const;
2 changes: 1 addition & 1 deletion include/taco/lower/mode_format_compressed.h
@@ -9,7 +9,7 @@ class CompressedModeFormat : public ModeFormatImpl {
public:
CompressedModeFormat();
CompressedModeFormat(bool isFull, bool isOrdered,
bool isUnique, long long allocSize = DEFAULT_ALLOC_SIZE);
bool isUnique, bool isZeroless, long long allocSize = DEFAULT_ALLOC_SIZE);

~CompressedModeFormat() override {}

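For orientation, a minimal sketch of how the widened constructor might be invoked once the isZeroless flag exists; the particular flag values and the variable name are illustrative assumptions, not taken from this patch:

```cpp
// Hypothetical call site: a compressed level that is ordered, unique, and
// known to store no explicit zeros (the new isZeroless flag). The exact
// combination of flags shown here is an assumption for illustration only.
CompressedModeFormat csrLevel(/*isFull=*/false, /*isOrdered=*/true,
                              /*isUnique=*/true, /*isZeroless=*/true);
```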
2 changes: 1 addition & 1 deletion include/taco/lower/mode_format_dense.h
@@ -8,7 +8,7 @@ namespace taco {
class DenseModeFormat : public ModeFormatImpl {
public:
DenseModeFormat();
DenseModeFormat(const bool isOrdered, const bool isUnique);
DenseModeFormat(const bool isOrdered, const bool isUnique, const bool isZeroless);

~DenseModeFormat() override {}

3 changes: 2 additions & 1 deletion include/taco/lower/mode_format_impl.h
@@ -59,7 +59,7 @@ std::ostream& operator<<(std::ostream&, const ModeFunction&);
class ModeFormatImpl {
public:
ModeFormatImpl(std::string name, bool isFull, bool isOrdered, bool isUnique,
bool isBranchless, bool isCompact, bool hasCoordValIter,
bool isBranchless, bool isCompact, bool isZeroless, bool hasCoordValIter,
bool hasCoordPosIter, bool hasLocate, bool hasInsert,
bool hasAppend);

@@ -162,6 +162,7 @@ class ModeFormatImpl {
const bool isUnique;
const bool isBranchless;
const bool isCompact;
const bool isZeroless;

const bool hasCoordValIter;
const bool hasCoordPosIter;
2 changes: 1 addition & 1 deletion include/taco/lower/mode_format_singleton.h
@@ -9,7 +9,7 @@ class SingletonModeFormat : public ModeFormatImpl {
public:
SingletonModeFormat();
SingletonModeFormat(bool isFull, bool isOrdered,
bool isUnique, long long allocSize = DEFAULT_ALLOC_SIZE);
bool isUnique, bool isZeroless, long long allocSize = DEFAULT_ALLOC_SIZE);

~SingletonModeFormat() override {}

93 changes: 80 additions & 13 deletions src/codegen/codegen.cpp
@@ -230,16 +230,47 @@ string CodeGen::printTensorProperty(string varname, const GetProperty* op, bool
}

string CodeGen::unpackTensorProperty(string varname, const GetProperty* op,
bool is_output_prop) {
bool is_output_prop, int flag, string output_tensor) {
stringstream ret;
ret << " ";

auto tensor = op->tensor.as<Var>();
if (op->property == TensorProperty::Values) {
// for the values, it's in the last slot
ret << printType(tensor->type, true);
ret << " " << restrictKeyword() << " " << varname << " = (" << printType(tensor->type, true) << ")(";
ret << tensor->name << "->vals);\n";
switch(flag) {
case PRINT_FUNC:
ret << printType(tensor->type, true);
ret << " " << restrictKeyword() << " " << varname << " = (" << printType(tensor->type, true) << ")(";
ret << tensor->name << "->vals);\n";
break;
case PRINT_MEM_HOST_TO_DEV:
ret << "gpuErrchk(cudaMalloc((void **)&";
ret << tensor->name << "_dev" << "->vals, ";
ret << "malloc_usable_size(";
ret << tensor->name << "->vals)));\n";

ret << " ";
ret << "cudaMemcpy(";
ret << tensor->name << "_dev" << "->vals, ";
ret << tensor->name << "->vals, ";
ret << "malloc_usable_size(";
ret << tensor->name << "->vals), ";
ret << "cudaMemcpyHostToDevice);\n";
break;
case PRINT_MEM_DEV_TO_HOST:
if(output_tensor == tensor->name) {
ret << "cudaMemcpy(";
ret << tensor->name << "->vals, ";
ret << tensor->name << "_dev->vals, ";
ret << "malloc_usable_size(";
ret << tensor->name << "->vals), ";
ret << "cudaMemcpyDevicetToHost);\n";
ret << " ";
}
ret << "cudaFree(";
ret << tensor->name << "_dev" << "->vals);\n";
break;
}
return ret.str();
} else if (op->property == TensorProperty::ValuesSize) {
ret << "int " << varname << " = " << tensor->name << "->vals_size;\n";
@@ -252,18 +283,54 @@
// for a Fixed level, ptr is an int
// all others are int*
if (op->property == TensorProperty::Dimension) {
tp = "int";
ret << tp << " " << varname << " = (int)(" << tensor->name
<< "->dimensions[" << op->mode << "]);\n";
switch(flag) {
case PRINT_FUNC:
tp = "int";
ret << tp << " " << varname << " = (int)(" << tensor->name
<< "->dimensions[" << op->mode << "]);\n";
break;
case PRINT_MEM_HOST_TO_DEV:
ret << tensor->name << "_dev->dimension[" << op->mode << "] = " << tensor->name << "->dimension[" << op->mode << "];\n";
break;
}
} else {
taco_iassert(op->property == TensorProperty::Indices);
tp = "int*";
auto nm = op->index;
ret << tp << " " << restrictKeyword() << " " << varname << " = ";
ret << "(int*)(" << tensor->name << "->indices[" << op->mode;
ret << "][" << nm << "]);\n";
switch(flag) {
case PRINT_FUNC:
ret << tp << " " << restrictKeyword() << " " << varname << " = ";
ret << "(int*)(" << tensor->name << "->indices[" << op->mode;
ret << "][" << nm << "]);\n";
break;
case PRINT_MEM_HOST_TO_DEV:
ret << "gpuErrchk(cudaMalloc((void **)&";
ret << tensor->name << "_dev" << "->indices[" << op->mode << "][" << nm << "], ";
ret << "malloc_usable_size(";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "])));\n";

ret << " ";
ret << "cudaMemcpy(";
ret << tensor->name << "_dev" << "->indices[" << op->mode << "][" << nm << "], ";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "], ";
ret << "malloc_usable_size(";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "]), ";
ret << "cudaMemcpyHostToDevice);\n";
break;
case PRINT_MEM_DEV_TO_HOST:
if(output_tensor == tensor->name) {
ret << "cudaMemcpy(";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "], ";
ret << tensor->name << "_dev->indices[" << op->mode << "][" << nm << "], ";
ret << "malloc_usable_size(";
ret << tensor->name << "->indices[" << op->mode << "][" << nm << "]), ";
ret << "cudaMemcpyDeviceToHost);\n";
}
ret << "cudaFree(";
ret << tensor->name << "_dev" << "->indices[" << op->mode << "][" << nm << "]);\n";
break;
}
}

return ret.str();
}

@@ -312,7 +379,7 @@ string CodeGen::pointTensorProperty(std::string varname) {

// helper to print declarations
string CodeGen::printDecls(map<Expr, string, ExprCompare> varMap,
vector<Expr> inputs, vector<Expr> outputs) {
vector<Expr> inputs, vector<Expr> outputs, int flag, string output_tensor) {
stringstream ret;
unordered_set<string> propsAlreadyGenerated;

@@ -367,7 +434,7 @@ string CodeGen::printDecls(map<Expr, string, ExprCompare> varMap,
break;
}
} else {
ret << unpackTensorProperty(varMap[prop], prop, isOutputProp);
ret << unpackTensorProperty(varMap[prop], prop, isOutputProp, flag, output_tensor);
}
propsAlreadyGenerated.insert(varMap[prop]);
}
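Taken together, the new switch means the same GetProperty unpack can be printed three ways: the original declaration (PRINT_FUNC), a device allocation plus host-to-device copy (PRINT_MEM_HOST_TO_DEV), or a copy back to the host followed by a cudaFree (PRINT_MEM_DEV_TO_HOST). As a rough sketch of the emitted text, assuming an input tensor named A with a device-side mirror A_dev (both names are placeholders), the host-to-device branch would produce something like:

```cpp
// Illustrative emitted code only; A, A_dev, and the index numbers are assumed.
// One indices array (mode 1, index 0) followed by the values array.
gpuErrchk(cudaMalloc((void **)&A_dev->indices[1][0],
                     malloc_usable_size(A->indices[1][0])));
cudaMemcpy(A_dev->indices[1][0], A->indices[1][0],
           malloc_usable_size(A->indices[1][0]), cudaMemcpyHostToDevice);

gpuErrchk(cudaMalloc((void **)&A_dev->vals, malloc_usable_size(A->vals)));
cudaMemcpy(A_dev->vals, A->vals, malloc_usable_size(A->vals),
           cudaMemcpyHostToDevice);
```

Sizing the transfers with malloc_usable_size only makes sense for buffers allocated by the host's malloc, which is presumably why the size argument is always taken from the host-side pointer.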
8 changes: 6 additions & 2 deletions src/codegen/codegen.h
@@ -1,6 +1,10 @@
#ifndef TACO_CODEGEN_H
#define TACO_CODEGEN_H

#define PRINT_FUNC 0
#define PRINT_MEM_HOST_TO_DEV 1
#define PRINT_MEM_DEV_TO_HOST 2
Contributor: Should replace these with an enum.

Author: Oh, I see. I will replace them with an enum.
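A minimal sketch of the suggested replacement, assuming nothing beyond the three values already defined above (the enum and value names are placeholders, not part of the patch):

```cpp
// Hypothetical replacement for the PRINT_* macros suggested above: a scoped
// enum keeps the flag type-checked and out of the preprocessor namespace.
enum class PrintKind {
  Func,          // emit the ordinary unpack/declaration code
  MemHostToDev,  // emit cudaMalloc + host-to-device cudaMemcpy calls
  MemDevToHost   // emit device-to-host cudaMemcpy + cudaFree calls
};
```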


#include <memory>
#include "taco/ir/ir.h"
#include "taco/ir/ir_printer.h"
@@ -43,7 +47,7 @@ class CodeGen : public IRPrinter {
std::vector<Expr> localVars, int labels,
std::string funcName);
std::string printDecls(std::map<Expr, std::string, ExprCompare> varMap,
std::vector<Expr> inputs, std::vector<Expr> outputs);
std::vector<Expr> inputs, std::vector<Expr> outputs, int flag, std::string output_tensor);
std::string printPack(std::map<std::tuple<Expr, TensorProperty, int, int>,
std::string> outputProperties, std::vector<Expr> outputs);
std::string printCoroutineFinish(int numYields, std::string funcName);
@@ -63,7 +67,7 @@

std::string printTensorProperty(std::string varname, const GetProperty* op, bool is_ptr);
std::string unpackTensorProperty(std::string varname, const GetProperty* op,
bool is_output_prop);
bool is_output_prop, int flag, std::string output_tensor);
std::string packTensorProperty(std::string varname, Expr tnsr, TensorProperty property,
int mode, int index);
std::string pointTensorProperty(std::string varname);
2 changes: 1 addition & 1 deletion src/codegen/codegen_c.cpp
@@ -290,7 +290,7 @@ void CodeGen_C::visit(const Function* func) {
localVars = varFinder.localVars;

// Print variable declarations
out << printDecls(varFinder.varDecls, func->inputs, func->outputs) << endl;
out << printDecls(varFinder.varDecls, func->inputs, func->outputs, PRINT_FUNC, "") << endl;

if (emittingCoroutine) {
out << printContextDeclAndInit(varMap, localVars, numYields, func->name)
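For comparison, a CUDA backend would presumably call the extended printDecls once per phase; the sketch below is hypothetical and not part of this diff (outputTensorName stands in for however the backend names its output tensor):

```cpp
// Hypothetical CUDA-backend usage of the new flag/output_tensor parameters.
out << printDecls(varFinder.varDecls, func->inputs, func->outputs,
                  PRINT_FUNC, "") << endl;             // normal declarations
out << printDecls(varFinder.varDecls, func->inputs, func->outputs,
                  PRINT_MEM_HOST_TO_DEV, "") << endl;  // copy inputs to the device
// ... emit and launch the device kernel here ...
out << printDecls(varFinder.varDecls, func->inputs, func->outputs,
                  PRINT_MEM_DEV_TO_HOST, outputTensorName) << endl;  // copy results back, free device buffers
```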