diff --git a/examples/sparse/README.md b/examples/sparse/README.md
new file mode 100644
index 00000000..c0cd3efc
--- /dev/null
+++ b/examples/sparse/README.md
@@ -0,0 +1,3 @@
+# Sparse data
+
+This example trains a youtube comment spam classifier on a sparse dataset. The comments as raw strings are converted to a sparse matrix of word counts using the `CountVectorizer` from scikit-learn.
diff --git a/examples/sparse/sparse.py b/examples/sparse/sparse.py
new file mode 100644
index 00000000..525376c6
--- /dev/null
+++ b/examples/sparse/sparse.py
@@ -0,0 +1,50 @@
+import pandas as pd
+from sklearn.datasets import fetch_openml
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.model_selection import train_test_split
+
+import legateboost as lb
+
+# Alberto, T. & Lochter, J. (2015). YouTube Spam Collection [Dataset].
+# UCI Machine Learning Repository. https://doi.org/10.24432/C58885.
+dataset_names = [
+    "youtube-spam-psy",
+    "youtube-spam-shakira",
+    "youtube-spam-lmfao",
+    "youtube-spam-eminem",
+    "youtube-spam-katyperry",
+]
+X = []
+for dataset_name in dataset_names:
+    dataset = fetch_openml(name=dataset_name, as_frame=True)
+    X.append(dataset.data)
+
+X = pd.concat(X)
+X.reset_index(drop=True, inplace=True)
+y = X["CLASS"]
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.3, random_state=42
+)
+vectorizer = CountVectorizer()
+X_train_vectorized = vectorizer.fit_transform(X_train["CONTENT"])
+X_test_vectorized = vectorizer.transform(X_test["CONTENT"])
+
+model = lb.LBClassifier().fit(
+    X_train_vectorized, y_train, eval_set=[(X_test_vectorized, y_test)]
+)
+
+example_a = X_test.iloc[15]["CONTENT"]
+print("Comment: {}".format(example_a))
+print(
+    "Probability of spam: {}".format(
+        model.predict_proba(vectorizer.transform([example_a]))[0, 1]
+    )
+)
+
+example_b = X_test.iloc[3]["CONTENT"]
+print("Comment: {}".format(example_b))
+print(
+    "Probability of spam: {}".format(
+        model.predict_proba(vectorizer.transform([example_b]))[0, 1]
+    )
+)
diff --git a/src/models/tree/build_tree.cu b/src/models/tree/build_tree.cu
index 8945205b..c2f224fa 100644
--- a/src/models/tree/build_tree.cu
+++ b/src/models/tree/build_tree.cu
@@ -1339,8 +1339,8 @@ struct build_tree_csr_fn {
   auto [h, h_shape, h_accessor] = GetInputStore(context.input(4).data());
 
   auto num_rows = std::max(X_offsets_shape.hi[0] - X_offsets_shape.lo[0] + 1, 0);
-  auto num_outputs = g_shape.hi[1] - g_shape.lo[1] + 1;
-  EXPECT(g_shape.lo[1] == 0, "Outputs should not be split between workers.");
+  auto num_outputs = g_shape.hi[2] - g_shape.lo[2] + 1;
+  EXPECT(g_shape.lo[2] == 0, "Outputs should not be split between workers.");
 
   // Scalars
   auto max_depth = context.scalars().at(0).value();