diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c540c14e..d32f1e19 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -2,6 +2,7 @@ name: Main
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true  # cancel an in-progress run when a newer one starts in the same group
 
 on:
   pull_request:
@@ -24,7 +25,7 @@ jobs:
   checks:
     name: ${{ matrix.task.name }} (py ${{ matrix.python }})
     runs-on: [ubuntu-latest]
-    timeout-minutes: 10
+    timeout-minutes: 15
     strategy:
       fail-fast: false
       matrix:
diff --git a/src/olmo_core/distributed/sharded_flat_parameter.py b/src/olmo_core/distributed/sharded_flat_parameter.py
index 30dab056..73cb0dfb 100644
--- a/src/olmo_core/distributed/sharded_flat_parameter.py
+++ b/src/olmo_core/distributed/sharded_flat_parameter.py
@@ -31,3 +31,10 @@ def __new__(cls, data: Optional[torch.Tensor] = None, requires_grad: bool = True
             setattr(param, cls.SHARDED_FLAT_TENSOR_METADATA_NAME, {})
 
         return param
+
+    def __repr__(self) -> str:  # repr without the outer Parameter(...) wrapper, when one is present
+        r = torch.Tensor.__repr__(self)  # call torch.Tensor's repr directly as the starting text
+        if r.startswith("Parameter("):  # )  -- the open parenthesis confuses treesitter sometimes
+            r = r.replace("Parameter(", "", 1)  # )  -- the open parenthesis confuses treesitter sometimes
+            r = r[:-1]  # drop the final character -- presumably the wrapper's closing parenthesis; TODO confirm
+        return r