diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c540c14e..d32f1e19 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -2,6 +2,7 @@ name: Main
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
 
 on:
   pull_request:
@@ -24,7 +25,7 @@ jobs:
   checks:
     name: ${{ matrix.task.name }} (py ${{ matrix.python }})
     runs-on: [ubuntu-latest]
-    timeout-minutes: 10
+    timeout-minutes: 15
     strategy:
       fail-fast: false
       matrix:
diff --git a/src/olmo_core/distributed/sharded_flat_parameter.py b/src/olmo_core/distributed/sharded_flat_parameter.py
index 30dab056..73cb0dfb 100644
--- a/src/olmo_core/distributed/sharded_flat_parameter.py
+++ b/src/olmo_core/distributed/sharded_flat_parameter.py
@@ -31,3 +31,10 @@ def __new__(cls, data: Optional[torch.Tensor] = None, requires_grad: bool = True
             setattr(param, cls.SHARDED_FLAT_TENSOR_METADATA_NAME, {})
 
         return param
+
+    def __repr__(self) -> str:
+        r = torch.Tensor.__repr__(self)
+        if r.startswith("Parameter("):  # ) -- the open parenthesis confuses treesitter sometimes
+            r = r.replace("Parameter(", "", 1)  # ) -- the open parenthesis confuses treesitter sometimes
+            r = r[:-1]
+        return r