|
74 | 74 | {"id":"CodeContextBench-b41","title":"Add DependEval and LinuxFLBench to TASK_CATALOG.md","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-06T14:26:04.732271812Z","created_by":"LoCoBench Bot","updated_at":"2026-02-06T14:30:46.293910463Z","closed_at":"2026-02-06T14:30:46.293910463Z","close_reason":"Closed"} |
75 | 75 | {"id":"CodeContextBench-bgq","title":"US-013: Ensure all new tasks have Dockerfiles","status":"closed","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T00:42:19.445469836Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T00:50:42.281410025Z","closed_at":"2026-02-16T00:50:42.281410025Z","close_reason":"US-013 complete: duplicate issue, all Dockerfiles verified"} |
76 | 76 | {"id":"CodeContextBench-c0h","title":"US-002: Create docgen-arch-002 Istio Pilot discovery architecture doc","status":"in_progress","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T16:02:21.400634759Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T16:02:27.452076279Z"} |
77 | | -{"id":"CodeContextBench-c6m","title":"Phase 3: Dockerfile.sg_only for write-only suites","status":"open","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T18:42:40.749772061Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T18:42:40.749772061Z","dependencies":[{"issue_id":"CodeContextBench-c6m","depends_on_id":"CodeContextBench-zku","type":"blocks","created_at":"2026-02-16T18:42:50.554721937Z","created_by":"LoCoBench Bot"}]} |
| 77 | +{"id":"CodeContextBench-c6m","title":"Phase 3: Dockerfile.sg_only for write-only suites","status":"closed","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T18:42:40.749772061Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T18:51:31.383411361Z","closed_at":"2026-02-16T18:51:31.383411361Z","close_reason":"14 Dockerfile.sg_only files (5 K8s, 5 LFL, 4 INV) + sgonly_writeonly_2config.sh run script created","dependencies":[{"issue_id":"CodeContextBench-c6m","depends_on_id":"CodeContextBench-zku","type":"blocks","created_at":"2026-02-16T18:42:50.554721937Z","created_by":"LoCoBench Bot"}]} |
78 | 78 | {"id":"CodeContextBench-cey","title":"US-012: Build failure analysis engine","status":"closed","priority":1,"issue_type":"feature","owner":"locobench@anthropic.com","created_at":"2026-02-15T13:53:47.854221697Z","created_by":"LoCoBench Bot","updated_at":"2026-02-15T13:57:20.769673188Z","closed_at":"2026-02-15T13:57:20.769673188Z","close_reason":"US-012 implemented and all ACs verified"} |
79 | 79 | {"id":"CodeContextBench-d00","title":"US-001: Create inv-deep-001 Envoy filter chain deep causal task","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T15:08:15.0008813Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T15:13:36.330715999Z","closed_at":"2026-02-16T15:13:36.330715999Z","close_reason":"US-001 complete: inv-deep-001 Envoy deep causal chain task created and committed"} |
80 | 80 | {"id":"CodeContextBench-d5q","title":"US-003: Create inv-deep-003 - Deep causal chain in Terraform","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T15:28:45.184016129Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T15:40:01.995896974Z","closed_at":"2026-02-16T15:40:01.995896974Z","close_reason":"US-003 complete: inv-deep-003 created with Terraform sensitive marks bug"} |
81 | 81 | {"id":"CodeContextBench-dfp","title":"Run LoCoBench baseline and SG_full configs","description":"QA audit H2: LoCoBench only has SG_base results in MANIFEST (25/25 tasks). Need baseline and SG_full runs for complete 3-config comparison. SG_full should use the updated Deep Search preamble.","status":"closed","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-06T14:50:17.265852053Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T01:33:58.368434048Z","closed_at":"2026-02-16T01:33:58.368434048Z","close_reason":"Stale - LoCoBench dropped in favor of enterprise largerepo tasks (25 tasks across 5 categories). New tasks don't need separate LoCoBench runs.","dependencies":[{"issue_id":"CodeContextBench-dfp","depends_on_id":"CodeContextBench-17e","type":"blocks","created_at":"2026-02-06T21:09:35.481295416Z","created_by":"LoCoBench Bot"}]} |
82 | 82 | {"id":"CodeContextBench-ega","title":"US-008b: Scaffold remaining 3 governance tasks","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-15T14:39:32.981506882Z","created_by":"LoCoBench Bot","updated_at":"2026-02-15T14:45:09.007512651Z","closed_at":"2026-02-15T14:45:09.007512651Z","close_reason":"US-008b complete: 3 governance tasks scaffolded (cross-team-boundary, audit-trail, degraded-context)"} |
83 | 83 | {"id":"CodeContextBench-f0x","title":"US-001: Create nlqa-arch-001 Envoy HTTP filter chain task","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T15:58:19.87022273Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T16:01:50.336839468Z","closed_at":"2026-02-16T16:01:50.336839468Z","close_reason":"US-001 complete: nlqa-arch-001 task created with all acceptance criteria passing"} |
84 | | -{"id":"CodeContextBench-f2q","title":"Phase 4: sg_only for build-requiring suites","status":"open","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T18:42:43.836243874Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T18:42:43.836243874Z","dependencies":[{"issue_id":"CodeContextBench-f2q","depends_on_id":"CodeContextBench-zku","type":"blocks","created_at":"2026-02-16T18:42:50.620537139Z","created_by":"LoCoBench Bot"}]} |
| 84 | +{"id":"CodeContextBench-f2q","title":"Phase 4: sg_only for build-requiring suites","status":"closed","priority":2,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T18:42:43.836243874Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T18:54:26.909852825Z","closed_at":"2026-02-16T18:54:26.909852825Z","close_reason":"Agent _setup_sgonly_truncation() method, sgonly_verifier_wrapper.sh, sgonly_build_2config.sh run script","dependencies":[{"issue_id":"CodeContextBench-f2q","depends_on_id":"CodeContextBench-zku","type":"blocks","created_at":"2026-02-16T18:42:50.620537139Z","created_by":"LoCoBench Bot"}]} |
85 | 85 | {"id":"CodeContextBench-fph","title":"Design blind-bug task variants — vague instructions requiring discovery","description":"For 5-10 existing SWE-bench Pro tasks, create instruction variants where the bug location is NOT given. Current instructions often point to specific files/functions. Variants use realistic user-reported symptoms: e.g., 'Fix NULL handling in album.go' becomes 'Users report albums occasionally fail to load. Investigate and fix.' Same verifier, same Dockerfile, different instruction.md. SG semantic search genuinely helps with discovery when the agent doesn't know WHERE to look. Implementation: create instruction_blind.md variants, add --instruction-variant flag to agent, compare baseline vs SG on discovery success rate. Select tasks where the original instruction reveals the bug location.","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-07T13:00:27.062045186Z","created_by":"LoCoBench Bot","updated_at":"2026-02-12T10:28:43.11808592Z","closed_at":"2026-02-12T10:28:43.11808592Z","close_reason":"Design complete: docs/DESIGN_blind_bug_variants.md. Phase 1 MVP: 3 HIGH-rated tasks (qutebrowser CertificateErrorWrapper, element-web VoiceBroadcastLiveness, openlibrary WorkSearchScheme). Env var INSTRUCTION_VARIANT=blind selects instruction_blind.md. ~400 LOC across 6-8 files. Key metric: (B-A)-(Y-X) measures MCP discovery value."} |
86 | 86 | {"id":"CodeContextBench-fvh","title":"Archive broken first-attempt governance/enterprise runs, regenerate MANIFEST","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-15T18:28:31.19415021Z","created_by":"LoCoBench Bot","updated_at":"2026-02-15T18:29:50.70796578Z","closed_at":"2026-02-15T18:29:50.70796578Z","close_reason":"Archived 2 broken first-attempt runs, regenerated MANIFEST: 495 tasks / 43 runs with governance and enterprise properly included"} |
87 | 87 | {"id":"CodeContextBench-gd4","title":"US-003: Create cr-terraform-001 multi-file Go defect review","status":"closed","priority":1,"issue_type":"task","owner":"locobench@anthropic.com","created_at":"2026-02-16T15:41:56.001924582Z","created_by":"LoCoBench Bot","updated_at":"2026-02-16T15:48:45.705225232Z","closed_at":"2026-02-16T15:48:45.705225232Z","close_reason":"Completed: cr-terraform-001 task created with 6 defects across 4 Go files"} |
|
0 commit comments