From e057e57891b0b0826889562f26c0d42fa27d97af Mon Sep 17 00:00:00 2001 From: Clayton <118192227+claytonlin1110@users.noreply.github.com> Date: Tue, 17 Mar 2026 08:55:08 -0500 Subject: [PATCH 1/5] feat: add react-component/image repo as bronze tier (#296) --- gittensor/validator/weights/master_repositories.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gittensor/validator/weights/master_repositories.json b/gittensor/validator/weights/master_repositories.json index 52b8eaf5..3c7c20eb 100644 --- a/gittensor/validator/weights/master_repositories.json +++ b/gittensor/validator/weights/master_repositories.json @@ -4445,6 +4445,10 @@ "tier": "Bronze", "weight": 0.17 }, + "react-component/image": { + "tier": "Bronze", + "weight": 0.1 + }, "react-component/picker": { "tier": "Bronze", "weight": 0.1 From 7e02144a70d6675a90ffd8c6801c17836a9530c4 Mon Sep 17 00:00:00 2001 From: e35ventura Date: Tue, 17 Mar 2026 08:57:51 -0500 Subject: [PATCH 2/5] chore(weights): 18 tier changes, adjust 19 weights (#295) Co-authored-by: Ander <61125407+anderdc@users.noreply.github.com> --- .../weights/master_repositories.json | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/gittensor/validator/weights/master_repositories.json b/gittensor/validator/weights/master_repositories.json index 3c7c20eb..56009784 100644 --- a/gittensor/validator/weights/master_repositories.json +++ b/gittensor/validator/weights/master_repositories.json @@ -194,8 +194,8 @@ }, "ant-design/ant-design": { "additional_acceptable_branches": ["feature"], - "tier": "Silver", - "weight": 3.98 + "tier": "Bronze", + "weight": 1 }, "ant-design/ant-design-pro": { "tier": "Bronze", @@ -553,8 +553,8 @@ "weight": 0.12 }, "azerothcore/azerothcore-wotlk": { - "tier": "Silver", - "weight": 3.51 + "tier": "Bronze", + "weight": 0.88 }, "Azure-Samples/azure-search-openai-demo": { "tier": "Bronze", @@ -953,8 +953,8 @@ "weight": 0.13 }, "cloudflare/cloudflare-docs": { - "tier": "Silver", - "weight": 3.42 + "tier": "Bronze", + "weight": 0.86 }, "cloudwu/skynet": { "tier": "Bronze", @@ -1560,8 +1560,8 @@ "weight": 0.13 }, "erxes/erxes": { - "tier": "Silver", - "weight": 3.46 + "tier": "Bronze", + "weight": 0.87 }, "eslint/eslint": { "tier": "Bronze", @@ -1858,8 +1858,8 @@ "inactive_at": "2026-03-14" }, "frappe/frappe": { - "tier": "Silver", - "weight": 3.73, + "tier": "Bronze", + "weight": 0.93, "inactive_at": "2026-03-14" }, "frappe/gantt": { @@ -2520,8 +2520,8 @@ "weight": 0.17 }, "janhq/jan": { - "tier": "Silver", - "weight": 3.85 + "tier": "Bronze", + "weight": 0.96 }, "jbeder/yaml-cpp": { "tier": "Bronze", @@ -2878,8 +2878,8 @@ "weight": 0.11 }, "langgenius/dify": { - "tier": "Silver", - "weight": 3.2 + "tier": "Bronze", + "weight": 0.8 }, "laradock/laradock": { "tier": "Bronze", @@ -3417,8 +3417,8 @@ "weight": 0.21 }, "mrdoob/three.js": { - "tier": "Silver", - "weight": 4.12 + "tier": "Bronze", + "weight": 1.03 }, "MudBlazor/MudBlazor": { "inactive_at": "2025-11-29T17:45:38.525Z", @@ -3628,8 +3628,8 @@ "weight": 0.55 }, "nushell/nushell": { - "tier": "Silver", - "weight": 3.91 + "tier": "Bronze", + "weight": 0.98 }, "nuxt/nuxt": { "tier": "Bronze", @@ -3696,8 +3696,8 @@ "weight": 0.13 }, "ohmyzsh/ohmyzsh": { - "tier": "Silver", - "weight": 4.05 + "tier": "Bronze", + "weight": 1.01 }, "oldratlee/useful-scripts": { "tier": "Bronze", @@ -3919,8 +3919,8 @@ "weight": 0.13 }, "openwrt/luci": { - "tier": "Silver", - "weight": 3.67 + "tier": "Bronze", + "weight": 0.92 }, "openwrt/openwrt": { "tier": "Bronze", @@ -4317,8 +4317,8 @@ "weight": 0.23 }, "python-telegram-bot/python-telegram-bot": { - "tier": "Silver", - "weight": 3.32 + "tier": "Bronze", + "weight": 0.83 }, "python-visualization/folium": { "tier": "Bronze", @@ -4362,8 +4362,8 @@ "weight": 0.15 }, "Qiskit/qiskit": { - "tier": "Silver", - "weight": 3.62 + "tier": "Bronze", + "weight": 0.91 }, "qist/tvbox": { "tier": "Bronze", @@ -4657,7 +4657,7 @@ "sbt/sbt": { "additional_acceptable_branches": ["1.12.x"], "tier": "Bronze", - "weight": 1.2 + "weight": 0.3 }, "scala/scala": { "tier": "Bronze", @@ -4932,8 +4932,8 @@ "weight": 0.15 }, "Stirling-Tools/Stirling-PDF": { - "tier": "Silver", - "weight": 3.24 + "tier": "Bronze", + "weight": 0.81 }, "stleary/JSON-java": { "tier": "Bronze", @@ -4978,8 +4978,8 @@ "weight": 0.91 }, "sveltejs/svelte": { - "tier": "Silver", - "weight": 3.28 + "tier": "Bronze", + "weight": 0.82 }, "svenfuchs/rails-i18n": { "tier": "Bronze", @@ -5157,8 +5157,8 @@ "weight": 0.36 }, "TheOdinProject/curriculum": { - "tier": "Silver", - "weight": 3.79 + "tier": "Bronze", + "weight": 0.95 }, "TheOdinProject/theodinproject": { "tier": "Bronze", @@ -5341,8 +5341,8 @@ "weight": 0.19 }, "twentyhq/twenty": { - "tier": "Silver", - "weight": 3.37 + "tier": "Bronze", + "weight": 0.84 }, "typeorm/typeorm": { "tier": "Bronze", @@ -5389,8 +5389,8 @@ "weight": 0.24 }, "umbraco/Umbraco-CMS": { - "tier": "Silver", - "weight": 3.56 + "tier": "Bronze", + "weight": 0.89 }, "unclecode/crawl4ai": { "tier": "Bronze", From 570af611abf05ca823dfde711dcb2e46b9a09d31 Mon Sep 17 00:00:00 2001 From: BitToby <218712309+bittoby@users.noreply.github.com> Date: Tue, 17 Mar 2026 17:28:55 +0200 Subject: [PATCH 3/5] feat: top-K winner-takes-most merge prediction rewards (#292) Co-authored-by: Ander <61125407+anderdc@users.noreply.github.com> --- gittensor/constants.py | 2 + gittensor/validator/forward.py | 40 ++++- .../test_merge_predictions.py | 159 +++++++++++++++++- tests/validator/test_emission_shares.py | 31 +++- 4 files changed, 219 insertions(+), 13 deletions(-) diff --git a/gittensor/constants.py b/gittensor/constants.py index bac5752f..d49735f7 100644 --- a/gittensor/constants.py +++ b/gittensor/constants.py @@ -145,6 +145,8 @@ # Merge Predictions # ============================================================================= PREDICTIONS_EMISSIONS_SHARE = 0.15 # % of emissions allocated to prediction competition +PREDICTIONS_TOP_K = 3 # only top-K miners by EMA receive prediction rewards +PREDICTIONS_TOP_K_SHARES = [0.50, 0.35, 0.15] # fixed reward split for top-K miners (must sum to 1.0) PREDICTIONS_EMA_BETA = 0.1 # EMA decay rate for predictions record PREDICTIONS_CORRECTNESS_EXPONENT = 3 # exponent on correctness to harshly punish incorrect predictions diff --git a/gittensor/validator/forward.py b/gittensor/validator/forward.py index c8864806..771e89fe 100644 --- a/gittensor/validator/forward.py +++ b/gittensor/validator/forward.py @@ -8,7 +8,13 @@ import numpy as np from gittensor.classes import MinerEvaluation -from gittensor.constants import ISSUES_TREASURY_EMISSION_SHARE, ISSUES_TREASURY_UID, PREDICTIONS_EMISSIONS_SHARE +from gittensor.constants import ( + ISSUES_TREASURY_EMISSION_SHARE, + ISSUES_TREASURY_UID, + PREDICTIONS_EMISSIONS_SHARE, + PREDICTIONS_TOP_K, + PREDICTIONS_TOP_K_SHARES, +) from gittensor.utils.uids import get_all_uids from gittensor.validator.issue_competitions.forward import issue_competitions from gittensor.validator.merge_predictions.settlement import merge_predictions @@ -82,7 +88,11 @@ def build_prediction_ema_rewards( miner_uids: set[int], miner_evaluations: Dict[int, MinerEvaluation], ) -> np.ndarray: - """Build rewards array from prediction EMA scores, scaled to PREDICTIONS_EMISSIONS_SHARE. + """Build rewards array from prediction EMA scores using top-K winner-takes-most. + + Only the top PREDICTIONS_TOP_K miners by EMA score receive rewards, + split according to PREDICTIONS_TOP_K_SHARES (50%/35%/15%). + Ties are broken by rounds (more settled issues = higher rank). Maps github_id-keyed EMAs back to UIDs via miner_evaluations. """ @@ -101,6 +111,8 @@ def build_prediction_ema_rewards( if evaluation and evaluation.github_id and evaluation.github_id != '0': github_id_to_uid[evaluation.github_id] = uid + # Collect eligible miners: (ema_score, rounds, uid) + eligible: list[tuple[float, int, int]] = [] for mp_record in all_emas: github_id = mp_record['github_id'] ema_score = mp_record['ema_score'] @@ -112,13 +124,27 @@ def build_prediction_ema_rewards( if uid is None or uid not in miner_uids: continue + rounds = mp_record.get('rounds', 0) or 0 + eligible.append((ema_score, rounds, uid)) + + if not eligible: + return prediction_rewards + + # Rank by EMA descending, then by rounds descending (tiebreaker) + eligible.sort(key=lambda x: (x[0], x[1]), reverse=True) + + # Award top-K miners their fixed shares + top_k = min(PREDICTIONS_TOP_K, len(eligible)) + for rank in range(top_k): + _, _, uid = eligible[rank] idx = sorted_uids.index(uid) - prediction_rewards[idx] = ema_score + prediction_rewards[idx] = PREDICTIONS_TOP_K_SHARES[rank] * PREDICTIONS_EMISSIONS_SHARE - # Normalize to sum=1.0, then scale to prediction share - total = prediction_rewards.sum() - if total > 0: - prediction_rewards = (prediction_rewards / total) * PREDICTIONS_EMISSIONS_SHARE + top_miners_log = ', '.join( + f'UID {uid} (ema={ema:.4f}, rounds={rounds}, share={PREDICTIONS_TOP_K_SHARES[i] * 100:.0f}%)' + for i, (ema, rounds, uid) in enumerate(eligible[:top_k]) + ) + bt.logging.info(f'Merge prediction top-{top_k} rewards: {top_miners_log}') return prediction_rewards diff --git a/tests/validator/merge_predictions/test_merge_predictions.py b/tests/validator/merge_predictions/test_merge_predictions.py index baaa8d7b..09538f5e 100644 --- a/tests/validator/merge_predictions/test_merge_predictions.py +++ b/tests/validator/merge_predictions/test_merge_predictions.py @@ -18,6 +18,7 @@ PREDICTIONS_COOLDOWN_SECONDS, PREDICTIONS_CORRECTNESS_EXPONENT, PREDICTIONS_EMA_BETA, + PREDICTIONS_EMISSIONS_SHARE, PREDICTIONS_MAX_CONSENSUS_BONUS, PREDICTIONS_MAX_ORDER_BONUS, PREDICTIONS_MAX_TIMELINESS_BONUS, @@ -515,7 +516,161 @@ def test_update_ema(self): # ============================================================================= -# 4. Validation +# 4. Top-K reward distribution (build_prediction_ema_rewards) +# ============================================================================= + + +def _make_mock_validator(ema_records: list[dict]) -> MagicMock: + """Create a mock validator with mp_storage returning given EMA records.""" + validator = MagicMock() + validator.mp_storage.get_all_emas.return_value = ema_records + return validator + + +def _make_evaluations(uid_to_github_id: dict[int, str]) -> dict: + """Create mock miner evaluations mapping uid -> github_id.""" + evaluations = {} + for uid, github_id in uid_to_github_id.items(): + ev = MagicMock() + ev.github_id = github_id + evaluations[uid] = ev + return evaluations + + +class TestBuildPredictionEmaRewards: + """Tests for the top-K reward distribution integrated with validator state.""" + + def _call(self, validator, miner_uids, evaluations): + from gittensor.validator.forward import build_prediction_ema_rewards + + return build_prediction_ema_rewards(validator, miner_uids, evaluations) + + def test_standard_top3_split(self): + """3+ miners with positive EMA -> 50/35/15 split.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.9, 'rounds': 10}, + {'github_id': 'b', 'ema_score': 0.7, 'rounds': 8}, + {'github_id': 'c', 'ema_score': 0.5, 'rounds': 6}, + {'github_id': 'd', 'ema_score': 0.3, 'rounds': 4}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2, 3, 4} + evals = _make_evaluations({1: 'a', 2: 'b', 3: 'c', 4: 'd'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(1)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(2)] == pytest.approx(0.35 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(3)] == pytest.approx(0.15 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(4)] == 0.0 + + def test_two_miners_only(self): + """Only 2 miners with positive EMA -> 50% and 35%, rest unallocated.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.8, 'rounds': 5}, + {'github_id': 'b', 'ema_score': 0.4, 'rounds': 3}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2, 3} + evals = _make_evaluations({1: 'a', 2: 'b', 3: '0'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(1)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(2)] == pytest.approx(0.35 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(3)] == 0.0 + assert rewards.sum() < PREDICTIONS_EMISSIONS_SHARE + + def test_single_miner(self): + """Single miner -> receives 50%, rest unallocated.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.6, 'rounds': 2}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2} + evals = _make_evaluations({1: 'a', 2: '0'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(1)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(2)] == 0.0 + + def test_no_positive_ema(self): + """No miners with positive EMA -> all zeros.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.0, 'rounds': 1}, + {'github_id': 'b', 'ema_score': -0.1, 'rounds': 1}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2} + evals = _make_evaluations({1: 'a', 2: 'b'}) + + rewards = self._call(validator, uids, evals) + assert rewards.sum() == 0.0 + + def test_no_emas_at_all(self): + """Empty EMA table -> all zeros.""" + validator = _make_mock_validator([]) + uids = {1, 2} + evals = _make_evaluations({1: 'a', 2: 'b'}) + + rewards = self._call(validator, uids, evals) + assert rewards.sum() == 0.0 + + def test_tie_broken_by_rounds(self): + """Equal EMA scores -> higher rounds count wins.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.5, 'rounds': 3}, + {'github_id': 'b', 'ema_score': 0.5, 'rounds': 10}, + {'github_id': 'c', 'ema_score': 0.5, 'rounds': 7}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2, 3} + evals = _make_evaluations({1: 'a', 2: 'b', 3: 'c'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(2)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(3)] == pytest.approx(0.35 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(1)] == pytest.approx(0.15 * PREDICTIONS_EMISSIONS_SHARE) + + def test_deregistered_miner_excluded(self): + """Miner with EMA but no evaluation entry (deregistered) is excluded.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.9, 'rounds': 10}, + {'github_id': 'orphan', 'ema_score': 0.8, 'rounds': 8}, + {'github_id': 'c', 'ema_score': 0.5, 'rounds': 6}, + ] + validator = _make_mock_validator(emas) + uids = {1, 3} + evals = _make_evaluations({1: 'a', 3: 'c'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(1)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(3)] == pytest.approx(0.35 * PREDICTIONS_EMISSIONS_SHARE) + + def test_total_never_exceeds_emission_share(self): + """Total prediction rewards must never exceed PREDICTIONS_EMISSIONS_SHARE.""" + emas = [ + {'github_id': str(i), 'ema_score': 1.0 - i * 0.01, 'rounds': 100 - i} + for i in range(20) + ] + validator = _make_mock_validator(emas) + uids = set(range(20)) + evals = _make_evaluations({i: str(i) for i in range(20)}) + + rewards = self._call(validator, uids, evals) + assert rewards.sum() == pytest.approx(PREDICTIONS_EMISSIONS_SHARE) + + +# ============================================================================= +# 5. Validation # ============================================================================= @@ -547,7 +702,7 @@ def test_total_exceeds_one(self): # ============================================================================= -# 5. Settlement +# 6. Settlement # ============================================================================= diff --git a/tests/validator/test_emission_shares.py b/tests/validator/test_emission_shares.py index d81b3445..ecba39fd 100644 --- a/tests/validator/test_emission_shares.py +++ b/tests/validator/test_emission_shares.py @@ -1,16 +1,24 @@ # Entrius 2025 """ -Guard-rail test: emission shares must never exceed 100% cumulatively. +Guard-rail tests: emission shares and top-K constant configuration. -If ISSUES_TREASURY_EMISSION_SHARE + PREDICTIONS_EMISSIONS_SHARE >= 1.0, -OSS contributions would receive zero or negative share, breaking the reward system. +Ensures: +- Combined non-OSS emission shares (treasury + predictions) never reach 100%. +- PREDICTIONS_TOP_K_SHARES sums to exactly 1.0 and has length == PREDICTIONS_TOP_K. Run: pytest tests/validator/test_emission_shares.py -v """ -from gittensor.constants import ISSUES_TREASURY_EMISSION_SHARE, PREDICTIONS_EMISSIONS_SHARE +import pytest + +from gittensor.constants import ( + ISSUES_TREASURY_EMISSION_SHARE, + PREDICTIONS_EMISSIONS_SHARE, + PREDICTIONS_TOP_K, + PREDICTIONS_TOP_K_SHARES, +) def test_combined_emission_shares_leave_room_for_oss(): @@ -23,3 +31,18 @@ def test_combined_emission_shares_leave_room_for_oss(): f'= {combined}) must be < 1.0, otherwise OSS contributions get nothing' ) assert oss_share > 0.0 + + +def test_top_k_shares_sum_to_one(): + """Top-K shares must sum to exactly 1.0.""" + assert sum(PREDICTIONS_TOP_K_SHARES) == pytest.approx(1.0), ( + f'PREDICTIONS_TOP_K_SHARES must sum to 1.0, got {sum(PREDICTIONS_TOP_K_SHARES)}' + ) + + +def test_top_k_shares_length_matches_top_k(): + """PREDICTIONS_TOP_K_SHARES length must equal PREDICTIONS_TOP_K.""" + assert len(PREDICTIONS_TOP_K_SHARES) == PREDICTIONS_TOP_K, ( + f'PREDICTIONS_TOP_K_SHARES has {len(PREDICTIONS_TOP_K_SHARES)} entries ' + f'but PREDICTIONS_TOP_K is {PREDICTIONS_TOP_K}' + ) From 2bd5ecbed93dda8c5f491988b57bb092b9d44072 Mon Sep 17 00:00:00 2001 From: travellingsoldier85 Date: Sun, 8 Mar 2026 07:52:25 +0000 Subject: [PATCH 4/5] fix: escape file paths in GraphQL queries and add batch-size limits Fix two issues in the file content fetching functions used for token-based PR scoring: 1. **GraphQL injection from unescaped file paths**: File paths containing double quotes or backslashes are interpolated directly into GraphQL query strings, breaking query syntax and causing the entire file content fetch to fail silently. This means PRs touching files with special characters in their paths get scored as 0. - Add _escape_graphql_expression() helper that escapes \ and " - Apply escaping in both fetch_file_contents_batch and fetch_file_contents_with_base 2. **No batch-size limit for large PRs**: PRs with many files generate a single GraphQL query with one object lookup per file. GitHub's GraphQL API has query complexity limits, so large PRs can trigger 502 errors and lose all file contents for scoring. - Add _MAX_FILES_PER_GRAPHQL_BATCH = 50 constant - Split both fetch functions into batched requests - Extract _fetch_file_contents_with_base_batch() as internal helper Tests added for escaping correctness, batch splitting behavior, special character handling, and edge cases (empty input, added/removed files, failed batches). --- gittensor/utils/github_api_tools.py | 177 +++++++++++++-------- tests/utils/test_github_api_tools.py | 226 +++++++++++++++++++++++++++ 2 files changed, 339 insertions(+), 64 deletions(-) diff --git a/gittensor/utils/github_api_tools.py b/gittensor/utils/github_api_tools.py index f1e9c224..7932adc6 100644 --- a/gittensor/utils/github_api_tools.py +++ b/gittensor/utils/github_api_tools.py @@ -1015,6 +1015,28 @@ def check_github_issue_closed(repo: str, issue_number: int, token: str) -> Optio return None +def _escape_graphql_expression(expression: str) -> str: + """Escape special characters in a GraphQL string literal. + + File paths containing backslashes or double quotes break GraphQL query + syntax when interpolated directly. This escapes them so the query remains + valid. + + Args: + expression: Raw string to embed inside a GraphQL double-quoted literal. + + Returns: + Escaped string safe for embedding in GraphQL queries. + """ + return expression.replace('\\', '\\\\').replace('"', '\\"') + + +# Maximum files per GraphQL batch request. GitHub's GraphQL API has query +# complexity limits; batching too many object lookups in a single request can +# cause a 502/complexity error and lose all results. +_MAX_FILES_PER_GRAPHQL_BATCH = 50 + + def fetch_file_contents_batch( repo_owner: str, repo_name: str, @@ -1023,9 +1045,10 @@ def fetch_file_contents_batch( token: str, ) -> Dict[str, Optional[str]]: """ - Fetch multiple file contents from a repository in a single GraphQL request. + Fetch multiple file contents from a repository in batched GraphQL requests. - Uses retry logic with exponential backoff for reliability. + Uses retry logic with exponential backoff for reliability. Batches files + to avoid exceeding GitHub's GraphQL complexity limits. Args: repo_owner: Repository owner @@ -1040,47 +1063,53 @@ def fetch_file_contents_batch( if not file_paths: return {} - # Build GraphQL query with aliased file fields - file_fields = [] - for i, path in enumerate(file_paths): - expression = f'{head_sha}:{path}' - file_fields.append( - f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}' - ) + results: Dict[str, Optional[str]] = {} - query = f""" - query($owner: String!, $name: String!) {{ - repository(owner: $owner, name: $name) {{ - {' '.join(file_fields)} + # Process files in batches to avoid exceeding GraphQL complexity limits + for batch_start in range(0, len(file_paths), _MAX_FILES_PER_GRAPHQL_BATCH): + batch_paths = file_paths[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH] + + # Build GraphQL query with aliased file fields + file_fields = [] + for i, path in enumerate(batch_paths): + expression = _escape_graphql_expression(f'{head_sha}:{path}') + file_fields.append( + f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}' + ) + + query = f""" + query($owner: String!, $name: String!) {{ + repository(owner: $owner, name: $name) {{ + {' '.join(file_fields)} + }} }} - }} - """ + """ - variables = {'owner': repo_owner, 'name': repo_name} + variables = {'owner': repo_owner, 'name': repo_name} - # Execute with retry logic - data = execute_graphql_query(query, variables, token) - if data is None: - bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') - return {path: None for path in file_paths} + data = execute_graphql_query(query, variables, token) + if data is None: + bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') + for path in batch_paths: + results[path] = None + continue - if 'errors' in data: - bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}') + if 'errors' in data: + bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}') - repo_data = data.get('data', {}).get('repository', {}) - results = {} + repo_data = data.get('data', {}).get('repository', {}) - for i, path in enumerate(file_paths): - file_data = repo_data.get(f'file{i}') + for i, path in enumerate(batch_paths): + file_data = repo_data.get(f'file{i}') - if file_data is None: - results[path] = None - elif file_data.get('isBinary'): - results[path] = None - elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES: - results[path] = None - else: - results[path] = file_data.get('text') + if file_data is None: + results[path] = None + elif file_data.get('isBinary'): + results[path] = None + elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES: + results[path] = None + else: + results[path] = file_data.get('text') return results @@ -1093,7 +1122,7 @@ class FileContentPair: new_content: Optional[str] # None for deleted files -def fetch_file_contents_with_base( +def _fetch_file_contents_with_base_batch( repo_owner: str, repo_name: str, base_sha: str, @@ -1101,44 +1130,23 @@ def fetch_file_contents_with_base( file_changes: List['FileChangeType'], token: str, ) -> Dict[str, FileContentPair]: - """ - Fetch both base and head (old and new) versions of files in a single GraphQL request. - - Args: - repo_owner: Repository owner - repo_name: Repository name - base_sha: The base branch SHA (before PR changes) - head_sha: The head/merge commit SHA (after PR changes) - file_changes: List of FileChange objects (needed for status and previous_filename) - token: GitHub PAT for authentication + """Fetch base and head file contents for a single batch of file changes. - Returns: - Dict mapping file paths to FileContentPair (old_content, new_content) - - For new files: old_content is None - - For deleted files: new_content is None - - For renamed files: old_content fetched from previous_filename + Internal helper called by fetch_file_contents_with_base for each batch. """ - if not file_changes: - return {} - - # Build GraphQL query with both base and head versions file_fields = [] for i, fc in enumerate(file_changes): - # Determine the path to fetch for base version - # For renames, use previous_filename; otherwise use current filename base_path = fc.previous_filename if fc.previous_filename else fc.filename head_path = fc.filename - # Only fetch base version if file wasn't newly added if fc.status != 'added': - base_expr = f'{base_sha}:{base_path}' + base_expr = _escape_graphql_expression(f'{base_sha}:{base_path}') file_fields.append( f'base{i}: object(expression: "{base_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}' ) - # Only fetch head version if file wasn't deleted if fc.status != 'removed': - head_expr = f'{head_sha}:{head_path}' + head_expr = _escape_graphql_expression(f'{head_sha}:{head_path}') file_fields.append( f'head{i}: object(expression: "{head_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}' ) @@ -1156,7 +1164,6 @@ def fetch_file_contents_with_base( variables = {'owner': repo_owner, 'name': repo_name} - # Execute with retry logic data = execute_graphql_query(query, variables, token) if data is None: bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') @@ -1172,13 +1179,11 @@ def fetch_file_contents_with_base( old_content = None new_content = None - # Extract base (old) content if applicable if fc.status != 'added': base_data = repo_data.get(f'base{i}') if base_data and not base_data.get('isBinary') and base_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES: old_content = base_data.get('text') - # Extract head (new) content if applicable if fc.status != 'removed': head_data = repo_data.get(f'head{i}') if head_data and not head_data.get('isBinary') and head_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES: @@ -1187,3 +1192,47 @@ def fetch_file_contents_with_base( results[fc.filename] = FileContentPair(old_content=old_content, new_content=new_content) return results + + +def fetch_file_contents_with_base( + repo_owner: str, + repo_name: str, + base_sha: str, + head_sha: str, + file_changes: List['FileChangeType'], + token: str, +) -> Dict[str, FileContentPair]: + """ + Fetch both base and head (old and new) versions of files via batched GraphQL requests. + + Large PRs are split into batches to avoid exceeding GitHub's GraphQL query + complexity limits. File paths are escaped to prevent query syntax errors + from special characters. + + Args: + repo_owner: Repository owner + repo_name: Repository name + base_sha: The base branch SHA (before PR changes) + head_sha: The head/merge commit SHA (after PR changes) + file_changes: List of FileChange objects (needed for status and previous_filename) + token: GitHub PAT for authentication + + Returns: + Dict mapping file paths to FileContentPair (old_content, new_content) + - For new files: old_content is None + - For deleted files: new_content is None + - For renamed files: old_content fetched from previous_filename + """ + if not file_changes: + return {} + + results: Dict[str, FileContentPair] = {} + + for batch_start in range(0, len(file_changes), _MAX_FILES_PER_GRAPHQL_BATCH): + batch = file_changes[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH] + batch_results = _fetch_file_contents_with_base_batch( + repo_owner, repo_name, base_sha, head_sha, batch, token + ) + results.update(batch_results) + + return results diff --git a/tests/utils/test_github_api_tools.py b/tests/utils/test_github_api_tools.py index 185202e8..5ebd7c57 100644 --- a/tests/utils/test_github_api_tools.py +++ b/tests/utils/test_github_api_tools.py @@ -914,6 +914,232 @@ def _make_graphql_response(pr_nodes): return mock_response +_escape_graphql_expression = github_api_tools._escape_graphql_expression +_MAX_FILES_PER_GRAPHQL_BATCH = github_api_tools._MAX_FILES_PER_GRAPHQL_BATCH +fetch_file_contents_batch = github_api_tools.fetch_file_contents_batch +fetch_file_contents_with_base = github_api_tools.fetch_file_contents_with_base +FileContentPair = github_api_tools.FileContentPair + + +# ============================================================================ +# GraphQL Expression Escaping Tests +# ============================================================================ + + +class TestEscapeGraphQLExpression: + """Tests for _escape_graphql_expression helper.""" + + def test_plain_path_unchanged(self): + """Normal file paths pass through unmodified.""" + assert _escape_graphql_expression('abc123:src/main.py') == 'abc123:src/main.py' + + def test_double_quotes_escaped(self): + """Double quotes in paths are escaped to prevent query breakage.""" + assert _escape_graphql_expression('abc123:path/with"quote.py') == 'abc123:path/with\\"quote.py' + + def test_backslash_escaped(self): + """Backslashes in paths are escaped.""" + assert _escape_graphql_expression('abc123:path\\file.py') == 'abc123:path\\\\file.py' + + def test_both_quote_and_backslash(self): + """Paths with both special characters are fully escaped.""" + result = _escape_graphql_expression('abc123:dir\\"file.py') + assert result == 'abc123:dir\\\\\\"file.py' + + def test_empty_string(self): + """Empty string returns empty string.""" + assert _escape_graphql_expression('') == '' + + +# ============================================================================ +# File Contents Batch Tests +# ============================================================================ + + +class TestFetchFileContentsBatch: + """Tests for fetch_file_contents_batch batching and escaping.""" + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_empty_paths_returns_empty(self, mock_graphql): + """Empty file list returns empty dict without any API call.""" + result = fetch_file_contents_batch('owner', 'repo', 'abc123', [], 'token') + assert result == {} + mock_graphql.assert_not_called() + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_small_batch_single_request(self, mock_graphql): + """Few files are fetched in a single GraphQL request.""" + mock_graphql.return_value = { + 'data': { + 'repository': { + 'file0': {'text': 'content_a', 'byteSize': 9, 'isBinary': False}, + 'file1': {'text': 'content_b', 'byteSize': 9, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', ['a.py', 'b.py'], 'token') + + assert mock_graphql.call_count == 1 + assert result == {'a.py': 'content_a', 'b.py': 'content_b'} + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_large_batch_split_into_multiple_requests(self, mock_graphql): + """More files than _MAX_FILES_PER_GRAPHQL_BATCH triggers multiple requests.""" + total_files = _MAX_FILES_PER_GRAPHQL_BATCH + 10 + paths = [f'file_{i}.py' for i in range(total_files)] + + def side_effect(query, variables, token): + # Count how many file aliases are in the query + count = query.count('... on Blob') + repo_data = {} + for i in range(count): + repo_data[f'file{i}'] = {'text': f'content', 'byteSize': 7, 'isBinary': False} + return {'data': {'repository': repo_data}} + + mock_graphql.side_effect = side_effect + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', paths, 'token') + + assert mock_graphql.call_count == 2, 'Should split into 2 batches' + assert len(result) == total_files + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_special_characters_in_path_escaped(self, mock_graphql): + """File paths with special characters are properly escaped in the query.""" + mock_graphql.return_value = { + 'data': { + 'repository': { + 'file0': {'text': 'ok', 'byteSize': 2, 'isBinary': False}, + } + } + } + + fetch_file_contents_batch('owner', 'repo', 'abc123', ['path/with"quote.py'], 'token') + + query_arg = mock_graphql.call_args[0][0] + assert '\\"' in query_arg, 'Double quotes in path should be escaped in GraphQL query' + assert 'with"quote' not in query_arg, 'Unescaped double quote should not appear' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_failed_batch_returns_none_for_affected_files(self, mock_graphql): + """Failed GraphQL request returns None for all files in that batch.""" + mock_graphql.return_value = None + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', ['a.py', 'b.py'], 'token') + + assert result == {'a.py': None, 'b.py': None} + + +# ============================================================================ +# File Contents With Base Batch Tests +# ============================================================================ + + +class TestFetchFileContentsWithBase: + """Tests for fetch_file_contents_with_base batching and escaping.""" + + @staticmethod + def _make_file_change(filename, status='modified', previous_filename=None): + """Create a mock FileChange object.""" + fc = Mock() + fc.filename = filename + fc.status = status + fc.previous_filename = previous_filename + return fc + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_empty_file_changes_returns_empty(self, mock_graphql): + """Empty file changes returns empty dict.""" + result = fetch_file_contents_with_base('owner', 'repo', 'base', 'head', [], 'token') + assert result == {} + mock_graphql.assert_not_called() + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_escapes_special_characters_in_paths(self, mock_graphql): + """File paths with special characters are escaped in both base and head expressions.""" + fc = self._make_file_change('path/with"quote.py') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'base0': {'text': 'old', 'byteSize': 3, 'isBinary': False}, + 'head0': {'text': 'new', 'byteSize': 3, 'isBinary': False}, + } + } + } + + fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + query_arg = mock_graphql.call_args[0][0] + assert 'with\\"quote' in query_arg, 'Double quotes should be escaped' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_large_pr_batched(self, mock_graphql): + """PRs with many files are split into batches.""" + total_files = _MAX_FILES_PER_GRAPHQL_BATCH + 5 + file_changes = [self._make_file_change(f'file_{i}.py') for i in range(total_files)] + + def side_effect(query, variables, token): + repo_data = {} + # Count base/head aliases in the query + for prefix in ('base', 'head'): + i = 0 + while f'{prefix}{i}:' in query: + repo_data[f'{prefix}{i}'] = {'text': 'content', 'byteSize': 7, 'isBinary': False} + i += 1 + return {'data': {'repository': repo_data}} + + mock_graphql.side_effect = side_effect + + result = fetch_file_contents_with_base( + 'owner', 'repo', 'base_sha', 'head_sha', file_changes, 'token' + ) + + assert mock_graphql.call_count == 2, 'Should split into 2 batches' + assert len(result) == total_files + for fc in file_changes: + assert fc.filename in result + assert isinstance(result[fc.filename], FileContentPair) + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_added_file_no_base_fetch(self, mock_graphql): + """Added files should not fetch base content.""" + fc = self._make_file_change('new_file.py', status='added') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'head0': {'text': 'new content', 'byteSize': 11, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + assert result['new_file.py'].old_content is None + assert result['new_file.py'].new_content == 'new content' + query_arg = mock_graphql.call_args[0][0] + assert 'base0' not in query_arg, 'Should not fetch base for added file' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_removed_file_no_head_fetch(self, mock_graphql): + """Removed files should not fetch head content.""" + fc = self._make_file_change('deleted.py', status='removed') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'base0': {'text': 'old content', 'byteSize': 11, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + assert result['deleted.py'].old_content == 'old content' + assert result['deleted.py'].new_content is None + query_arg = mock_graphql.call_args[0][0] + assert 'head0' not in query_arg, 'Should not fetch head for removed file' + + class TestLoadMinersPrsErrorResilience: """Test that a single bad PR doesn't abort fetching for the entire miner.""" From ed53d392a1b019e65166c715a3db8fca11ab8589 Mon Sep 17 00:00:00 2001 From: Ander <61125407+anderdc@users.noreply.github.com> Date: Tue, 17 Mar 2026 10:50:18 -0500 Subject: [PATCH 5/5] fix formatting (#299) --- tests/validator/merge_predictions/test_merge_predictions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/validator/merge_predictions/test_merge_predictions.py b/tests/validator/merge_predictions/test_merge_predictions.py index 09538f5e..f96757f6 100644 --- a/tests/validator/merge_predictions/test_merge_predictions.py +++ b/tests/validator/merge_predictions/test_merge_predictions.py @@ -657,10 +657,7 @@ def test_deregistered_miner_excluded(self): def test_total_never_exceeds_emission_share(self): """Total prediction rewards must never exceed PREDICTIONS_EMISSIONS_SHARE.""" - emas = [ - {'github_id': str(i), 'ema_score': 1.0 - i * 0.01, 'rounds': 100 - i} - for i in range(20) - ] + emas = [{'github_id': str(i), 'ema_score': 1.0 - i * 0.01, 'rounds': 100 - i} for i in range(20)] validator = _make_mock_validator(emas) uids = set(range(20)) evals = _make_evaluations({i: str(i) for i in range(20)})