diff --git a/gittensor/constants.py b/gittensor/constants.py index bac5752f..d49735f7 100644 --- a/gittensor/constants.py +++ b/gittensor/constants.py @@ -145,6 +145,8 @@ # Merge Predictions # ============================================================================= PREDICTIONS_EMISSIONS_SHARE = 0.15 # % of emissions allocated to prediction competition +PREDICTIONS_TOP_K = 3 # only top-K miners by EMA receive prediction rewards +PREDICTIONS_TOP_K_SHARES = [0.50, 0.35, 0.15] # fixed reward split for top-K miners (must sum to 1.0) PREDICTIONS_EMA_BETA = 0.1 # EMA decay rate for predictions record PREDICTIONS_CORRECTNESS_EXPONENT = 3 # exponent on correctness to harshly punish incorrect predictions diff --git a/gittensor/utils/github_api_tools.py b/gittensor/utils/github_api_tools.py index f1e9c224..7932adc6 100644 --- a/gittensor/utils/github_api_tools.py +++ b/gittensor/utils/github_api_tools.py @@ -1015,6 +1015,28 @@ def check_github_issue_closed(repo: str, issue_number: int, token: str) -> Optio return None +def _escape_graphql_expression(expression: str) -> str: + """Escape special characters in a GraphQL string literal. + + File paths containing backslashes or double quotes break GraphQL query + syntax when interpolated directly. This escapes them so the query remains + valid. + + Args: + expression: Raw string to embed inside a GraphQL double-quoted literal. + + Returns: + Escaped string safe for embedding in GraphQL queries. + """ + return expression.replace('\\', '\\\\').replace('"', '\\"') + + +# Maximum files per GraphQL batch request. GitHub's GraphQL API has query +# complexity limits; batching too many object lookups in a single request can +# cause a 502/complexity error and lose all results. +_MAX_FILES_PER_GRAPHQL_BATCH = 50 + + def fetch_file_contents_batch( repo_owner: str, repo_name: str, @@ -1023,9 +1045,10 @@ def fetch_file_contents_batch( token: str, ) -> Dict[str, Optional[str]]: """ - Fetch multiple file contents from a repository in a single GraphQL request. + Fetch multiple file contents from a repository in batched GraphQL requests. - Uses retry logic with exponential backoff for reliability. + Uses retry logic with exponential backoff for reliability. Batches files + to avoid exceeding GitHub's GraphQL complexity limits. Args: repo_owner: Repository owner @@ -1040,47 +1063,53 @@ def fetch_file_contents_batch( if not file_paths: return {} - # Build GraphQL query with aliased file fields - file_fields = [] - for i, path in enumerate(file_paths): - expression = f'{head_sha}:{path}' - file_fields.append( - f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}' - ) + results: Dict[str, Optional[str]] = {} - query = f""" - query($owner: String!, $name: String!) {{ - repository(owner: $owner, name: $name) {{ - {' '.join(file_fields)} + # Process files in batches to avoid exceeding GraphQL complexity limits + for batch_start in range(0, len(file_paths), _MAX_FILES_PER_GRAPHQL_BATCH): + batch_paths = file_paths[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH] + + # Build GraphQL query with aliased file fields + file_fields = [] + for i, path in enumerate(batch_paths): + expression = _escape_graphql_expression(f'{head_sha}:{path}') + file_fields.append( + f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}' + ) + + query = f""" + query($owner: String!, $name: String!) {{ + repository(owner: $owner, name: $name) {{ + {' '.join(file_fields)} + }} }} - }} - """ + """ - variables = {'owner': repo_owner, 'name': repo_name} + variables = {'owner': repo_owner, 'name': repo_name} - # Execute with retry logic - data = execute_graphql_query(query, variables, token) - if data is None: - bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') - return {path: None for path in file_paths} + data = execute_graphql_query(query, variables, token) + if data is None: + bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') + for path in batch_paths: + results[path] = None + continue - if 'errors' in data: - bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}') + if 'errors' in data: + bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}') - repo_data = data.get('data', {}).get('repository', {}) - results = {} + repo_data = data.get('data', {}).get('repository', {}) - for i, path in enumerate(file_paths): - file_data = repo_data.get(f'file{i}') + for i, path in enumerate(batch_paths): + file_data = repo_data.get(f'file{i}') - if file_data is None: - results[path] = None - elif file_data.get('isBinary'): - results[path] = None - elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES: - results[path] = None - else: - results[path] = file_data.get('text') + if file_data is None: + results[path] = None + elif file_data.get('isBinary'): + results[path] = None + elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES: + results[path] = None + else: + results[path] = file_data.get('text') return results @@ -1093,7 +1122,7 @@ class FileContentPair: new_content: Optional[str] # None for deleted files -def fetch_file_contents_with_base( +def _fetch_file_contents_with_base_batch( repo_owner: str, repo_name: str, base_sha: str, @@ -1101,44 +1130,23 @@ def fetch_file_contents_with_base( file_changes: List['FileChangeType'], token: str, ) -> Dict[str, FileContentPair]: - """ - Fetch both base and head (old and new) versions of files in a single GraphQL request. - - Args: - repo_owner: Repository owner - repo_name: Repository name - base_sha: The base branch SHA (before PR changes) - head_sha: The head/merge commit SHA (after PR changes) - file_changes: List of FileChange objects (needed for status and previous_filename) - token: GitHub PAT for authentication + """Fetch base and head file contents for a single batch of file changes. - Returns: - Dict mapping file paths to FileContentPair (old_content, new_content) - - For new files: old_content is None - - For deleted files: new_content is None - - For renamed files: old_content fetched from previous_filename + Internal helper called by fetch_file_contents_with_base for each batch. """ - if not file_changes: - return {} - - # Build GraphQL query with both base and head versions file_fields = [] for i, fc in enumerate(file_changes): - # Determine the path to fetch for base version - # For renames, use previous_filename; otherwise use current filename base_path = fc.previous_filename if fc.previous_filename else fc.filename head_path = fc.filename - # Only fetch base version if file wasn't newly added if fc.status != 'added': - base_expr = f'{base_sha}:{base_path}' + base_expr = _escape_graphql_expression(f'{base_sha}:{base_path}') file_fields.append( f'base{i}: object(expression: "{base_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}' ) - # Only fetch head version if file wasn't deleted if fc.status != 'removed': - head_expr = f'{head_sha}:{head_path}' + head_expr = _escape_graphql_expression(f'{head_sha}:{head_path}') file_fields.append( f'head{i}: object(expression: "{head_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}' ) @@ -1156,7 +1164,6 @@ def fetch_file_contents_with_base( variables = {'owner': repo_owner, 'name': repo_name} - # Execute with retry logic data = execute_graphql_query(query, variables, token) if data is None: bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') @@ -1172,13 +1179,11 @@ def fetch_file_contents_with_base( old_content = None new_content = None - # Extract base (old) content if applicable if fc.status != 'added': base_data = repo_data.get(f'base{i}') if base_data and not base_data.get('isBinary') and base_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES: old_content = base_data.get('text') - # Extract head (new) content if applicable if fc.status != 'removed': head_data = repo_data.get(f'head{i}') if head_data and not head_data.get('isBinary') and head_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES: @@ -1187,3 +1192,47 @@ def fetch_file_contents_with_base( results[fc.filename] = FileContentPair(old_content=old_content, new_content=new_content) return results + + +def fetch_file_contents_with_base( + repo_owner: str, + repo_name: str, + base_sha: str, + head_sha: str, + file_changes: List['FileChangeType'], + token: str, +) -> Dict[str, FileContentPair]: + """ + Fetch both base and head (old and new) versions of files via batched GraphQL requests. + + Large PRs are split into batches to avoid exceeding GitHub's GraphQL query + complexity limits. File paths are escaped to prevent query syntax errors + from special characters. + + Args: + repo_owner: Repository owner + repo_name: Repository name + base_sha: The base branch SHA (before PR changes) + head_sha: The head/merge commit SHA (after PR changes) + file_changes: List of FileChange objects (needed for status and previous_filename) + token: GitHub PAT for authentication + + Returns: + Dict mapping file paths to FileContentPair (old_content, new_content) + - For new files: old_content is None + - For deleted files: new_content is None + - For renamed files: old_content fetched from previous_filename + """ + if not file_changes: + return {} + + results: Dict[str, FileContentPair] = {} + + for batch_start in range(0, len(file_changes), _MAX_FILES_PER_GRAPHQL_BATCH): + batch = file_changes[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH] + batch_results = _fetch_file_contents_with_base_batch( + repo_owner, repo_name, base_sha, head_sha, batch, token + ) + results.update(batch_results) + + return results diff --git a/gittensor/validator/forward.py b/gittensor/validator/forward.py index c8864806..771e89fe 100644 --- a/gittensor/validator/forward.py +++ b/gittensor/validator/forward.py @@ -8,7 +8,13 @@ import numpy as np from gittensor.classes import MinerEvaluation -from gittensor.constants import ISSUES_TREASURY_EMISSION_SHARE, ISSUES_TREASURY_UID, PREDICTIONS_EMISSIONS_SHARE +from gittensor.constants import ( + ISSUES_TREASURY_EMISSION_SHARE, + ISSUES_TREASURY_UID, + PREDICTIONS_EMISSIONS_SHARE, + PREDICTIONS_TOP_K, + PREDICTIONS_TOP_K_SHARES, +) from gittensor.utils.uids import get_all_uids from gittensor.validator.issue_competitions.forward import issue_competitions from gittensor.validator.merge_predictions.settlement import merge_predictions @@ -82,7 +88,11 @@ def build_prediction_ema_rewards( miner_uids: set[int], miner_evaluations: Dict[int, MinerEvaluation], ) -> np.ndarray: - """Build rewards array from prediction EMA scores, scaled to PREDICTIONS_EMISSIONS_SHARE. + """Build rewards array from prediction EMA scores using top-K winner-takes-most. + + Only the top PREDICTIONS_TOP_K miners by EMA score receive rewards, + split according to PREDICTIONS_TOP_K_SHARES (50%/35%/15%). + Ties are broken by rounds (more settled issues = higher rank). Maps github_id-keyed EMAs back to UIDs via miner_evaluations. """ @@ -101,6 +111,8 @@ def build_prediction_ema_rewards( if evaluation and evaluation.github_id and evaluation.github_id != '0': github_id_to_uid[evaluation.github_id] = uid + # Collect eligible miners: (ema_score, rounds, uid) + eligible: list[tuple[float, int, int]] = [] for mp_record in all_emas: github_id = mp_record['github_id'] ema_score = mp_record['ema_score'] @@ -112,13 +124,27 @@ def build_prediction_ema_rewards( if uid is None or uid not in miner_uids: continue + rounds = mp_record.get('rounds', 0) or 0 + eligible.append((ema_score, rounds, uid)) + + if not eligible: + return prediction_rewards + + # Rank by EMA descending, then by rounds descending (tiebreaker) + eligible.sort(key=lambda x: (x[0], x[1]), reverse=True) + + # Award top-K miners their fixed shares + top_k = min(PREDICTIONS_TOP_K, len(eligible)) + for rank in range(top_k): + _, _, uid = eligible[rank] idx = sorted_uids.index(uid) - prediction_rewards[idx] = ema_score + prediction_rewards[idx] = PREDICTIONS_TOP_K_SHARES[rank] * PREDICTIONS_EMISSIONS_SHARE - # Normalize to sum=1.0, then scale to prediction share - total = prediction_rewards.sum() - if total > 0: - prediction_rewards = (prediction_rewards / total) * PREDICTIONS_EMISSIONS_SHARE + top_miners_log = ', '.join( + f'UID {uid} (ema={ema:.4f}, rounds={rounds}, share={PREDICTIONS_TOP_K_SHARES[i] * 100:.0f}%)' + for i, (ema, rounds, uid) in enumerate(eligible[:top_k]) + ) + bt.logging.info(f'Merge prediction top-{top_k} rewards: {top_miners_log}') return prediction_rewards diff --git a/gittensor/validator/weights/master_repositories.json b/gittensor/validator/weights/master_repositories.json index 52b8eaf5..56009784 100644 --- a/gittensor/validator/weights/master_repositories.json +++ b/gittensor/validator/weights/master_repositories.json @@ -194,8 +194,8 @@ }, "ant-design/ant-design": { "additional_acceptable_branches": ["feature"], - "tier": "Silver", - "weight": 3.98 + "tier": "Bronze", + "weight": 1 }, "ant-design/ant-design-pro": { "tier": "Bronze", @@ -553,8 +553,8 @@ "weight": 0.12 }, "azerothcore/azerothcore-wotlk": { - "tier": "Silver", - "weight": 3.51 + "tier": "Bronze", + "weight": 0.88 }, "Azure-Samples/azure-search-openai-demo": { "tier": "Bronze", @@ -953,8 +953,8 @@ "weight": 0.13 }, "cloudflare/cloudflare-docs": { - "tier": "Silver", - "weight": 3.42 + "tier": "Bronze", + "weight": 0.86 }, "cloudwu/skynet": { "tier": "Bronze", @@ -1560,8 +1560,8 @@ "weight": 0.13 }, "erxes/erxes": { - "tier": "Silver", - "weight": 3.46 + "tier": "Bronze", + "weight": 0.87 }, "eslint/eslint": { "tier": "Bronze", @@ -1858,8 +1858,8 @@ "inactive_at": "2026-03-14" }, "frappe/frappe": { - "tier": "Silver", - "weight": 3.73, + "tier": "Bronze", + "weight": 0.93, "inactive_at": "2026-03-14" }, "frappe/gantt": { @@ -2520,8 +2520,8 @@ "weight": 0.17 }, "janhq/jan": { - "tier": "Silver", - "weight": 3.85 + "tier": "Bronze", + "weight": 0.96 }, "jbeder/yaml-cpp": { "tier": "Bronze", @@ -2878,8 +2878,8 @@ "weight": 0.11 }, "langgenius/dify": { - "tier": "Silver", - "weight": 3.2 + "tier": "Bronze", + "weight": 0.8 }, "laradock/laradock": { "tier": "Bronze", @@ -3417,8 +3417,8 @@ "weight": 0.21 }, "mrdoob/three.js": { - "tier": "Silver", - "weight": 4.12 + "tier": "Bronze", + "weight": 1.03 }, "MudBlazor/MudBlazor": { "inactive_at": "2025-11-29T17:45:38.525Z", @@ -3628,8 +3628,8 @@ "weight": 0.55 }, "nushell/nushell": { - "tier": "Silver", - "weight": 3.91 + "tier": "Bronze", + "weight": 0.98 }, "nuxt/nuxt": { "tier": "Bronze", @@ -3696,8 +3696,8 @@ "weight": 0.13 }, "ohmyzsh/ohmyzsh": { - "tier": "Silver", - "weight": 4.05 + "tier": "Bronze", + "weight": 1.01 }, "oldratlee/useful-scripts": { "tier": "Bronze", @@ -3919,8 +3919,8 @@ "weight": 0.13 }, "openwrt/luci": { - "tier": "Silver", - "weight": 3.67 + "tier": "Bronze", + "weight": 0.92 }, "openwrt/openwrt": { "tier": "Bronze", @@ -4317,8 +4317,8 @@ "weight": 0.23 }, "python-telegram-bot/python-telegram-bot": { - "tier": "Silver", - "weight": 3.32 + "tier": "Bronze", + "weight": 0.83 }, "python-visualization/folium": { "tier": "Bronze", @@ -4362,8 +4362,8 @@ "weight": 0.15 }, "Qiskit/qiskit": { - "tier": "Silver", - "weight": 3.62 + "tier": "Bronze", + "weight": 0.91 }, "qist/tvbox": { "tier": "Bronze", @@ -4445,6 +4445,10 @@ "tier": "Bronze", "weight": 0.17 }, + "react-component/image": { + "tier": "Bronze", + "weight": 0.1 + }, "react-component/picker": { "tier": "Bronze", "weight": 0.1 @@ -4653,7 +4657,7 @@ "sbt/sbt": { "additional_acceptable_branches": ["1.12.x"], "tier": "Bronze", - "weight": 1.2 + "weight": 0.3 }, "scala/scala": { "tier": "Bronze", @@ -4928,8 +4932,8 @@ "weight": 0.15 }, "Stirling-Tools/Stirling-PDF": { - "tier": "Silver", - "weight": 3.24 + "tier": "Bronze", + "weight": 0.81 }, "stleary/JSON-java": { "tier": "Bronze", @@ -4974,8 +4978,8 @@ "weight": 0.91 }, "sveltejs/svelte": { - "tier": "Silver", - "weight": 3.28 + "tier": "Bronze", + "weight": 0.82 }, "svenfuchs/rails-i18n": { "tier": "Bronze", @@ -5153,8 +5157,8 @@ "weight": 0.36 }, "TheOdinProject/curriculum": { - "tier": "Silver", - "weight": 3.79 + "tier": "Bronze", + "weight": 0.95 }, "TheOdinProject/theodinproject": { "tier": "Bronze", @@ -5337,8 +5341,8 @@ "weight": 0.19 }, "twentyhq/twenty": { - "tier": "Silver", - "weight": 3.37 + "tier": "Bronze", + "weight": 0.84 }, "typeorm/typeorm": { "tier": "Bronze", @@ -5385,8 +5389,8 @@ "weight": 0.24 }, "umbraco/Umbraco-CMS": { - "tier": "Silver", - "weight": 3.56 + "tier": "Bronze", + "weight": 0.89 }, "unclecode/crawl4ai": { "tier": "Bronze", diff --git a/tests/utils/test_github_api_tools.py b/tests/utils/test_github_api_tools.py index 185202e8..5ebd7c57 100644 --- a/tests/utils/test_github_api_tools.py +++ b/tests/utils/test_github_api_tools.py @@ -914,6 +914,232 @@ def _make_graphql_response(pr_nodes): return mock_response +_escape_graphql_expression = github_api_tools._escape_graphql_expression +_MAX_FILES_PER_GRAPHQL_BATCH = github_api_tools._MAX_FILES_PER_GRAPHQL_BATCH +fetch_file_contents_batch = github_api_tools.fetch_file_contents_batch +fetch_file_contents_with_base = github_api_tools.fetch_file_contents_with_base +FileContentPair = github_api_tools.FileContentPair + + +# ============================================================================ +# GraphQL Expression Escaping Tests +# ============================================================================ + + +class TestEscapeGraphQLExpression: + """Tests for _escape_graphql_expression helper.""" + + def test_plain_path_unchanged(self): + """Normal file paths pass through unmodified.""" + assert _escape_graphql_expression('abc123:src/main.py') == 'abc123:src/main.py' + + def test_double_quotes_escaped(self): + """Double quotes in paths are escaped to prevent query breakage.""" + assert _escape_graphql_expression('abc123:path/with"quote.py') == 'abc123:path/with\\"quote.py' + + def test_backslash_escaped(self): + """Backslashes in paths are escaped.""" + assert _escape_graphql_expression('abc123:path\\file.py') == 'abc123:path\\\\file.py' + + def test_both_quote_and_backslash(self): + """Paths with both special characters are fully escaped.""" + result = _escape_graphql_expression('abc123:dir\\"file.py') + assert result == 'abc123:dir\\\\\\"file.py' + + def test_empty_string(self): + """Empty string returns empty string.""" + assert _escape_graphql_expression('') == '' + + +# ============================================================================ +# File Contents Batch Tests +# ============================================================================ + + +class TestFetchFileContentsBatch: + """Tests for fetch_file_contents_batch batching and escaping.""" + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_empty_paths_returns_empty(self, mock_graphql): + """Empty file list returns empty dict without any API call.""" + result = fetch_file_contents_batch('owner', 'repo', 'abc123', [], 'token') + assert result == {} + mock_graphql.assert_not_called() + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_small_batch_single_request(self, mock_graphql): + """Few files are fetched in a single GraphQL request.""" + mock_graphql.return_value = { + 'data': { + 'repository': { + 'file0': {'text': 'content_a', 'byteSize': 9, 'isBinary': False}, + 'file1': {'text': 'content_b', 'byteSize': 9, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', ['a.py', 'b.py'], 'token') + + assert mock_graphql.call_count == 1 + assert result == {'a.py': 'content_a', 'b.py': 'content_b'} + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_large_batch_split_into_multiple_requests(self, mock_graphql): + """More files than _MAX_FILES_PER_GRAPHQL_BATCH triggers multiple requests.""" + total_files = _MAX_FILES_PER_GRAPHQL_BATCH + 10 + paths = [f'file_{i}.py' for i in range(total_files)] + + def side_effect(query, variables, token): + # Count how many file aliases are in the query + count = query.count('... on Blob') + repo_data = {} + for i in range(count): + repo_data[f'file{i}'] = {'text': f'content', 'byteSize': 7, 'isBinary': False} + return {'data': {'repository': repo_data}} + + mock_graphql.side_effect = side_effect + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', paths, 'token') + + assert mock_graphql.call_count == 2, 'Should split into 2 batches' + assert len(result) == total_files + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_special_characters_in_path_escaped(self, mock_graphql): + """File paths with special characters are properly escaped in the query.""" + mock_graphql.return_value = { + 'data': { + 'repository': { + 'file0': {'text': 'ok', 'byteSize': 2, 'isBinary': False}, + } + } + } + + fetch_file_contents_batch('owner', 'repo', 'abc123', ['path/with"quote.py'], 'token') + + query_arg = mock_graphql.call_args[0][0] + assert '\\"' in query_arg, 'Double quotes in path should be escaped in GraphQL query' + assert 'with"quote' not in query_arg, 'Unescaped double quote should not appear' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_failed_batch_returns_none_for_affected_files(self, mock_graphql): + """Failed GraphQL request returns None for all files in that batch.""" + mock_graphql.return_value = None + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', ['a.py', 'b.py'], 'token') + + assert result == {'a.py': None, 'b.py': None} + + +# ============================================================================ +# File Contents With Base Batch Tests +# ============================================================================ + + +class TestFetchFileContentsWithBase: + """Tests for fetch_file_contents_with_base batching and escaping.""" + + @staticmethod + def _make_file_change(filename, status='modified', previous_filename=None): + """Create a mock FileChange object.""" + fc = Mock() + fc.filename = filename + fc.status = status + fc.previous_filename = previous_filename + return fc + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_empty_file_changes_returns_empty(self, mock_graphql): + """Empty file changes returns empty dict.""" + result = fetch_file_contents_with_base('owner', 'repo', 'base', 'head', [], 'token') + assert result == {} + mock_graphql.assert_not_called() + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_escapes_special_characters_in_paths(self, mock_graphql): + """File paths with special characters are escaped in both base and head expressions.""" + fc = self._make_file_change('path/with"quote.py') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'base0': {'text': 'old', 'byteSize': 3, 'isBinary': False}, + 'head0': {'text': 'new', 'byteSize': 3, 'isBinary': False}, + } + } + } + + fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + query_arg = mock_graphql.call_args[0][0] + assert 'with\\"quote' in query_arg, 'Double quotes should be escaped' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_large_pr_batched(self, mock_graphql): + """PRs with many files are split into batches.""" + total_files = _MAX_FILES_PER_GRAPHQL_BATCH + 5 + file_changes = [self._make_file_change(f'file_{i}.py') for i in range(total_files)] + + def side_effect(query, variables, token): + repo_data = {} + # Count base/head aliases in the query + for prefix in ('base', 'head'): + i = 0 + while f'{prefix}{i}:' in query: + repo_data[f'{prefix}{i}'] = {'text': 'content', 'byteSize': 7, 'isBinary': False} + i += 1 + return {'data': {'repository': repo_data}} + + mock_graphql.side_effect = side_effect + + result = fetch_file_contents_with_base( + 'owner', 'repo', 'base_sha', 'head_sha', file_changes, 'token' + ) + + assert mock_graphql.call_count == 2, 'Should split into 2 batches' + assert len(result) == total_files + for fc in file_changes: + assert fc.filename in result + assert isinstance(result[fc.filename], FileContentPair) + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_added_file_no_base_fetch(self, mock_graphql): + """Added files should not fetch base content.""" + fc = self._make_file_change('new_file.py', status='added') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'head0': {'text': 'new content', 'byteSize': 11, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + assert result['new_file.py'].old_content is None + assert result['new_file.py'].new_content == 'new content' + query_arg = mock_graphql.call_args[0][0] + assert 'base0' not in query_arg, 'Should not fetch base for added file' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_removed_file_no_head_fetch(self, mock_graphql): + """Removed files should not fetch head content.""" + fc = self._make_file_change('deleted.py', status='removed') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'base0': {'text': 'old content', 'byteSize': 11, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + assert result['deleted.py'].old_content == 'old content' + assert result['deleted.py'].new_content is None + query_arg = mock_graphql.call_args[0][0] + assert 'head0' not in query_arg, 'Should not fetch head for removed file' + + class TestLoadMinersPrsErrorResilience: """Test that a single bad PR doesn't abort fetching for the entire miner.""" diff --git a/tests/validator/merge_predictions/test_merge_predictions.py b/tests/validator/merge_predictions/test_merge_predictions.py index baaa8d7b..f96757f6 100644 --- a/tests/validator/merge_predictions/test_merge_predictions.py +++ b/tests/validator/merge_predictions/test_merge_predictions.py @@ -18,6 +18,7 @@ PREDICTIONS_COOLDOWN_SECONDS, PREDICTIONS_CORRECTNESS_EXPONENT, PREDICTIONS_EMA_BETA, + PREDICTIONS_EMISSIONS_SHARE, PREDICTIONS_MAX_CONSENSUS_BONUS, PREDICTIONS_MAX_ORDER_BONUS, PREDICTIONS_MAX_TIMELINESS_BONUS, @@ -515,7 +516,158 @@ def test_update_ema(self): # ============================================================================= -# 4. Validation +# 4. Top-K reward distribution (build_prediction_ema_rewards) +# ============================================================================= + + +def _make_mock_validator(ema_records: list[dict]) -> MagicMock: + """Create a mock validator with mp_storage returning given EMA records.""" + validator = MagicMock() + validator.mp_storage.get_all_emas.return_value = ema_records + return validator + + +def _make_evaluations(uid_to_github_id: dict[int, str]) -> dict: + """Create mock miner evaluations mapping uid -> github_id.""" + evaluations = {} + for uid, github_id in uid_to_github_id.items(): + ev = MagicMock() + ev.github_id = github_id + evaluations[uid] = ev + return evaluations + + +class TestBuildPredictionEmaRewards: + """Tests for the top-K reward distribution integrated with validator state.""" + + def _call(self, validator, miner_uids, evaluations): + from gittensor.validator.forward import build_prediction_ema_rewards + + return build_prediction_ema_rewards(validator, miner_uids, evaluations) + + def test_standard_top3_split(self): + """3+ miners with positive EMA -> 50/35/15 split.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.9, 'rounds': 10}, + {'github_id': 'b', 'ema_score': 0.7, 'rounds': 8}, + {'github_id': 'c', 'ema_score': 0.5, 'rounds': 6}, + {'github_id': 'd', 'ema_score': 0.3, 'rounds': 4}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2, 3, 4} + evals = _make_evaluations({1: 'a', 2: 'b', 3: 'c', 4: 'd'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(1)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(2)] == pytest.approx(0.35 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(3)] == pytest.approx(0.15 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(4)] == 0.0 + + def test_two_miners_only(self): + """Only 2 miners with positive EMA -> 50% and 35%, rest unallocated.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.8, 'rounds': 5}, + {'github_id': 'b', 'ema_score': 0.4, 'rounds': 3}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2, 3} + evals = _make_evaluations({1: 'a', 2: 'b', 3: '0'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(1)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(2)] == pytest.approx(0.35 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(3)] == 0.0 + assert rewards.sum() < PREDICTIONS_EMISSIONS_SHARE + + def test_single_miner(self): + """Single miner -> receives 50%, rest unallocated.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.6, 'rounds': 2}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2} + evals = _make_evaluations({1: 'a', 2: '0'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(1)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(2)] == 0.0 + + def test_no_positive_ema(self): + """No miners with positive EMA -> all zeros.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.0, 'rounds': 1}, + {'github_id': 'b', 'ema_score': -0.1, 'rounds': 1}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2} + evals = _make_evaluations({1: 'a', 2: 'b'}) + + rewards = self._call(validator, uids, evals) + assert rewards.sum() == 0.0 + + def test_no_emas_at_all(self): + """Empty EMA table -> all zeros.""" + validator = _make_mock_validator([]) + uids = {1, 2} + evals = _make_evaluations({1: 'a', 2: 'b'}) + + rewards = self._call(validator, uids, evals) + assert rewards.sum() == 0.0 + + def test_tie_broken_by_rounds(self): + """Equal EMA scores -> higher rounds count wins.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.5, 'rounds': 3}, + {'github_id': 'b', 'ema_score': 0.5, 'rounds': 10}, + {'github_id': 'c', 'ema_score': 0.5, 'rounds': 7}, + ] + validator = _make_mock_validator(emas) + uids = {1, 2, 3} + evals = _make_evaluations({1: 'a', 2: 'b', 3: 'c'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(2)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(3)] == pytest.approx(0.35 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(1)] == pytest.approx(0.15 * PREDICTIONS_EMISSIONS_SHARE) + + def test_deregistered_miner_excluded(self): + """Miner with EMA but no evaluation entry (deregistered) is excluded.""" + emas = [ + {'github_id': 'a', 'ema_score': 0.9, 'rounds': 10}, + {'github_id': 'orphan', 'ema_score': 0.8, 'rounds': 8}, + {'github_id': 'c', 'ema_score': 0.5, 'rounds': 6}, + ] + validator = _make_mock_validator(emas) + uids = {1, 3} + evals = _make_evaluations({1: 'a', 3: 'c'}) + + rewards = self._call(validator, uids, evals) + sorted_uids = sorted(uids) + + assert rewards[sorted_uids.index(1)] == pytest.approx(0.50 * PREDICTIONS_EMISSIONS_SHARE) + assert rewards[sorted_uids.index(3)] == pytest.approx(0.35 * PREDICTIONS_EMISSIONS_SHARE) + + def test_total_never_exceeds_emission_share(self): + """Total prediction rewards must never exceed PREDICTIONS_EMISSIONS_SHARE.""" + emas = [{'github_id': str(i), 'ema_score': 1.0 - i * 0.01, 'rounds': 100 - i} for i in range(20)] + validator = _make_mock_validator(emas) + uids = set(range(20)) + evals = _make_evaluations({i: str(i) for i in range(20)}) + + rewards = self._call(validator, uids, evals) + assert rewards.sum() == pytest.approx(PREDICTIONS_EMISSIONS_SHARE) + + +# ============================================================================= +# 5. Validation # ============================================================================= @@ -547,7 +699,7 @@ def test_total_exceeds_one(self): # ============================================================================= -# 5. Settlement +# 6. Settlement # ============================================================================= diff --git a/tests/validator/test_emission_shares.py b/tests/validator/test_emission_shares.py index d81b3445..ecba39fd 100644 --- a/tests/validator/test_emission_shares.py +++ b/tests/validator/test_emission_shares.py @@ -1,16 +1,24 @@ # Entrius 2025 """ -Guard-rail test: emission shares must never exceed 100% cumulatively. +Guard-rail tests: emission shares and top-K constant configuration. -If ISSUES_TREASURY_EMISSION_SHARE + PREDICTIONS_EMISSIONS_SHARE >= 1.0, -OSS contributions would receive zero or negative share, breaking the reward system. +Ensures: +- Combined non-OSS emission shares (treasury + predictions) never reach 100%. +- PREDICTIONS_TOP_K_SHARES sums to exactly 1.0 and has length == PREDICTIONS_TOP_K. Run: pytest tests/validator/test_emission_shares.py -v """ -from gittensor.constants import ISSUES_TREASURY_EMISSION_SHARE, PREDICTIONS_EMISSIONS_SHARE +import pytest + +from gittensor.constants import ( + ISSUES_TREASURY_EMISSION_SHARE, + PREDICTIONS_EMISSIONS_SHARE, + PREDICTIONS_TOP_K, + PREDICTIONS_TOP_K_SHARES, +) def test_combined_emission_shares_leave_room_for_oss(): @@ -23,3 +31,18 @@ def test_combined_emission_shares_leave_room_for_oss(): f'= {combined}) must be < 1.0, otherwise OSS contributions get nothing' ) assert oss_share > 0.0 + + +def test_top_k_shares_sum_to_one(): + """Top-K shares must sum to exactly 1.0.""" + assert sum(PREDICTIONS_TOP_K_SHARES) == pytest.approx(1.0), ( + f'PREDICTIONS_TOP_K_SHARES must sum to 1.0, got {sum(PREDICTIONS_TOP_K_SHARES)}' + ) + + +def test_top_k_shares_length_matches_top_k(): + """PREDICTIONS_TOP_K_SHARES length must equal PREDICTIONS_TOP_K.""" + assert len(PREDICTIONS_TOP_K_SHARES) == PREDICTIONS_TOP_K, ( + f'PREDICTIONS_TOP_K_SHARES has {len(PREDICTIONS_TOP_K_SHARES)} entries ' + f'but PREDICTIONS_TOP_K is {PREDICTIONS_TOP_K}' + )