Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gittensor/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@
# Merge Predictions
# =============================================================================
PREDICTIONS_EMISSIONS_SHARE = 0.15 # % of emissions allocated to prediction competition
PREDICTIONS_TOP_K = 3 # only top-K miners by EMA receive prediction rewards
PREDICTIONS_TOP_K_SHARES = [0.50, 0.35, 0.15] # fixed reward split for top-K miners (must sum to 1.0)

PREDICTIONS_EMA_BETA = 0.1 # EMA decay rate for predictions record
PREDICTIONS_CORRECTNESS_EXPONENT = 3 # exponent on correctness to harshly punish incorrect predictions
Expand Down
177 changes: 113 additions & 64 deletions gittensor/utils/github_api_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,28 @@ def check_github_issue_closed(repo: str, issue_number: int, token: str) -> Optio
return None


def _escape_graphql_expression(expression: str) -> str:
"""Escape special characters in a GraphQL string literal.

File paths containing backslashes or double quotes break GraphQL query
syntax when interpolated directly. This escapes them so the query remains
valid.

Args:
expression: Raw string to embed inside a GraphQL double-quoted literal.

Returns:
Escaped string safe for embedding in GraphQL queries.
"""
return expression.replace('\\', '\\\\').replace('"', '\\"')


# Maximum files per GraphQL batch request. GitHub's GraphQL API has query
# complexity limits; batching too many object lookups in a single request can
# cause a 502/complexity error and lose all results.
_MAX_FILES_PER_GRAPHQL_BATCH = 50


def fetch_file_contents_batch(
repo_owner: str,
repo_name: str,
Expand All @@ -1023,9 +1045,10 @@ def fetch_file_contents_batch(
token: str,
) -> Dict[str, Optional[str]]:
"""
Fetch multiple file contents from a repository in a single GraphQL request.
Fetch multiple file contents from a repository in batched GraphQL requests.

Uses retry logic with exponential backoff for reliability.
Uses retry logic with exponential backoff for reliability. Batches files
to avoid exceeding GitHub's GraphQL complexity limits.

Args:
repo_owner: Repository owner
Expand All @@ -1040,47 +1063,53 @@ def fetch_file_contents_batch(
if not file_paths:
return {}

# Build GraphQL query with aliased file fields
file_fields = []
for i, path in enumerate(file_paths):
expression = f'{head_sha}:{path}'
file_fields.append(
f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)
results: Dict[str, Optional[str]] = {}

query = f"""
query($owner: String!, $name: String!) {{
repository(owner: $owner, name: $name) {{
{' '.join(file_fields)}
# Process files in batches to avoid exceeding GraphQL complexity limits
for batch_start in range(0, len(file_paths), _MAX_FILES_PER_GRAPHQL_BATCH):
batch_paths = file_paths[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH]

# Build GraphQL query with aliased file fields
file_fields = []
for i, path in enumerate(batch_paths):
expression = _escape_graphql_expression(f'{head_sha}:{path}')
file_fields.append(
f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)

query = f"""
query($owner: String!, $name: String!) {{
repository(owner: $owner, name: $name) {{
{' '.join(file_fields)}
}}
}}
}}
"""
"""

variables = {'owner': repo_owner, 'name': repo_name}
variables = {'owner': repo_owner, 'name': repo_name}

# Execute with retry logic
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
return {path: None for path in file_paths}
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
for path in batch_paths:
results[path] = None
continue

if 'errors' in data:
bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}')
if 'errors' in data:
bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}')

repo_data = data.get('data', {}).get('repository', {})
results = {}
repo_data = data.get('data', {}).get('repository', {})

for i, path in enumerate(file_paths):
file_data = repo_data.get(f'file{i}')
for i, path in enumerate(batch_paths):
file_data = repo_data.get(f'file{i}')

if file_data is None:
results[path] = None
elif file_data.get('isBinary'):
results[path] = None
elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES:
results[path] = None
else:
results[path] = file_data.get('text')
if file_data is None:
results[path] = None
elif file_data.get('isBinary'):
results[path] = None
elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES:
results[path] = None
else:
results[path] = file_data.get('text')

return results

Expand All @@ -1093,52 +1122,31 @@ class FileContentPair:
new_content: Optional[str] # None for deleted files


def fetch_file_contents_with_base(
def _fetch_file_contents_with_base_batch(
repo_owner: str,
repo_name: str,
base_sha: str,
head_sha: str,
file_changes: List['FileChangeType'],
token: str,
) -> Dict[str, FileContentPair]:
"""
Fetch both base and head (old and new) versions of files in a single GraphQL request.

Args:
repo_owner: Repository owner
repo_name: Repository name
base_sha: The base branch SHA (before PR changes)
head_sha: The head/merge commit SHA (after PR changes)
file_changes: List of FileChange objects (needed for status and previous_filename)
token: GitHub PAT for authentication
"""Fetch base and head file contents for a single batch of file changes.

Returns:
Dict mapping file paths to FileContentPair (old_content, new_content)
- For new files: old_content is None
- For deleted files: new_content is None
- For renamed files: old_content fetched from previous_filename
Internal helper called by fetch_file_contents_with_base for each batch.
"""
if not file_changes:
return {}

# Build GraphQL query with both base and head versions
file_fields = []
for i, fc in enumerate(file_changes):
# Determine the path to fetch for base version
# For renames, use previous_filename; otherwise use current filename
base_path = fc.previous_filename if fc.previous_filename else fc.filename
head_path = fc.filename

# Only fetch base version if file wasn't newly added
if fc.status != 'added':
base_expr = f'{base_sha}:{base_path}'
base_expr = _escape_graphql_expression(f'{base_sha}:{base_path}')
file_fields.append(
f'base{i}: object(expression: "{base_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)

# Only fetch head version if file wasn't deleted
if fc.status != 'removed':
head_expr = f'{head_sha}:{head_path}'
head_expr = _escape_graphql_expression(f'{head_sha}:{head_path}')
file_fields.append(
f'head{i}: object(expression: "{head_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)
Expand All @@ -1156,7 +1164,6 @@ def fetch_file_contents_with_base(

variables = {'owner': repo_owner, 'name': repo_name}

# Execute with retry logic
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
Expand All @@ -1172,13 +1179,11 @@ def fetch_file_contents_with_base(
old_content = None
new_content = None

# Extract base (old) content if applicable
if fc.status != 'added':
base_data = repo_data.get(f'base{i}')
if base_data and not base_data.get('isBinary') and base_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES:
old_content = base_data.get('text')

# Extract head (new) content if applicable
if fc.status != 'removed':
head_data = repo_data.get(f'head{i}')
if head_data and not head_data.get('isBinary') and head_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES:
Expand All @@ -1187,3 +1192,47 @@ def fetch_file_contents_with_base(
results[fc.filename] = FileContentPair(old_content=old_content, new_content=new_content)

return results


def fetch_file_contents_with_base(
    repo_owner: str,
    repo_name: str,
    base_sha: str,
    head_sha: str,
    file_changes: List['FileChangeType'],
    token: str,
) -> Dict[str, FileContentPair]:
    """
    Fetch both base and head (old and new) versions of files via batched GraphQL requests.

    Large PRs are split into batches so no single query exceeds GitHub's
    GraphQL complexity limits. File paths are escaped before interpolation so
    special characters cannot break query syntax.

    Args:
        repo_owner: Repository owner
        repo_name: Repository name
        base_sha: The base branch SHA (before PR changes)
        head_sha: The head/merge commit SHA (after PR changes)
        file_changes: List of FileChange objects (needed for status and previous_filename)
        token: GitHub PAT for authentication

    Returns:
        Dict mapping file paths to FileContentPair (old_content, new_content)
        - For new files: old_content is None
        - For deleted files: new_content is None
        - For renamed files: old_content fetched from previous_filename
    """
    if not file_changes:
        return {}

    merged: Dict[str, FileContentPair] = {}

    # Walk the change list one batch at a time, folding each batch's
    # per-path results into the combined mapping.
    offset = 0
    while offset < len(file_changes):
        chunk = file_changes[offset : offset + _MAX_FILES_PER_GRAPHQL_BATCH]
        merged.update(
            _fetch_file_contents_with_base_batch(
                repo_owner, repo_name, base_sha, head_sha, chunk, token
            )
        )
        offset += _MAX_FILES_PER_GRAPHQL_BATCH

    return merged
40 changes: 33 additions & 7 deletions gittensor/validator/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
import numpy as np

from gittensor.classes import MinerEvaluation
from gittensor.constants import ISSUES_TREASURY_EMISSION_SHARE, ISSUES_TREASURY_UID, PREDICTIONS_EMISSIONS_SHARE
from gittensor.constants import (
ISSUES_TREASURY_EMISSION_SHARE,
ISSUES_TREASURY_UID,
PREDICTIONS_EMISSIONS_SHARE,
PREDICTIONS_TOP_K,
PREDICTIONS_TOP_K_SHARES,
)
from gittensor.utils.uids import get_all_uids
from gittensor.validator.issue_competitions.forward import issue_competitions
from gittensor.validator.merge_predictions.settlement import merge_predictions
Expand Down Expand Up @@ -82,7 +88,11 @@ def build_prediction_ema_rewards(
miner_uids: set[int],
miner_evaluations: Dict[int, MinerEvaluation],
) -> np.ndarray:
"""Build rewards array from prediction EMA scores, scaled to PREDICTIONS_EMISSIONS_SHARE.
"""Build rewards array from prediction EMA scores using top-K winner-takes-most.

Only the top PREDICTIONS_TOP_K miners by EMA score receive rewards,
split according to PREDICTIONS_TOP_K_SHARES (50%/35%/15%).
Ties are broken by rounds (more settled issues = higher rank).

Maps github_id-keyed EMAs back to UIDs via miner_evaluations.
"""
Expand All @@ -101,6 +111,8 @@ def build_prediction_ema_rewards(
if evaluation and evaluation.github_id and evaluation.github_id != '0':
github_id_to_uid[evaluation.github_id] = uid

# Collect eligible miners: (ema_score, rounds, uid)
eligible: list[tuple[float, int, int]] = []
for mp_record in all_emas:
github_id = mp_record['github_id']
ema_score = mp_record['ema_score']
Expand All @@ -112,13 +124,27 @@ def build_prediction_ema_rewards(
if uid is None or uid not in miner_uids:
continue

rounds = mp_record.get('rounds', 0) or 0
eligible.append((ema_score, rounds, uid))

if not eligible:
return prediction_rewards

# Rank by EMA descending, then by rounds descending (tiebreaker)
eligible.sort(key=lambda x: (x[0], x[1]), reverse=True)

# Award top-K miners their fixed shares
top_k = min(PREDICTIONS_TOP_K, len(eligible))
for rank in range(top_k):
_, _, uid = eligible[rank]
idx = sorted_uids.index(uid)
prediction_rewards[idx] = ema_score
prediction_rewards[idx] = PREDICTIONS_TOP_K_SHARES[rank] * PREDICTIONS_EMISSIONS_SHARE

# Normalize to sum=1.0, then scale to prediction share
total = prediction_rewards.sum()
if total > 0:
prediction_rewards = (prediction_rewards / total) * PREDICTIONS_EMISSIONS_SHARE
top_miners_log = ', '.join(
f'UID {uid} (ema={ema:.4f}, rounds={rounds}, share={PREDICTIONS_TOP_K_SHARES[i] * 100:.0f}%)'
for i, (ema, rounds, uid) in enumerate(eligible[:top_k])
)
bt.logging.info(f'Merge prediction top-{top_k} rewards: {top_miners_log}')

return prediction_rewards

Expand Down
Loading