Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gittensor/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@
# Merge Predictions
# =============================================================================
PREDICTIONS_EMISSIONS_SHARE = 0.15 # % of emissions allocated to prediction competition
PREDICTIONS_TOP_K = 3 # only top-K miners by EMA receive prediction rewards
PREDICTIONS_TOP_K_SHARES = [0.50, 0.35, 0.15] # fixed reward split for top-K miners (must sum to 1.0)

PREDICTIONS_EMA_BETA = 0.1 # EMA decay rate for predictions record
PREDICTIONS_CORRECTNESS_EXPONENT = 3 # exponent on correctness to harshly punish incorrect predictions
Expand Down
177 changes: 113 additions & 64 deletions gittensor/utils/github_api_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,28 @@ def check_github_issue_closed(repo: str, issue_number: int, token: str) -> Optio
return None


def _escape_graphql_expression(expression: str) -> str:
"""Escape special characters in a GraphQL string literal.

File paths containing backslashes or double quotes break GraphQL query
syntax when interpolated directly. This escapes them so the query remains
valid.

Args:
expression: Raw string to embed inside a GraphQL double-quoted literal.

Returns:
Escaped string safe for embedding in GraphQL queries.
"""
return expression.replace('\\', '\\\\').replace('"', '\\"')


# Maximum files per GraphQL batch request. GitHub's GraphQL API has query
# complexity limits; batching too many object lookups in a single request can
# cause a 502/complexity error and lose all results.
_MAX_FILES_PER_GRAPHQL_BATCH = 50


def fetch_file_contents_batch(
repo_owner: str,
repo_name: str,
Expand All @@ -1023,9 +1045,10 @@ def fetch_file_contents_batch(
token: str,
) -> Dict[str, Optional[str]]:
"""
Fetch multiple file contents from a repository in a single GraphQL request.
Fetch multiple file contents from a repository in batched GraphQL requests.

Uses retry logic with exponential backoff for reliability.
Uses retry logic with exponential backoff for reliability. Batches files
to avoid exceeding GitHub's GraphQL complexity limits.

Args:
repo_owner: Repository owner
Expand All @@ -1040,47 +1063,53 @@ def fetch_file_contents_batch(
if not file_paths:
return {}

# Build GraphQL query with aliased file fields
file_fields = []
for i, path in enumerate(file_paths):
expression = f'{head_sha}:{path}'
file_fields.append(
f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)
results: Dict[str, Optional[str]] = {}

query = f"""
query($owner: String!, $name: String!) {{
repository(owner: $owner, name: $name) {{
{' '.join(file_fields)}
# Process files in batches to avoid exceeding GraphQL complexity limits
for batch_start in range(0, len(file_paths), _MAX_FILES_PER_GRAPHQL_BATCH):
batch_paths = file_paths[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH]

# Build GraphQL query with aliased file fields
file_fields = []
for i, path in enumerate(batch_paths):
expression = _escape_graphql_expression(f'{head_sha}:{path}')
file_fields.append(
f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)

query = f"""
query($owner: String!, $name: String!) {{
repository(owner: $owner, name: $name) {{
{' '.join(file_fields)}
}}
}}
}}
"""
"""

variables = {'owner': repo_owner, 'name': repo_name}
variables = {'owner': repo_owner, 'name': repo_name}

# Execute with retry logic
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
return {path: None for path in file_paths}
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
for path in batch_paths:
results[path] = None
continue

if 'errors' in data:
bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}')
if 'errors' in data:
bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}')

repo_data = data.get('data', {}).get('repository', {})
results = {}
repo_data = data.get('data', {}).get('repository', {})

for i, path in enumerate(file_paths):
file_data = repo_data.get(f'file{i}')
for i, path in enumerate(batch_paths):
file_data = repo_data.get(f'file{i}')

if file_data is None:
results[path] = None
elif file_data.get('isBinary'):
results[path] = None
elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES:
results[path] = None
else:
results[path] = file_data.get('text')
if file_data is None:
results[path] = None
elif file_data.get('isBinary'):
results[path] = None
elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES:
results[path] = None
else:
results[path] = file_data.get('text')

return results

Expand All @@ -1093,52 +1122,31 @@ class FileContentPair:
new_content: Optional[str] # None for deleted files


def fetch_file_contents_with_base(
def _fetch_file_contents_with_base_batch(
repo_owner: str,
repo_name: str,
base_sha: str,
head_sha: str,
file_changes: List['FileChangeType'],
token: str,
) -> Dict[str, FileContentPair]:
"""
Fetch both base and head (old and new) versions of files in a single GraphQL request.

Args:
repo_owner: Repository owner
repo_name: Repository name
base_sha: The base branch SHA (before PR changes)
head_sha: The head/merge commit SHA (after PR changes)
file_changes: List of FileChange objects (needed for status and previous_filename)
token: GitHub PAT for authentication
"""Fetch base and head file contents for a single batch of file changes.

Returns:
Dict mapping file paths to FileContentPair (old_content, new_content)
- For new files: old_content is None
- For deleted files: new_content is None
- For renamed files: old_content fetched from previous_filename
Internal helper called by fetch_file_contents_with_base for each batch.
"""
if not file_changes:
return {}

# Build GraphQL query with both base and head versions
file_fields = []
for i, fc in enumerate(file_changes):
# Determine the path to fetch for base version
# For renames, use previous_filename; otherwise use current filename
base_path = fc.previous_filename if fc.previous_filename else fc.filename
head_path = fc.filename

# Only fetch base version if file wasn't newly added
if fc.status != 'added':
base_expr = f'{base_sha}:{base_path}'
base_expr = _escape_graphql_expression(f'{base_sha}:{base_path}')
file_fields.append(
f'base{i}: object(expression: "{base_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)

# Only fetch head version if file wasn't deleted
if fc.status != 'removed':
head_expr = f'{head_sha}:{head_path}'
head_expr = _escape_graphql_expression(f'{head_sha}:{head_path}')
file_fields.append(
f'head{i}: object(expression: "{head_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)
Expand All @@ -1156,7 +1164,6 @@ def fetch_file_contents_with_base(

variables = {'owner': repo_owner, 'name': repo_name}

# Execute with retry logic
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
Expand All @@ -1172,13 +1179,11 @@ def fetch_file_contents_with_base(
old_content = None
new_content = None

# Extract base (old) content if applicable
if fc.status != 'added':
base_data = repo_data.get(f'base{i}')
if base_data and not base_data.get('isBinary') and base_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES:
old_content = base_data.get('text')

# Extract head (new) content if applicable
if fc.status != 'removed':
head_data = repo_data.get(f'head{i}')
if head_data and not head_data.get('isBinary') and head_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES:
Expand All @@ -1187,3 +1192,47 @@ def fetch_file_contents_with_base(
results[fc.filename] = FileContentPair(old_content=old_content, new_content=new_content)

return results


def fetch_file_contents_with_base(
    repo_owner: str,
    repo_name: str,
    base_sha: str,
    head_sha: str,
    file_changes: List['FileChangeType'],
    token: str,
) -> Dict[str, FileContentPair]:
    """
    Fetch both base and head (old and new) versions of files via batched GraphQL requests.

    Large PRs are split into batches so no single query exceeds GitHub's
    GraphQL complexity limits. File paths are escaped before interpolation so
    special characters cannot break query syntax.

    Args:
        repo_owner: Repository owner
        repo_name: Repository name
        base_sha: The base branch SHA (before PR changes)
        head_sha: The head/merge commit SHA (after PR changes)
        file_changes: List of FileChange objects (needed for status and previous_filename)
        token: GitHub PAT for authentication

    Returns:
        Dict mapping file paths to FileContentPair (old_content, new_content)
        - For new files: old_content is None
        - For deleted files: new_content is None
        - For renamed files: old_content fetched from previous_filename
    """
    if not file_changes:
        return {}

    merged: Dict[str, FileContentPair] = {}

    # Walk the change list one batch at a time, folding each batch's
    # per-path results into the combined mapping.
    offset = 0
    while offset < len(file_changes):
        chunk = file_changes[offset : offset + _MAX_FILES_PER_GRAPHQL_BATCH]
        merged.update(
            _fetch_file_contents_with_base_batch(
                repo_owner, repo_name, base_sha, head_sha, chunk, token
            )
        )
        offset += _MAX_FILES_PER_GRAPHQL_BATCH

    return merged
40 changes: 33 additions & 7 deletions gittensor/validator/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
import numpy as np

from gittensor.classes import MinerEvaluation
from gittensor.constants import ISSUES_TREASURY_EMISSION_SHARE, ISSUES_TREASURY_UID, PREDICTIONS_EMISSIONS_SHARE
from gittensor.constants import (
ISSUES_TREASURY_EMISSION_SHARE,
ISSUES_TREASURY_UID,
PREDICTIONS_EMISSIONS_SHARE,
PREDICTIONS_TOP_K,
PREDICTIONS_TOP_K_SHARES,
)
from gittensor.utils.uids import get_all_uids
from gittensor.validator.issue_competitions.forward import issue_competitions
from gittensor.validator.merge_predictions.settlement import merge_predictions
Expand Down Expand Up @@ -82,7 +88,11 @@ def build_prediction_ema_rewards(
miner_uids: set[int],
miner_evaluations: Dict[int, MinerEvaluation],
) -> np.ndarray:
"""Build rewards array from prediction EMA scores, scaled to PREDICTIONS_EMISSIONS_SHARE.
"""Build rewards array from prediction EMA scores using top-K winner-takes-most.

Only the top PREDICTIONS_TOP_K miners by EMA score receive rewards,
split according to PREDICTIONS_TOP_K_SHARES (50%/35%/15%).
Ties are broken by rounds (more settled issues = higher rank).

Maps github_id-keyed EMAs back to UIDs via miner_evaluations.
"""
Expand All @@ -101,6 +111,8 @@ def build_prediction_ema_rewards(
if evaluation and evaluation.github_id and evaluation.github_id != '0':
github_id_to_uid[evaluation.github_id] = uid

# Collect eligible miners: (ema_score, rounds, uid)
eligible: list[tuple[float, int, int]] = []
for mp_record in all_emas:
github_id = mp_record['github_id']
ema_score = mp_record['ema_score']
Expand All @@ -112,13 +124,27 @@ def build_prediction_ema_rewards(
if uid is None or uid not in miner_uids:
continue

rounds = mp_record.get('rounds', 0) or 0
eligible.append((ema_score, rounds, uid))

if not eligible:
return prediction_rewards

# Rank by EMA descending, then by rounds descending (tiebreaker)
eligible.sort(key=lambda x: (x[0], x[1]), reverse=True)

# Award top-K miners their fixed shares
top_k = min(PREDICTIONS_TOP_K, len(eligible))
for rank in range(top_k):
_, _, uid = eligible[rank]
idx = sorted_uids.index(uid)
prediction_rewards[idx] = ema_score
prediction_rewards[idx] = PREDICTIONS_TOP_K_SHARES[rank] * PREDICTIONS_EMISSIONS_SHARE

# Normalize to sum=1.0, then scale to prediction share
total = prediction_rewards.sum()
if total > 0:
prediction_rewards = (prediction_rewards / total) * PREDICTIONS_EMISSIONS_SHARE
top_miners_log = ', '.join(
f'UID {uid} (ema={ema:.4f}, rounds={rounds}, share={PREDICTIONS_TOP_K_SHARES[i] * 100:.0f}%)'
for i, (ema, rounds, uid) in enumerate(eligible[:top_k])
)
bt.logging.info(f'Merge prediction top-{top_k} rewards: {top_miners_log}')

return prediction_rewards

Expand Down
Loading