Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,17 @@ Each annotation contains the test class, test name, and failure message.

## Pull Request Workflow

PR title requires a JIRA ticket ID (e.g., `[SPARK-xxxx][SQL] Title`). Ask the user to create a new ticket or provide an existing one if not given. Before writing the PR description, read `.github/PULL_REQUEST_TEMPLATE` and fill in every section from that file.
PR title format is `[SPARK-xxxx][Component] Title`. Infer the PR title from the changes. If no ticket ID is given, create one using `dev/create_spark_jira.py`, using the PR title (without the JIRA ID and component tag) as the ticket title.

python3 dev/create_spark_jira.py "<title>" -c <component> { -t <type> | -p <parent-jira-id> }

- **Component** (`-c`): e.g. "SQL", "Spark Core", "PySpark", "Connect". Run `python3 dev/create_spark_jira.py --list-components` for the full list.
- **Issue type** (`-t`): "Bug", "Improvement", "New Feature", "Test", "Documentation", or "Dependency upgrade".
- **Parent** (`-p`): if the user mentions a parent JIRA ticket (e.g., "this is a subtask of SPARK-12345"), pass it instead of `-t`. The issue type is automatically "Sub-task".

The script sets the latest unreleased version as the default affected version. Ask the user to review and adjust versions and other fields on the JIRA ticket after creation.

Before writing the PR description, read `.github/PULL_REQUEST_TEMPLATE` and fill in every section from that file.

DO NOT push to the upstream repo. Always push to the personal fork. Open PRs against `master` on the upstream repo.

Expand Down
173 changes: 71 additions & 102 deletions dev/create_spark_jira.py
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add back the old script under a different name if people still need it, since create_spark_jira.py should only create the ticket.

Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import argparse
import os
import re
import subprocess
import sys
import traceback

Expand All @@ -41,24 +40,52 @@ def fail(msg):
sys.exit(-1)


def run_cmd(cmd):
    """Echo *cmd*, run it, and return its standard output decoded as UTF-8.

    :param cmd: the command, either as a list of arguments or as a single string.
        A string is tokenized with shell-like rules, so quoted arguments stay
        together and repeated spaces do not produce empty arguments.
    :return: the command's stdout as ``str``.
    :raises subprocess.CalledProcessError: if the command exits with a non-zero status.
    """
    import shlex  # local import: only needed to tokenize string commands

    print(cmd)
    argv = cmd if isinstance(cmd, list) else shlex.split(cmd)
    return subprocess.check_output(argv).decode("utf-8")
def get_jira_client():
    """Build a JIRA client for the server at JIRA_API_BASE.

    Authentication uses the token held in JIRA_ACCESS_TOKEN.
    """
    server_options = {"server": JIRA_API_BASE}
    # NOTE(review): the timeout pair is presumably (connect, read) seconds -- confirm.
    client = jira.client.JIRA(server_options, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30))
    return client


def create_jira_issue(title, parent_jira_id=None, issue_type=None, version=None, component=None):
asf_jira = jira.client.JIRA(
{"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
def list_components(asf_jira):
    """Print the name of every non-archived SPARK component, one per line, sorted by name.

    :param asf_jira: client object exposing ``project_components(project_key)``.
    """
    active_names = [
        component.name
        for component in asf_jira.project_components("SPARK")
        # A component without an "archived" entry in its raw data counts as active.
        if not component.raw.get("archived", False)
    ]
    for name in sorted(active_names):
        print(name)


def main():
parser = argparse.ArgumentParser(description="Create a Spark JIRA issue.")
parser.add_argument("title", nargs="?", help="Title of the JIRA issue")
parser.add_argument(
"-p",
"--parent",
help="Parent JIRA ID to create a subtask (e.g. SPARK-12345).",
)
parser.add_argument(
"-t",
"--type",
help="Issue type (e.g. Bug, Improvement)",
)
parser.add_argument("-c", "--component", help="Component for the issue")
parser.add_argument(
"--list-components", action="store_true", help="List available components and exit"
)
args = parser.parse_args()

if version:
affected_version = version
else:
def check_jira_access():
    """Verify the prerequisites for talking to JIRA and return a client.

    Every unmet prerequisite (jira library not imported, access token not set)
    is collected and reported through a single ``fail`` call; otherwise the
    client built by ``get_jira_client`` is returned.
    """
    prerequisites = (
        (JIRA_IMPORTED, "jira-python library not installed, run 'pip install jira'"),
        (JIRA_ACCESS_TOKEN, "JIRA_ACCESS_TOKEN env-var not set"),
    )
    problems = [message for satisfied, message in prerequisites if not satisfied]
    if problems:
        fail(
            "Cannot create JIRA ticket automatically (%s). "
            "Please create the ticket manually at %s" % ("; ".join(problems), JIRA_API_BASE)
        )
    return get_jira_client()

def detect_affected_version(asf_jira):
versions = asf_jira.project_versions("SPARK")
# Consider only x.y.z, unreleased, unarchived versions
versions = [
x
for x in versions
Expand All @@ -67,111 +94,53 @@ def create_jira_issue(title, parent_jira_id=None, issue_type=None, version=None,
and re.match(r"\d+\.\d+\.\d+", x.name)
]
versions = sorted(versions, key=lambda x: x.name, reverse=True)
affected_version = versions[0].name
if not versions:
fail("Cannot detect affected version. "
"Please create the ticket manually at %s" % JIRA_API_BASE)
return versions[0].name

if args.list_components:
asf_jira = check_jira_access()
list_components(asf_jira)
return

if not args.title:
parser.error("the following arguments are required: title")

if not args.component:
parser.error("-c/--component is required")

if args.parent and args.type:
parser.error("--parent and --type cannot be used together")

if not args.parent and not args.type:
parser.error("-t/--type is required when not creating a subtask")

asf_jira = check_jira_access()
affected_version = detect_affected_version(asf_jira)

issue_dict = {
"project": {"key": "SPARK"},
"summary": title,
"summary": args.title,
"description": "",
"versions": [{"name": affected_version}],
}

if component:
issue_dict["components"] = [{"name": component}]
issue_dict["components"] = [{"name": args.component}]

if parent_jira_id:
if args.parent:
issue_dict["issuetype"] = {"name": "Sub-task"}
issue_dict["parent"] = {"key": parent_jira_id}
issue_dict["parent"] = {"key": args.parent}
else:
issue_dict["issuetype"] = {"name": issue_type if issue_type else "Improvement"}
issue_dict["issuetype"] = {"name": args.type}

try:
new_issue = asf_jira.create_issue(fields=issue_dict)
return new_issue.key
print(new_issue.key)
except Exception as e:
fail("Failed to create JIRA issue: %s" % e)


def create_and_checkout_branch(jira_id):
    """Create a git branch named after *jira_id* and switch to it.

    :param jira_id: JIRA issue key used verbatim as the branch name.

    A failing git command is reported through ``fail``.
    """
    checkout_command = "git checkout -b %s" % jira_id
    try:
        run_cmd(checkout_command)
    except subprocess.CalledProcessError as e:
        fail("Failed to create branch %s: %s" % (jira_id, e))
    else:
        print("Created and checked out branch: %s" % jira_id)


def create_commit(jira_id, title):
    """Commit all tracked changes with the message ``[<jira_id>] <title>``.

    :param jira_id: JIRA issue key placed in brackets at the start of the message.
    :param title: text that follows the bracketed key.

    A failing git command is reported through ``fail``.
    """
    message = "[%s] %s" % (jira_id, title)
    try:
        # List form keeps the whole message as a single argument to git.
        run_cmd(["git", "commit", "-a", "-m", message])
    except subprocess.CalledProcessError as e:
        fail("Failed to create commit: %s" % e)
    else:
        print("Created a commit with message: %s" % message)


def choose_components():
    """Interactively pick one non-archived SPARK component and return its name.

    The components are printed as a 1-based numbered menu; the prompt repeats
    until the user enters a number that is on the menu.
    """
    client = jira.client.JIRA(
        {"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
    )
    candidates = [
        component
        for component in client.project_components("SPARK")
        if not component.raw.get("archived", False)
    ]
    for number, component in enumerate(candidates, start=1):
        print("%d. %s" % (number, component.name))

    while True:
        try:
            position = int(input("Please choose a component by number: "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if 1 <= position <= len(candidates):
            return candidates[position - 1].name
        print("Invalid number. Please try again.")


def main():
if not JIRA_IMPORTED:
fail("Could not find jira-python library. Run 'sudo pip3 install jira' to install.")

if not JIRA_ACCESS_TOKEN:
fail("The env-var JIRA_ACCESS_TOKEN is not set.")

parser = argparse.ArgumentParser(description="Create a Spark JIRA issue.")
parser.add_argument("title", nargs="?", help="Title of the JIRA issue")
parser.add_argument("-p", "--parent", help="Parent JIRA ID for subtasks")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm always using this parent JIRA ID feature. Please restore it, @cloud-fan.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

parser.add_argument(
"-t",
"--type",
help="Issue type to create when no parent is specified (e.g. Bug). Defaults to Improvement.",
)
parser.add_argument("-v", "--version", help="Version to use for the issue")
parser.add_argument("-c", "--component", help="Component for the issue")
args = parser.parse_args()

if args.parent:
asf_jira = jira.client.JIRA(
{"server": JIRA_API_BASE}, token_auth=JIRA_ACCESS_TOKEN, timeout=(3.05, 30)
)
parent_issue = asf_jira.issue(args.parent)
print("Parent issue title: %s" % parent_issue.fields.summary)
print("Creating a subtask of %s with title: %s" % (args.parent, args.title))
else:
print("Creating JIRA issue with title: %s" % args.title)

if not args.title:
parser.error("the following arguments are required: title")

if not args.component:
args.component = choose_components()

jira_id = create_jira_issue(args.title, args.parent, args.type, args.version, args.component)
print("Created JIRA issue: %s" % jira_id)

create_and_checkout_branch(jira_id)

create_commit(jira_id, args.title)


if __name__ == "__main__":
try:
main()
Expand Down