Skip to content

feat(migrations): Add identifier length validation #93413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from django.db import migrations, models

from sentry.new_migrations.migrations import CheckedMigration


class Migration(CheckedMigration):

initial = True

dependencies = []

operations = [
migrations.CreateModel(
name="TestTable",
fields=[
(
"id",
models.AutoField(
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
),
),
],
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from django.db import migrations, models

from sentry.new_migrations.migrations import CheckedMigration


class Migration(CheckedMigration):

dependencies = [
("bad_flow_long_identifier_app", "0001_initial"),
]

operations = [
migrations.AddField(
model_name="testtable",
name="this_is_a_very_long_field_name_that_exceeds_the_postgresql_limit_of_sixty_three_bytes_and_should_fail",
field=models.IntegerField(null=True),
),
]
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from django.db import migrations, models

from sentry.new_migrations.migrations import CheckedMigration


class Migration(CheckedMigration):

initial = True

dependencies = []

operations = [
migrations.CreateModel(
name="TestTable",
fields=[
(
"id",
models.AutoField(
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
),
),
("name", models.CharField(max_length=100)),
],
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from django.db import migrations, models

from sentry.new_migrations.migrations import CheckedMigration


class Migration(CheckedMigration):

dependencies = [
("bad_flow_long_index_app", "0001_initial"),
]

operations = [
migrations.AddIndex(
model_name="testtable",
index=models.Index(
fields=["name"],
name="this_is_a_very_long_index_name_that_exceeds_the_postgresql_limit_of_sixty_three_bytes_and_should_fail",
),
),
]
18 changes: 2 additions & 16 deletions src/sentry/new_migrations/migrations.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from django.db.migrations import Migration, RunSQL, SeparateDatabaseAndState
from django_zero_downtime_migrations.backends.postgres.schema import UnsafeOperationException
from django.db.migrations import Migration

from sentry.new_migrations.monkey.special import SafeRunSQL
from sentry.new_migrations.validators import validate_operation


class CheckedMigration(Migration):
Expand All @@ -26,16 +25,3 @@ def apply(self, project_state, schema_editor, collect_sql=False):
validate_operation(op)

return super().apply(project_state, schema_editor, collect_sql)


def validate_operation(op):
if isinstance(op, RunSQL) and not isinstance(op, SafeRunSQL):
raise UnsafeOperationException(
"Using `RunSQL` is unsafe because our migrations safety framework can't detect problems with the "
"migration, and doesn't apply timeout and statement locks. Use `SafeRunSQL` instead, and get "
"approval from `owners-migrations` to make sure that it's safe."
)

if isinstance(op, SeparateDatabaseAndState):
for db_op in op.database_operations:
validate_operation(db_op)
116 changes: 116 additions & 0 deletions src/sentry/new_migrations/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from __future__ import annotations

from django.db import migrations
from django.db.migrations import RunSQL, SeparateDatabaseAndState
from django_zero_downtime_migrations.backends.postgres.schema import UnsafeOperationException

from sentry.new_migrations.monkey.special import SafeRunSQL


def validate_operation(op):
if isinstance(op, RunSQL) and not isinstance(op, SafeRunSQL):
raise UnsafeOperationException(
"Using `RunSQL` is unsafe because our migrations safety framework can't detect problems with the "
"migration, and doesn't apply timeout and statement locks. Use `SafeRunSQL` instead, and get "
"approval from `owners-migrations` to make sure that it's safe."
)

if isinstance(op, SeparateDatabaseAndState):
for db_op in op.database_operations:
validate_operation(db_op)

# Check identifier lengths for various operation types
_validate_operation_identifiers(op)


MAX_IDENTIFIER_LENGTH = 63 # max identifier length in postgres in bytes


def _validate_identifier_length(identifier):
"""
Validate that PostgreSQL identifiers don't exceed 63 bytes.

Args:
identifier: The identifier to check
"""
if identifier and len(identifier.encode("utf-8")) > MAX_IDENTIFIER_LENGTH:
raise UnsafeOperationException(
f"PostgreSQL identifier '{identifier}' is {len(identifier.encode('utf-8'))} bytes long, "
f"which exceeds the {MAX_IDENTIFIER_LENGTH}-byte limit for identifiers in Postgres (e.g. table names, column names, index names)."
)


def _validate_create_model_identifier_length(op):
_validate_identifier_length(op.name)
# check field names
for field_name, field in op.fields:
_validate_identifier_length(field_name)
if hasattr(field, "db_column"):
_validate_identifier_length(field.db_column)
# check indexes
if hasattr(op, "options") and op.options:
indexes = op.options.get("indexes", [])
for index in indexes:
if hasattr(index, "name") and index.name:
_validate_identifier_length(index.name)


def _validate_add_field_identifier_length(op):
"""Validate identifier length for AddField operation."""
_validate_identifier_length(op.name)
# Check db_column if specified
if hasattr(op.field, "db_column") and op.field.db_column:
_validate_identifier_length(op.field.db_column)


def _validate_rename_field_identifier_length(op):
"""Validate identifier length for RenameField operation."""
_validate_identifier_length(op.new_name)


def _validate_rename_model_identifier_length(op):
"""Validate identifier length for RenameModel operation."""
# Model names become table names (usually prefixed with app name)
_validate_identifier_length(op.new_name)


def _validate_add_index_identifier_length(op):
"""Validate identifier length for AddIndex operation."""
if hasattr(op.index, "name") and op.index.name:
_validate_identifier_length(op.index.name)


def _validate_remove_index_identifier_length(op):
"""Validate identifier length for RemoveIndex operation."""
if hasattr(op, "name") and op.name:
_validate_identifier_length(op.name)


def _validate_add_constraint_identifier_length(op):
"""Validate identifier length for AddConstraint operation."""
if hasattr(op.constraint, "name") and op.constraint.name:
_validate_identifier_length(op.constraint.name)


def _validate_remove_constraint_identifier_length(op):
"""Validate identifier length for RemoveConstraint operation."""
if hasattr(op, "name") and op.name:
_validate_identifier_length(op.name)


OPERATION_VALIDATOR = {
migrations.CreateModel: _validate_create_model_identifier_length,
migrations.AddField: _validate_add_field_identifier_length,
migrations.RenameField: _validate_rename_field_identifier_length,
migrations.RenameModel: _validate_rename_model_identifier_length,
migrations.AddIndex: _validate_add_index_identifier_length,
migrations.RemoveIndex: _validate_remove_index_identifier_length,
migrations.AddConstraint: _validate_add_constraint_identifier_length,
migrations.RemoveConstraint: _validate_remove_constraint_identifier_length,
}


def _validate_operation_identifiers(op):
validator = OPERATION_VALIDATOR.get(type(op))
if validator:
validator(op)
Original file line number Diff line number Diff line change
Expand Up @@ -483,3 +483,29 @@ def test(self):
"SET statement_timeout TO '0ms';",
"SET lock_timeout TO '0ms';",
]


class LongIdentifierFieldTest(BaseSafeMigrationTest):
app = "bad_flow_long_identifier_app"
migrate_from = "0001_initial"
migrate_to = "0002_long_field_name"

def test(self):
with pytest.raises(
UnsafeOperationException,
match="PostgreSQL identifier .* is .* bytes long, which exceeds the 63-byte limit for identifiers in Postgres \\(e\\.g\\. table names, column names, index names\\)\\.",
):
self.run_migration()


class LongIdentifierIndexTest(BaseSafeMigrationTest):
app = "bad_flow_long_index_app"
migrate_from = "0001_initial"
migrate_to = "0002_long_index_name"

def test(self):
with pytest.raises(
UnsafeOperationException,
match="PostgreSQL identifier .* is .* bytes long, which exceeds the 63-byte limit for identifiers in Postgres \\(e\\.g\\. table names, column names, index names\\)\\.",
):
self.run_migration()
Loading
Loading