Skip to content

Commit

Permalink
Merge pull request #860 from ZeitOnline/WCM-291-index
Browse files Browse the repository at this point in the history
WCM-291: Add indexes so we can sort by timestamp columns
  • Loading branch information
stollero authored Sep 26, 2024
2 parents eab110e + 9146adc commit 68ab16b
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 148 deletions.
1 change: 1 addition & 0 deletions core/docs/changelog/WCM-291.change
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
WCM-291: Add indexes so we can sort by timestamp columns
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Create Date: 2024-09-12 10:58:00.181930
"""

from typing import Sequence, Union

from alembic import op
Expand All @@ -25,6 +26,7 @@ def upgrade() -> None:
['channels'],
unique=False,
postgresql_using='gin',
postgresql_ops={'channels': 'jsonb_path_ops'},
postgresql_concurrently=True,
if_not_exists=True,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""create index for timestamps
Revision ID: a4c1b7f678d9
Revises: 6cc99f5afdc5
Create Date: 2024-09-25 16:30:59.236538
"""

from typing import Sequence, Union

from alembic import op
from sqlalchemy import text as sql


# revision identifiers, used by Alembic.
revision: str = 'a4c1b7f678d9'
down_revision: Union[str, None] = '6cc99f5afdc5'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


COLUMNS = [
'date_last_modified_semantic',
'date_last_published',
'date_last_published_semantic',
'date_first_released',
]


def upgrade() -> None:
with op.get_context().autocommit_block():
for column in COLUMNS:
op.create_index(
f'ix_properties_{column}',
'properties',
[sql(f'{column} desc nulls last')],
postgresql_concurrently=True,
if_not_exists=True,
)


def downgrade() -> None:
with op.get_context().autocommit_block():
for column in COLUMNS:
op.drop_index(
f'ix_properties_{column}',
table_name='properties',
postgresql_concurrently=True,
if_exists=True,
)
154 changes: 57 additions & 97 deletions core/src/zeit/connector/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import hashlib

from sqlalchemy import (
TIMESTAMP,
Boolean,
ForeignKey,
Index,
Expand All @@ -26,121 +25,122 @@
ID_NAMESPACE = zeit.connector.interfaces.ID_NAMESPACE[:-1]


class TIMESTAMP(sqlalchemy.TIMESTAMP):
def __init__(self):
super().__init__(timezone=True)


class Base(sqlalchemy.orm.DeclarativeBase):
pass
@classmethod
def Index(cls, *args, name=None, ops=None, **kw):
if name is not None:
name = f'ix_{cls.__tablename__}_{name}'
if ops:
assert len(args) == 1
kw['postgresql_ops'] = {args[0]: ops}
if ops.startswith('json'):
kw['postgresql_using'] = 'gin'
return Index(name, *args, **kw)


class CommonMetadata:
@staticmethod
def table_args(tablename):
return (Index(f'ix_{tablename}_channels', 'channels', postgresql_using='gin'),)

# converter, use name to lookup IConverter instead of type
channels = mapped_column(
JSONB,
nullable=True,
info={'namespace': 'document', 'name': 'channels'},
)


class SemanticChange:
date_last_modified_semantic = mapped_column(
TIMESTAMP(timezone=True),
TIMESTAMP,
info={'namespace': 'document', 'name': 'last-semantic-change'},
)


class Modified:
date_created = mapped_column(
TIMESTAMP(timezone=True),
nullable=True,
TIMESTAMP,
info={'namespace': 'document', 'name': 'date_created'},
)
date_last_checkout = mapped_column(
TIMESTAMP(timezone=True),
nullable=True,
TIMESTAMP,
info={'namespace': 'document', 'name': 'date_last_checkout'},
)


class PublishInfo:
date_first_released = mapped_column(
TIMESTAMP(timezone=True),
nullable=True,
TIMESTAMP,
info={'namespace': 'document', 'name': 'date_first_released'},
)
date_last_published = mapped_column(
TIMESTAMP(timezone=True),
nullable=True,
TIMESTAMP,
info={'namespace': 'workflow', 'name': 'date_last_published'},
)
date_last_published_semantic = mapped_column(
TIMESTAMP(timezone=True),
nullable=True,
TIMESTAMP,
info={'namespace': 'workflow', 'name': 'date_last_published_semantic'},
)
date_print_published = mapped_column(
TIMESTAMP(timezone=True),
nullable=True,
TIMESTAMP,
info={'namespace': 'document', 'name': 'print-publish'},
)


class DevelopmentCommonMetadata:
access = mapped_column(Unicode, index=True, info={'namespace': 'document', 'name': 'access'})


class ContentBase:
__abstract__ = True
class Content(Base, CommonMetadata, Modified, PublishInfo, SemanticChange):
__tablename__ = 'properties'

@declared_attr.directive
@declared_attr
def __table_args__(cls):
return (
Index(
f'ix_{cls.__tablename__}_parent_path_pattern',
cls.Index('type'),
cls.Index('last_updated'),
cls.Index(
'parent_path',
postgresql_ops={'parent_path': 'varchar_pattern_ops'},
ops='varchar_pattern_ops',
name='parent_path_pattern',
),
Index(f'ix_{cls.__tablename__}_parent_path_name', 'parent_path', 'name', unique=True),
Index(
f'ix_{cls.__tablename__}_unsorted',
'unsorted',
postgresql_using='gin',
postgresql_ops={'unsorted': 'jsonb_path_ops'},
cls.Index(
'parent_path',
'name',
unique=True,
name='parent_path_name',
),
cls.Index('unsorted', ops='jsonb_path_ops'),
cls.Index('channels', ops='jsonb_path_ops'),
) + tuple(
cls.Index(getattr(cls, column).desc().nulls_last())
for column in [
'date_last_modified_semantic',
'date_last_published',
'date_last_published_semantic',
'date_first_released',
]
)

id = mapped_column(Uuid(as_uuid=False), primary_key=True)
type = mapped_column(Unicode, nullable=False, server_default='unknown', index=True)
type = mapped_column(Unicode, nullable=False, server_default='unknown')
is_collection = mapped_column(Boolean, nullable=False, server_default='false')

body = mapped_column(UnicodeText)

unsorted = mapped_column(JSONB)

last_updated = mapped_column(
TIMESTAMP(timezone=True),
server_default=sqlalchemy.func.now(),
onupdate=sqlalchemy.func.now(),
index=True,
TIMESTAMP, server_default=sqlalchemy.func.now(), onupdate=sqlalchemy.func.now()
)

parent_path = mapped_column(Unicode)
name = mapped_column(Unicode)

lock_class = NotImplemented

@declared_attr
def lock(cls):
return relationship(
cls.lock_class,
uselist=False,
lazy='noload',
back_populates='content',
cascade='delete, delete-orphan', # Propagate `del content.lock` to DB
passive_deletes=True, # Disable any heuristics, only ever explicitly delete Locks
)
lock = relationship(
'Lock',
uselist=False,
lazy='noload',
back_populates='content',
cascade='delete, delete-orphan', # Propagate `del content.lock` to DB
passive_deletes=True, # Disable any heuristics, only ever explicitly delete Locks
)

@classmethod
def column_by_name(cls, name, namespace):
Expand Down Expand Up @@ -238,19 +238,14 @@ def _body_checksum(self):
return alg.hexdigest()


class LockBase:
__abstract__ = True
class Lock(Base):
__tablename__ = 'locks'

id = mapped_column(Uuid(as_uuid=False), ForeignKey('properties.id'), primary_key=True)
principal = mapped_column(Unicode, nullable=False)
until = mapped_column(TIMESTAMP(timezone=True), nullable=False)

content_class = NotImplemented
until = mapped_column(TIMESTAMP, nullable=False)

@declared_attr
def content(cls):
return relationship(cls.content_class, uselist=False, lazy='noload', back_populates='lock')
content = relationship('Content', uselist=False, lazy='noload', back_populates='lock')

@property
def token(self):
Expand All @@ -267,38 +262,3 @@ def status(self):
return LockStatus.OWN
else:
return LockStatus.FOREIGN


class Content(Base, ContentBase, CommonMetadata, Modified, PublishInfo, SemanticChange):
lock_class = 'Lock'

@declared_attr.directive
def __table_args__(cls):
"""every new inheritance level needs to re-apply the table_args"""
return super().__table_args__ + CommonMetadata.table_args(cls.__tablename__)


class Lock(Base, LockBase):
content_class = 'Content'


class DevelopmentBase(sqlalchemy.orm.DeclarativeBase):
"""Experimental development features, not ready for any deployment or migration!"""


class DevelopmentContent(
DevelopmentBase,
ContentBase,
CommonMetadata,
Modified,
PublishInfo,
SemanticChange,
DevelopmentCommonMetadata,
):
lock_class = 'LockWithMetadataColumns'


# Having to duplicate all classes (and add indirections to their `relationship()`s)
# is annoying, but there's no obvious way around it.
class LockWithMetadataColumns(DevelopmentBase, LockBase):
content_class = 'DevelopmentContent'
Loading

0 comments on commit 68ab16b

Please sign in to comment.