diff --git a/cms/djangoapps/modulestore_migrator/tasks.py b/cms/djangoapps/modulestore_migrator/tasks.py index c65c8af0c59c..3426302c3c10 100644 --- a/cms/djangoapps/modulestore_migrator/tasks.py +++ b/cms/djangoapps/modulestore_migrator/tasks.py @@ -7,7 +7,7 @@ import os import typing as t from dataclasses import dataclass -from datetime import datetime, timezone +from datetime import UTC, datetime, timezone from enum import Enum from gettext import ngettext @@ -341,6 +341,7 @@ def _import_structure( openedx_content equivalent. """ migration = source_data.migration + migration_start_time = datetime.now(UTC) migration_context = _MigrationContext( used_component_keys=set( LibraryUsageLocatorV2(target_library.key, block_type, block_id) # type: ignore[abstract] @@ -367,14 +368,35 @@ def _import_structure( repeat_handling_strategy=RepeatHandlingStrategy(migration.repeat_handling_strategy), preserve_url_slugs=migration.preserve_url_slugs, created_by=status.user_id, - created_at=datetime.now(timezone.utc), # noqa: UP017 + created_at=migration_start_time, ) - with content_api.bulk_draft_changes_for(migration.target.id) as change_log: + with content_api.bulk_draft_changes_for( + learning_package_id=migration.target.id, + changed_by=migration_context.created_by, + changed_at=migration_start_time, + ) as change_log: root_migrated_node = _migrate_node( context=migration_context, source_node=root_node, ) - change_log.save() + # Publishing is not allowed inside bulk_draft_changes_for(), so publish + # everything that was modified now that the context has exited. We use the + # change log to identify which drafts to publish. If the context produced + # no records, it deletes the change log on exit (clearing its PK), in which + # case there's nothing to publish and we return None so callers don't try + # to associate the deleted change log with the migration. + if not change_log.pk: + return None, root_migrated_node + if change_log.records.exists(): + drafts_to_publish = content_api.get_all_drafts(migration.target.id).filter( + entity_id__in=change_log.records.values_list("entity_id", flat=True), + ) + content_api.publish_from_drafts( + migration.target.id, + draft_qset=drafts_to_publish, + published_by=migration_context.created_by, + # published_at will be later than 'migration_start_time' as _migrate_node() may have taken quite a while. + ) return change_log, root_migrated_node @@ -408,7 +430,7 @@ def _populate_collection(user_id: int, migration: models.ModulestoreMigration) - ) if block_target_pks: content_api.add_to_collection( - learning_package_id=migration.target.pk, + learning_package_id=migration.target.id, collection_code=migration.target_collection.collection_code, entities_qset=PublishableEntity.objects.filter(id__in=block_target_pks), created_by=user_id, @@ -421,6 +443,7 @@ def _create_collection( library_key: LibraryLocatorV2, title: str, course_name: str | None = None, + created_by: int | None = None, ) -> Collection: """ Creates a collection in the given library @@ -446,6 +469,7 @@ def _create_collection( collection_key=modified_key, title=f"{title}{f'_{attempt}' if attempt > 0 else ''}", description=description, + created_by=created_by, ) except libraries_api.LibraryCollectionAlreadyExists: attempt += 1 @@ -705,6 +729,7 @@ def bulk_migrate_from_modulestore( library_key=target_library_locator, title=legacy_root_list[i].display_name, course_name=legacy_root_list[i].display_name if source_data.source.key.is_course else None, + created_by=user_id, ) ) _populate_collection(user_id, migration) @@ -881,6 +906,7 @@ def _migrate_container( user_id=context.created_by, ) if container_exists and context.should_skip_strategy: + assert container.draft_version_num is not None # We know it exists, this is just for mypy return PublishableEntityVersion.objects.get( entity_id=container.container_id, version_num=container.draft_version_num, @@ -894,11 +920,7 @@ def _migrate_container( created_by=context.created_by, ).publishable_entity_version - # Publish the container - libraries_api.publish_container_changes( - container.container_key, - context.created_by, - ) + # Note: Publishing happens after bulk_draft_changes_for exits, in _import_structure. context.used_container_slugs.add(container.container_key.container_id) return container_publishable_entity_version, None @@ -969,11 +991,10 @@ def _migrate_component( # Create the new component version for it component_version = libraries_api.set_library_block_olx( - target_key, new_olx_str=olx, paths_to_media=paths_to_media_ids, + target_key, new_olx_str=olx, paths_to_media=paths_to_media_ids, created_by=context.created_by, ) - # Publish the component - libraries_api.publish_component_changes(target_key, context.created_by) + # Note: Publishing happens after bulk_draft_changes_for exits, in _import_structure. context.used_component_keys.add(target_key) return component_version.publishable_entity_version, None diff --git a/cms/djangoapps/modulestore_migrator/tests/test_tasks.py b/cms/djangoapps/modulestore_migrator/tests/test_tasks.py index ae4ad1548937..2b583265feb5 100644 --- a/cms/djangoapps/modulestore_migrator/tests/test_tasks.py +++ b/cms/djangoapps/modulestore_migrator/tests/test_tasks.py @@ -370,8 +370,9 @@ def test_migrate_component_success(self): "problem", result.componentversion.component.component_type.name ) - # The component is published - self.assertFalse(result.componentversion.component.versioning.has_unpublished_changes) # noqa: PT009 + # The component is left as a draft; publishing is the caller's responsibility + # (handled in _import_structure after bulk_draft_changes_for exits). + self.assertTrue(result.componentversion.component.versioning.has_unpublished_changes) # noqa: PT009 def test_migrate_component_failure(self): """ @@ -802,8 +803,9 @@ def test_migrate_container_different_container_types(self): container_version = result.containerversion self.assertEqual(container_version.title, f"Test {block_type.title()}") # noqa: PT009 - # The container is published - self.assertFalse(content_api.contains_unpublished_changes(container_version.container.pk)) # noqa: PT009 # pylint: disable=line-too-long + # The container is left as a draft; publishing is the caller's + # responsibility (handled in _import_structure after bulk_draft_changes_for exits). + self.assertTrue(content_api.contains_unpublished_changes(container_version.container.pk)) # noqa: PT009 # pylint: disable=line-too-long def test_migrate_container_same_title(self): """ diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py index 371e830260f0..4bf18b466acf 100644 --- a/openedx/core/djangoapps/content/search/api.py +++ b/openedx/core/djangoapps/content/search/api.py @@ -152,11 +152,21 @@ def _wait_for_meili_task(info: TaskInfo) -> None: Simple helper method to wait for a Meilisearch task to complete This method will block until the task is completed, so it should only be used in celery tasks or management commands. + + ✨ Note: "Meilisearch processes tasks in the order they were added to the queue." + per https://www.meilisearch.com/docs/capabilities/indexing/tasks_and_batches/monitor_tasks#monitoring-task-status + so if you need to wait for multiple tasks, simply wait for the final (last) task. """ client = _get_meilisearch_client() + # This function almost always gets called immediately after enqueing a task, and from experiments, an initial wait + # of at least 15ms is warranted, as the task is almost never done in less than 10ms. We are using 20ms which seems + # to work well without requiring an additional wait in most cases. + sleep_delay = 0.020 # Initial wait is only 20ms but we will back off exponentially + time.sleep(sleep_delay) current_status = client.get_task(info.task_uid) while current_status.status in ("enqueued", "processing"): - time.sleep(0.5) + time.sleep(sleep_delay) + sleep_delay = min(sleep_delay * 1.5, 2.0) # Increase delay up to 2s current_status = client.get_task(info.task_uid) if current_status.status != "succeeded": try: @@ -166,15 +176,6 @@ def _wait_for_meili_task(info: TaskInfo) -> None: raise MeilisearchError(err_reason) -def _wait_for_meili_tasks(info_list: list[TaskInfo]) -> None: - """ - Simple helper method to wait for multiple Meilisearch tasks to complete - """ - while info_list: - info = info_list.pop() - _wait_for_meili_task(info) - - def _index_exists(index_name: str) -> bool: """ Check if an index exists @@ -324,13 +325,10 @@ def _update_index_docs(docs) -> None: client = _get_meilisearch_client() current_rebuild_index_name = _get_running_rebuild_index_name() - tasks = [] if current_rebuild_index_name: # If there is a rebuild in progress, the document will also be added to the new index. - tasks.append(client.index(current_rebuild_index_name).update_documents(docs)) - tasks.append(client.index(STUDIO_INDEX_NAME).update_documents(docs)) - - _wait_for_meili_tasks(tasks) + client.index(current_rebuild_index_name).update_documents(docs) + _wait_for_meili_task(client.index(STUDIO_INDEX_NAME).update_documents(docs)) def only_if_meilisearch_enabled(f): @@ -828,13 +826,10 @@ def _delete_documents(filter_query: str) -> None: client = _get_meilisearch_client() current_rebuild_index_name = _get_running_rebuild_index_name() - tasks = [] if current_rebuild_index_name: # If there is a rebuild in progress, the document will also be removed from the new index. - tasks.append(client.index(current_rebuild_index_name).delete_documents(filter=filter_query)) - tasks.append(client.index(STUDIO_INDEX_NAME).delete_documents(filter=filter_query)) - - _wait_for_meili_tasks(tasks) + client.index(current_rebuild_index_name).delete_documents(filter=filter_query) + _wait_for_meili_task(client.index(STUDIO_INDEX_NAME).delete_documents(filter=filter_query)) def _delete_index_doc(doc_id) -> None: @@ -849,14 +844,11 @@ def _delete_index_doc(doc_id) -> None: client = _get_meilisearch_client() current_rebuild_index_name = _get_running_rebuild_index_name() - tasks = [] if current_rebuild_index_name: # If there is a rebuild in progress, the document will also be removed from the new index. - tasks.append(client.index(current_rebuild_index_name).delete_document(doc_id)) - - tasks.append(client.index(STUDIO_INDEX_NAME).delete_document(doc_id)) + client.index(current_rebuild_index_name).delete_document(doc_id) - _wait_for_meili_tasks(tasks) + _wait_for_meili_task(client.index(STUDIO_INDEX_NAME).delete_document(doc_id)) def upsert_library_block_index_doc(usage_key: UsageKey) -> None: diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 4623b814d0a0..028fc203ee95 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -12,7 +12,7 @@ from opaque_keys.edx.keys import ContainerKey, LearningContextKey, OpaqueKey, UsageKey from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLocator from openedx_content import api as content_api -from openedx_content.models_api import Collection, Section, Subsection, Unit +from openedx_content.models_api import Collection from rest_framework.exceptions import NotFound from openedx.core.djangoapps.content.search.models import SearchAccess @@ -520,7 +520,7 @@ def searchable_doc_containers(object_id: OpaqueKey, container_type: str) -> dict else: log.warning(f"Unexpected key type for {object_id}") - except ObjectDoesNotExist: + except (ObjectDoesNotExist, lib_api.ContentLibraryBlockNotFound): log.warning(f"No library item found for {object_id}") if not containers: @@ -558,6 +558,9 @@ def searchable_doc_for_collection( if collection: assert collection.collection_code == collection_key.collection_id + # Collections themselves are not publishable entities, so don't have a "draft" or "published" version, but the + # entities they contain are publishable, so the number of entities in the collection may be different between + # draft and published views, if some draft entities are unpublished or are published but the draft is deleted. draft_num_children = content_api.filter_publishable_entities( collection.entities, has_draft=True, @@ -627,42 +630,21 @@ def searchable_doc_for_container( log.error(f"Container {container_key} not found") return doc - draft_children = lib_api.get_container_children( - container_key, - published=False, - ) + draft_children = lib_api.get_container_children_list(container_key, published=False) publish_status = PublishStatus.published if container.last_published is None: publish_status = PublishStatus.never elif container.has_unpublished_changes: publish_status = PublishStatus.modified - container_type_code = container_key.container_type - - def get_child_keys(children) -> list[str]: - match container_type_code: - case Unit.type_code: - return [ - str(child.usage_key) - for child in children - ] - case Subsection.type_code | Section.type_code: - return [ - str(child.container_key) - for child in children - ] - - def get_child_names(children) -> list[str]: - return [child.display_name for child in children] - doc.update({ Fields.display_name: container.display_name, Fields.created: container.created.timestamp(), Fields.modified: container.modified.timestamp(), Fields.num_children: len(draft_children), Fields.content: { - Fields.child_usage_keys: get_child_keys(draft_children), - Fields.child_display_names: get_child_names(draft_children), + Fields.child_usage_keys: [str(child.key) for child in draft_children], + Fields.child_display_names: [child.display_name for child in draft_children], }, Fields.publish_status: publish_status, Fields.last_published: container.last_published.timestamp() if container.last_published else None, @@ -672,16 +654,13 @@ def get_child_names(children) -> list[str]: doc[Fields.breadcrumbs] = [{"display_name": library.title}] if container.published_version_num is not None: - published_children = lib_api.get_container_children( - container_key, - published=True, - ) + published_children = lib_api.get_container_children_list(container_key, published=True) doc[Fields.published] = { Fields.published_display_name: container.published_display_name, Fields.published_num_children: len(published_children), Fields.published_content: { - Fields.child_usage_keys: get_child_keys(published_children), - Fields.child_display_names: get_child_names(published_children), + Fields.child_usage_keys: [str(child.key) for child in published_children], + Fields.child_display_names: [child.display_name for child in published_children], }, } diff --git a/openedx/core/djangoapps/content_libraries/api/block_metadata.py b/openedx/core/djangoapps/content_libraries/api/block_metadata.py index 6faa031cf78c..54d9b5d68ac7 100644 --- a/openedx/core/djangoapps/content_libraries/api/block_metadata.py +++ b/openedx/core/djangoapps/content_libraries/api/block_metadata.py @@ -46,9 +46,20 @@ class LibraryXBlockMetadata(PublishableItem): usage_key: LibraryUsageLocatorV2 @classmethod - def from_component(cls, library_key, component, associated_collections=None): + def from_component( + cls, + library_key: LibraryLocatorV2, + component, + associated_collections=None, + use_published=False, + ) -> LibraryXBlockMetadata: """ Construct a LibraryXBlockMetadata from a Component object. + + Requires that the draft version of the component exists, unless you + specify use_published=True, in which case it requires that the published + version exists. The 'display_name' and 'modified' fields will depend on + which version you request. """ # Import content_tagging.api here to avoid circular imports from openedx.core.djangoapps.content_tagging.api import get_object_tag_counts @@ -61,17 +72,17 @@ def from_component(cls, library_key, component, associated_collections=None): draft = component.versioning.draft published = component.versioning.published last_draft_created = draft.created if draft else None - last_draft_created_by = draft.publishable_entity_version.created_by if draft else None + last_draft_created_by = draft.publishable_entity_version.created_by if draft else "" usage_key = library_component_usage_key(library_key, component) tags = get_object_tag_counts(str(usage_key), count_implicit=True) return cls( usage_key=usage_key, - display_name=draft.title, + display_name=published.title if use_published else draft.title, created=component.created, created_by=component.created_by.username if component.created_by else None, - modified=draft.created, - draft_version_num=draft.version_num, + modified=published.created if use_published else draft.created, + draft_version_num=draft.version_num if draft else None, published_version_num=published.version_num if published else None, published_display_name=published.title if published else None, last_published=None if last_publish_log is None else last_publish_log.published_at, diff --git a/openedx/core/djangoapps/content_libraries/api/blocks.py b/openedx/core/djangoapps/content_libraries/api/blocks.py index 892bc3a62b28..39fac27ca12c 100644 --- a/openedx/core/djangoapps/content_libraries/api/blocks.py +++ b/openedx/core/djangoapps/content_libraries/api/blocks.py @@ -403,6 +403,9 @@ def set_library_block_olx( usage_key: LibraryUsageLocatorV2, new_olx_str: str, paths_to_media: dict | None = None, + # The following arg can be removed after https://github.com/openedx/openedx-core/pull/573 lands + # then we can presumably just get the name from the bulk_draft_changes_for context + created_by: int | None = None, ) -> ComponentVersion: """ Replace the OLX source of the given XBlock. @@ -474,6 +477,7 @@ def set_library_block_olx( 'block.xml': new_olx_media.pk, }, created=now, + created_by=created_by, ) return new_component_version @@ -883,6 +887,8 @@ def delete_library_block( try: component = get_component_from_usage_key(usage_key) + if component.versioning.draft is None: + raise Component.DoesNotExist("Component draft version was already deleted.") except Component.DoesNotExist: # There may be cases where entries are created in the # search index, but the component is not created diff --git a/openedx/core/djangoapps/content_libraries/api/collections.py b/openedx/core/djangoapps/content_libraries/api/collections.py index a8b715750d64..f2b16edbfd6f 100644 --- a/openedx/core/djangoapps/content_libraries/api/collections.py +++ b/openedx/core/djangoapps/content_libraries/api/collections.py @@ -125,7 +125,7 @@ def update_library_collection_items( assert content_library.library_key == library_key # Fetch the Component.entity_ref values for the provided UsageKeys. - item_refs = [] + entity_ids: list[PublishableEntity.ID] = [] for opaque_key in opaque_keys: if isinstance(opaque_key, LibraryContainerLocator): try: @@ -136,7 +136,7 @@ def update_library_collection_items( except Collection.DoesNotExist as exc: raise ContentLibraryContainerNotFound(opaque_key) from exc - item_refs.append(container.entity_ref) + entity_ids.append(container.id) elif isinstance(opaque_key, UsageKeyV2): # Parse the block_family from the key to use as namespace. block_type = BlockTypeKey.from_string(str(opaque_key)) @@ -150,14 +150,12 @@ def update_library_collection_items( except Component.DoesNotExist as exc: raise ContentLibraryBlockNotFound(opaque_key) from exc - item_refs.append(component.entity_ref) + entity_ids.append(component.id) else: # This should never happen, but just in case. raise ValueError(f"Invalid opaque_key: {opaque_key}") - entities_qset = PublishableEntity.objects.filter( - entity_ref__in=item_refs, - ) + entities_qset = content_api.get_publishable_entities(content_library.learning_package_id).filter(id__in=entity_ids) if remove: collection = content_api.remove_from_collection( diff --git a/openedx/core/djangoapps/content_libraries/api/container_metadata.py b/openedx/core/djangoapps/content_libraries/api/container_metadata.py index 98a5024ac674..d7ad7e7f6ad4 100644 --- a/openedx/core/djangoapps/content_libraries/api/container_metadata.py +++ b/openedx/core/djangoapps/content_libraries/api/container_metadata.py @@ -74,9 +74,14 @@ class ContainerMetadata(PublishableItem): container_id: Container.ID @classmethod - def from_container(cls, library_key, container: Container, associated_collections=None): + def from_container(cls, library_key, container: Container, associated_collections=None, use_published=False): """ Construct a ContainerMetadata object from a Container object. + + Requires that the draft version of the container exists, unless you + specify use_published=True, in which case it requires that the published + version exists. The 'display_name' and 'modified' fields will depend on + which version you request. """ last_publish_log = container.versioning.last_publish_log container_key = library_container_locator( @@ -100,10 +105,10 @@ def from_container(cls, library_key, container: Container, associated_collection container_key=container_key, container_type_code=container_key.container_type, container_id=container.id, - display_name=draft.title, + display_name=published.title if use_published else draft.title, created=container.created, - modified=draft.created, - draft_version_num=draft.version_num, + modified=published.created if use_published else draft.created, + draft_version_num=draft.version_num if draft else None, published_version_num=published.version_num if published else None, published_display_name=published.title if published else None, last_published=None if last_publish_log is None else last_publish_log.published_at, diff --git a/openedx/core/djangoapps/content_libraries/api/containers.py b/openedx/core/djangoapps/content_libraries/api/containers.py index 0fbe66518d44..c9c254f8c5a9 100644 --- a/openedx/core/djangoapps/content_libraries/api/containers.py +++ b/openedx/core/djangoapps/content_libraries/api/containers.py @@ -7,6 +7,7 @@ import logging import operator import typing +from dataclasses import dataclass from datetime import datetime, timezone from functools import cache from uuid import UUID, uuid4 @@ -28,6 +29,7 @@ LibraryXBlockMetadata, direct_published_entity_from_record, get_entity_item_type, + library_component_usage_key, make_contributor, resolve_change_action, ) @@ -48,9 +50,11 @@ # 🛑 UNSTABLE: All APIs related to containers are unstable until we've figured # out our approach to dynamic content (randomized, A/B tests, etc.) __all__ = [ + "ContainerChildMetadata", "get_container", "create_container", "get_container_children", + "get_container_children_list", "get_container_children_count", "update_container", "delete_container", @@ -70,6 +74,15 @@ log = logging.getLogger(__name__) +@dataclass(frozen=True) +class ContainerChildMetadata: + """ + Class that represents limited metadata about a child entity of a container + """ + display_name: str + key: LibraryUsageLocatorV2 | LibraryContainerLocator + + def get_container( container_key: LibraryContainerLocator, *, @@ -202,8 +215,8 @@ def get_container_children( published=False, ) -> list[LibraryXBlockMetadata | ContainerMetadata]: """ - [ 🛑 UNSTABLE ] Get the entities contained in the given container - (e.g. the components/xblocks in a unit, units in a subsection, subsections in a section) + [ 🛑 UNSTABLE ] Get the entities contained in the given container (e.g. the + components in a unit, units in a subsection, subsections in a section). """ container = get_container_from_key(container_key) @@ -211,10 +224,40 @@ def get_container_children( result: list[LibraryXBlockMetadata | ContainerMetadata] = [] for entry in child_entities: if hasattr(entry.entity, "component"): # the child is a Component - result.append(LibraryXBlockMetadata.from_component(container_key.lib_key, entry.entity.component)) + result.append(LibraryXBlockMetadata.from_component( + container_key.lib_key, entry.entity.component, use_published=published, + )) + else: + assert isinstance(entry.entity.container, Container) + result.append(ContainerMetadata.from_container( + container_key.lib_key, entry.entity.container, use_published=published, + )) + return result + + +def get_container_children_list( + container_key: LibraryContainerLocator, *, + published: bool, +) -> list[ContainerChildMetadata]: + """ + [ 🛑 UNSTABLE ] Get the entities contained in the given container (e.g. the + components/xblocks in a unit, units in a subsection, subsections in a section) + + Returns a list of ``ContainerChildMetadata`` objects (which give only each + child's display name and opaque key, though the opaque key also includes + information on what "type" of component/container it is). + """ + container = get_container_from_key(container_key) + result: list[ContainerChildMetadata] = [] + for entry in content_api.get_entities_in_container(container, published=published): + key: LibraryUsageLocatorV2 | LibraryContainerLocator + if hasattr(entry.entity, "component"): # the child is a Component + key = library_component_usage_key(container_key.lib_key, entry.entity.component) else: assert isinstance(entry.entity.container, Container) - result.append(ContainerMetadata.from_container(container_key.lib_key, entry.entity.container)) + key = library_container_locator(container_key.lib_key, entry.entity.container) + display_name = entry.entity_version.title + result.append(ContainerChildMetadata(display_name=display_name, key=key)) return result @@ -254,7 +297,7 @@ def update_container_children( def get_containers_contains_item(key: LibraryUsageLocatorV2 | LibraryContainerLocator) -> list[ContainerMetadata]: """ - [ 🛑 UNSTABLE ] Get containers that contains the item, that can be a component or another container. + [ 🛑 UNSTABLE ] Get list of draft containers that contain the item. Item can be a component or another container. """ entity = get_entity_from_key(key) containers = content_api.get_containers_with_entity(entity.id).select_related("container_type") diff --git a/openedx/core/djangoapps/content_libraries/api/libraries.py b/openedx/core/djangoapps/content_libraries/api/libraries.py index f41700e79b37..1d326d448cc8 100644 --- a/openedx/core/djangoapps/content_libraries/api/libraries.py +++ b/openedx/core/djangoapps/content_libraries/api/libraries.py @@ -200,7 +200,7 @@ class PublishableItem(LibraryItem): Common fields for anything that can be found in a content library that has draft/publish support. """ - draft_version_num: int + draft_version_num: int | None published_version_num: int | None = None published_display_name: str | None last_published: datetime | None = None diff --git a/openedx/core/djangoapps/content_libraries/rest_api/blocks.py b/openedx/core/djangoapps/content_libraries/rest_api/blocks.py index d123975e8cef..1a1389019706 100644 --- a/openedx/core/djangoapps/content_libraries/rest_api/blocks.py +++ b/openedx/core/djangoapps/content_libraries/rest_api/blocks.py @@ -433,7 +433,7 @@ def post(self, request, usage_key_str): serializer.is_valid(raise_exception=True) new_olx_str = serializer.validated_data["olx"] try: - version_num = api.set_library_block_olx(key, new_olx_str).version_num + version_num = api.set_library_block_olx(key, new_olx_str, created_by=request.user.id).version_num except ValueError as err: raise ValidationError(detail=str(err)) # lint-amnesty, pylint: disable=raise-missing-from # noqa: B904 return Response(self.serializer_class({"olx": new_olx_str, "version_num": version_num}).data) diff --git a/openedx/core/djangoapps/content_libraries/tasks.py b/openedx/core/djangoapps/content_libraries/tasks.py index 6fc9e91de9ba..83fd1ca59427 100644 --- a/openedx/core/djangoapps/content_libraries/tasks.py +++ b/openedx/core/djangoapps/content_libraries/tasks.py @@ -55,6 +55,7 @@ from openedx_events.content_authoring.data import ( ContentObjectChangedData, LibraryBlockData, + LibraryCollectionData, LibraryContainerData, ) from openedx_events.content_authoring.signals import ( @@ -63,11 +64,13 @@ LIBRARY_BLOCK_DELETED, LIBRARY_BLOCK_PUBLISHED, LIBRARY_BLOCK_UPDATED, + LIBRARY_COLLECTION_UPDATED, LIBRARY_CONTAINER_CREATED, LIBRARY_CONTAINER_DELETED, LIBRARY_CONTAINER_PUBLISHED, LIBRARY_CONTAINER_UPDATED, ) +from openedx_tagging import api as tagging_api from path import Path from user_tasks.models import UserTaskArtifact from user_tasks.tasks import UserTask, UserTaskStatus @@ -123,10 +126,11 @@ def send_change_events_for_modified_entities( for entity in entities: change = changes_by_entity_id[entity.id] + entity_opaque_key: LibraryUsageLocatorV2 | LibraryContainerLocator if hasattr(entity, "component"): # This is a library XBlock (component) - block_key = api.library_component_usage_key(library.library_key, entity.component) - event_data = LibraryBlockData(library_key=library.library_key, usage_key=block_key) + entity_opaque_key = api.library_component_usage_key(library.library_key, entity.component) + event_data = LibraryBlockData(library_key=library.library_key, usage_key=entity_opaque_key) if change.old_version is None and change.new_version: # .. event_implemented_name: LIBRARY_BLOCK_CREATED # .. event_type: org.openedx.content_authoring.library_block.created.v1 @@ -142,8 +146,8 @@ def send_change_events_for_modified_entities( LIBRARY_BLOCK_UPDATED.send_event(library_block=event_data) elif hasattr(entity, "container"): - container_key = api.library_container_locator(library.library_key, entity.container) - event_data = LibraryContainerData(container_key=container_key) + entity_opaque_key = api.library_container_locator(library.library_key, entity.container) + event_data = LibraryContainerData(container_key=entity_opaque_key) if change.old_version is None and change.new_version: # .. event_implemented_name: LIBRARY_CONTAINER_CREATED # .. event_type: org.openedx.content_authoring.content_library.container.created.v1 @@ -168,7 +172,7 @@ def send_change_events_for_modified_entities( # If entities were added/removed from this container, we need to notify things like the search index # that the list of parent containers for each entity has changed. check_container_content_changes.delay( - container_key_str=str(container_key), + container_key_str=str(entity_opaque_key), old_version_id=change.old_version_id, new_version_id=change.new_version_id, ) @@ -176,6 +180,57 @@ def send_change_events_for_modified_entities( log.error("Unknown publishable entity type: %s", entity) continue + if change.restored: + # This block/container was previously soft-deleted and is now un-deleted. It may have tags or collections. + # It would be best to expand the LIBRARY_BLOCK_CREATED event to include the "restored" flag, but in + # the interests of minimizing breaking event changes for now we'll just emit a + # CONTENT_OBJECT_ASSOCIATIONS_CHANGED event to ensure relevant search index records get updated. + association_changes: list[str] = [] + if content_api.get_entity_collections(learning_package_id, entity_ref=entity.entity_ref).exists(): + association_changes.append("collections") # This entity is part of at least one collection + if tagging_api.get_object_tags(str(entity_opaque_key)).exists(): + association_changes.append("tags") # This entity has tags + if association_changes: + # .. event_implemented_name: CONTENT_OBJECT_ASSOCIATIONS_CHANGED + # .. event_type: org.openedx.content_authoring.content.object.associations.changed.v1 + CONTENT_OBJECT_ASSOCIATIONS_CHANGED.send_event( + content_object=ContentObjectChangedData( + object_id=str(entity_opaque_key), + changes=association_changes, + ), + ) + # Notifying parent containers that a child has been restored is not necessary here - they'll already be + # included in 'change_list' [as side effects]. + + # When entities are deleted or un-deleted (as drafts), update any associated collections, so their "# of draft + # entities in collection" count is correct. We only care about deleted or un-deleted, not newly created drafts, + # because it's currently impossible for a newly-created draft to be part of a collection. + deleted_or_undeleted_entity_ids = [ + r.entity_id for r in changes if r.new_version is None or (r.old_version is None and r.restored) + ] + if deleted_or_undeleted_entity_ids: + emit_collections_updated(library, entity_ids=deleted_or_undeleted_entity_ids) + + +def emit_collections_updated(library: ContentLibrary, entity_ids: list[PublishableEntity.ID]) -> None: + """ + Helper function to notify affected collections after an entity is deleted/un-deleted/published/un-published. + + Used by `send_change_events_for_modified_entities()` and `send_events_after_publish()` + """ + # Check if any collections are affected: + affected_collections = ( + content_api.get_collections(library.learning_package_id, enabled=True).filter(entities__id__in=entity_ids) + ) + for collection in affected_collections: + collection_key = api.library_collection_locator( + library_key=library.library_key, + collection_key=collection.collection_code, + ) + # .. event_implemented_name: LIBRARY_COLLECTION_UPDATED + # .. event_type: org.openedx.content_authoring.content_library.collection.updated.v1 + LIBRARY_COLLECTION_UPDATED.send_event(library_collection=LibraryCollectionData(collection_key=collection_key)) + @shared_task(base=LoggedTask) @set_code_owner_attribute @@ -257,6 +312,8 @@ def send_collections_changed_events( """ Sends a CONTENT_OBJECT_ASSOCIATIONS_CHANGED event for each modified library entity in the given list, because their associated collections have changed. + This is dispatched in response to a COLLECTION_CHANGED event, usually + because entities have been added to or removed from a collection. ⏳ This task is designed to be run asynchronously so it can handle many entities, but you can also call it synchronously if you are only @@ -268,6 +325,8 @@ def send_collections_changed_events( entities = ( content_api.get_publishable_entities(learning_package_id) .filter(id__in=publishable_entity_ids) + # Ignore deleted items (both draft & published are deleted) that are still associated with the collection: + .exclude(draft__version=None, published__version=None) .select_related("component", "container") ) @@ -307,6 +366,7 @@ def send_events_after_publish(publish_log_id: int, library_key_str: str) -> None """ publish_log = PublishLog.objects.get(id=publish_log_id) library_key = LibraryLocatorV2.from_string(library_key_str) + library = ContentLibrary.objects.get_by_key(library_key) affected_entities = publish_log.records.select_related( "entity", "entity__container", "entity__container__container_type", "entity__component", ).all() @@ -338,6 +398,14 @@ def send_events_after_publish(publish_log_id: int, library_key_str: str) -> None "was modified during publish operation but is of unknown type." ) + # When entities get newly published or their published version is deleted, update the "# of published entities in + # collection" count of any associated collections. + newly_published_or_unpublished_entity_ids = [ + r.entity_id for r in affected_entities if r.new_version is None or r.old_version is None + ] + if not newly_published_or_unpublished_entity_ids: + emit_collections_updated(library, entity_ids=newly_published_or_unpublished_entity_ids) + def _filter_child(store, usage_key, capa_type): """ @@ -738,7 +806,11 @@ def dispatch_and_wait(task_fn: Task, wait_for_full_completion: bool = False, **k result: AsyncResult = task_fn.delay(**kwargs) # Try waiting a bit for the task to finish before we complete the request: try: - result.get(timeout=10) + # We use `disable_sync_subtasks=False` because some of our tasks emit + # events whose handlers then spawn additional tasks which are sometimes + # synchronous. Ideal usage of celery would be to "chain" the tasks + # instead of spawning subtasks, but that would require a major refactor. + result.get(timeout=10, disable_sync_subtasks=False) except CeleryTimeout: pass # This is fine! The search index is still being updated, and/or other diff --git a/openedx/core/djangoapps/content_libraries/tests/base.py b/openedx/core/djangoapps/content_libraries/tests/base.py index 4137ee36d045..8e5a9bb264ff 100644 --- a/openedx/core/djangoapps/content_libraries/tests/base.py +++ b/openedx/core/djangoapps/content_libraries/tests/base.py @@ -489,11 +489,11 @@ def _restore_container(self, container_key: ContainerKey | str, expect_response= """ Restore a deleted a container (unit etc.) """ return self._api('post', URL_LIB_CONTAINER_RESTORE.format(container_key=container_key), None, expect_response) - def _get_container_children(self, container_key: ContainerKey | str, expect_response=200): + def _get_container_children(self, container_key: ContainerKey | str, published=False, expect_response=200): """ Get container children""" return self._api( 'get', - URL_LIB_CONTAINER_CHILDREN.format(container_key=container_key), + URL_LIB_CONTAINER_CHILDREN.format(container_key=container_key) + ("?published=true" if published else ""), None, expect_response ) diff --git a/openedx/core/djangoapps/content_libraries/tests/test_api.py b/openedx/core/djangoapps/content_libraries/tests/test_api.py index 99f2568d6114..1060ec42713e 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_api.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_api.py @@ -418,24 +418,24 @@ def test_delete_library_container(self) -> None: self.assertDictContainsEntries( event_receiver.call_args_list[0].kwargs, { - "signal": LIBRARY_COLLECTION_UPDATED, + "signal": LIBRARY_CONTAINER_UPDATED, "sender": None, - "library_collection": LibraryCollectionData( - collection_key=api.library_collection_locator( - self.lib1.library_key, - collection_key=self.col1.collection_code, - ), + "library_container": LibraryContainerData( + container_key=self.subsection1.container_key, + background=False, ), }, ) self.assertDictContainsEntries( event_receiver.call_args_list[1].kwargs, { - "signal": LIBRARY_CONTAINER_UPDATED, + "signal": LIBRARY_COLLECTION_UPDATED, "sender": None, - "library_container": LibraryContainerData( - container_key=self.subsection1.container_key, - background=False, + "library_collection": LibraryCollectionData( + collection_key=api.library_collection_locator( + self.lib1.library_key, + collection_key=self.col1.collection_code, + ), ), }, ) diff --git a/openedx/core/djangoapps/content_libraries/tests/test_containers.py b/openedx/core/djangoapps/content_libraries/tests/test_containers.py index 55be983d18dd..3900234f1db5 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_containers.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_containers.py @@ -1,12 +1,13 @@ """ Tests for openedx_content-based Content Libraries """ +import copy import textwrap from datetime import UTC, datetime import ddt from freezegun import freeze_time -from opaque_keys.edx.locator import LibraryLocatorV2 +from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocatorV2, LibraryUsageLocatorV2 from common.djangoapps.student.tests.factories import UserFactory from openedx.core.djangoapps.content_libraries import api @@ -103,10 +104,19 @@ def setUp(self) -> None: ) # Create blocks - self.problem_block = self._add_block_to_library(self.lib["id"], "problem", "Problem1", can_stand_alone=False) - self.html_block = self._add_block_to_library(self.lib["id"], "html", "Html1", can_stand_alone=False) - self.problem_block_2 = self._add_block_to_library(self.lib["id"], "problem", "Problem2", can_stand_alone=False) - self.html_block_2 = self._add_block_to_library(self.lib["id"], "html", "Html2") + with freeze_time(self.create_date): + self.problem_block = self._add_block_to_library( + self.lib["id"], "problem", "Problem1", can_stand_alone=False + ) + self.html_block = self._add_block_to_library( + self.lib["id"], "html", "Html1", can_stand_alone=False + ) + self.problem_block_2 = self._add_block_to_library( + self.lib["id"], "problem", "Problem2", can_stand_alone=False + ) + self.html_block_2 = self._add_block_to_library( + self.lib["id"], "html", "Html2" + ) with freeze_time(self.modified_date): # Add components to `unit_with_components` @@ -528,6 +538,194 @@ def test_section_replace_children(self) -> None: assert data[0]['id'] == new_subsection_1['id'] assert data[1]['id'] == new_subsection_2['id'] + def test_published_child_components(self) -> None: + """ + Test that we can get the published version of a unit. + """ + unit_key_str = self.unit_with_components["id"] + unit_key = LibraryContainerLocator.from_string(unit_key_str) + # Publish "unit with components" + self._publish_container(unit_key_str) + # Now both its draft and published versions contain 4 identical components: + assert api.get_container_children_count(unit_key, published=False) == 4 + assert api.get_container_children_count(unit_key, published=True) == 4 + + original_children = self._get_container_children(unit_key_str) + draft_edited_date = datetime(2024, 9, 8, 7, 6, 5, tzinfo=UTC) + # Modify the first component: + first_child_key_str = original_children[0]["id"] + with freeze_time(draft_edited_date): + self._set_library_block_olx(first_child_key_str, "") + # And delete the last one: + last_child_key_str = original_children[3]["id"] + with freeze_time(draft_edited_date): + self._delete_library_block(last_child_key_str) + + # Now the counts should be different - the draft has only three children: + assert api.get_container_children_count(unit_key, published=False) == 3 + assert api.get_container_children_count(unit_key, published=True) == 4 + + # Check the published version + expected_published = copy.deepcopy(original_children) + # The first child was modified since publish: + expected_published[0]["has_unpublished_changes"] = True + # The last child was modified (deleted) since publish: + expected_published[3]["has_unpublished_changes"] = True + expected_published[3]["last_draft_created"] = None + expected_published[3]["last_draft_created_by"] = "" + + assert self._get_container_children(unit_key_str, published=True) == expected_published + + # Check the draft version: + expected_draft = copy.deepcopy(original_children) + # The first child was modified since publish: + expected_draft[0]["display_name"] = "DRAFT problem" + expected_draft[0]["has_unpublished_changes"] = True + # The last child was modified (deleted) since publish: + del expected_draft[3] + + assert self._get_container_children(unit_key_str, published=False) == expected_draft + + def test_published_child_containers(self) -> None: + """ + Test that we can get the published version of a subsection. + """ + subsection_key_str = self.subsection_with_units["id"] + subsection_key = LibraryContainerLocator.from_string(subsection_key_str) + # Publish "subsection with units" + self._publish_container(subsection_key_str) + # Now both its draft and published versions contain 4 identical units: + assert api.get_container_children_count(subsection_key, published=False) == 4 + assert api.get_container_children_count(subsection_key, published=True) == 4 + + original_children = self._get_container_children(subsection_key_str) + draft_edited_date = datetime(2024, 9, 8, 7, 6, 5, tzinfo=UTC) + # Modify the first child unit: + first_child_key_str = original_children[0]["id"] + with freeze_time(draft_edited_date): + self._update_container(first_child_key_str, display_name="DRAFT unit") + # And delete the last one: + last_child_key_str = original_children[3]["id"] + with freeze_time(draft_edited_date): + self._delete_container(last_child_key_str) + + # Now the counts should be different - the draft has only three children: + assert api.get_container_children_count(subsection_key, published=False) == 3 + assert api.get_container_children_count(subsection_key, published=True) == 4 + + # Check the published version + expected_published = copy.deepcopy(original_children) + # The first child was modified since publish: + expected_published[0]["has_unpublished_changes"] = True + # The last child was modified (deleted) since publish: + expected_published[3]["has_unpublished_changes"] = True + expected_published[3]["last_draft_created"] = None + expected_published[3]["last_draft_created_by"] = "" # last draft was deleted. + + assert self._get_container_children(subsection_key_str, published=True)[0] == expected_published[0] + assert self._get_container_children(subsection_key_str, published=True)[3] == expected_published[3] + assert self._get_container_children(subsection_key_str, published=True) == expected_published + + # Check the draft version: + expected_draft = copy.deepcopy(original_children) + # The first child was modified since publish: + expected_draft[0]["display_name"] = "DRAFT unit" + expected_draft[0]["has_unpublished_changes"] = True + # The last child was modified (deleted) since publish: + del expected_draft[3] + + assert self._get_container_children(subsection_key_str, published=False) == expected_draft + + def test_get_container_children_queries(self): + """ + Test how many queries are used to retrieve the children of a container + """ + empty_unit_key = LibraryContainerLocator.from_string(self.unit["id"]) + unit_with_children_key = LibraryContainerLocator.from_string(self.unit_with_components["id"]) + EMPTY_QUERIES = 6 + PER_CHILD_QUERIES = 10 # There's room to optimize here. + with self.assertNumQueries(EMPTY_QUERIES): + result = api.get_container_children(empty_unit_key) + assert len(result) == 0 + with self.assertNumQueries(6): + num_children = api.get_container_children_count(unit_with_children_key) + with self.assertNumQueries(EMPTY_QUERIES + PER_CHILD_QUERIES * num_children): + result = api.get_container_children(unit_with_children_key) + assert len(result) == num_children + + def test_get_container_children_list_components(self) -> None: + """ + Test that we can use get_container_children_list() to get the draft and + published children of any container, simply and performantly. + """ + # Publish "unit with components" + unit_key_str = self.unit_with_components["id"] + unit_key = LibraryContainerLocator.from_string(unit_key_str) + self._publish_container(unit_key_str) + + original_list = api.get_container_children_list(unit_key, published=True) + assert original_list == [ + api.ContainerChildMetadata( + display_name='Blank Problem', + key=LibraryUsageLocatorV2.from_string('lb:CL-TEST:containers:problem:Problem1'), + ), + api.ContainerChildMetadata( + display_name='Text', + key=LibraryUsageLocatorV2.from_string('lb:CL-TEST:containers:html:Html1'), + ), + api.ContainerChildMetadata( + display_name='Blank Problem', + key=LibraryUsageLocatorV2.from_string('lb:CL-TEST:containers:problem:Problem2'), + ), + api.ContainerChildMetadata( + display_name='Text', + key=LibraryUsageLocatorV2.from_string('lb:CL-TEST:containers:html:Html2'), + ), + ] + + # Modify the first component: + first_child_key = original_list[0].key + self._set_library_block_olx(str(first_child_key), "") + # And delete the last one: + last_child_key = original_list[3].key + self._delete_library_block(str(last_child_key)) + + # Now the counts should be different - the draft has only three children: + assert api.get_container_children_list(unit_key, published=True) == original_list + assert api.get_container_children_list(unit_key, published=False) == [ + api.ContainerChildMetadata( + display_name='DRAFT problem', # new name + key=LibraryUsageLocatorV2.from_string('lb:CL-TEST:containers:problem:Problem1'), + ), + api.ContainerChildMetadata( + display_name='Text', + key=LibraryUsageLocatorV2.from_string('lb:CL-TEST:containers:html:Html1'), + ), + api.ContainerChildMetadata( + display_name='Blank Problem', + key=LibraryUsageLocatorV2.from_string('lb:CL-TEST:containers:problem:Problem2'), + ), + # last component was deleted. + ] + + + def test_get_container_children_list_queries(self): + """ + Test how many queries are used to retrieve the children of a container + """ + empty_unit_key = LibraryContainerLocator.from_string(self.unit["id"]) + unit_with_children_key = LibraryContainerLocator.from_string(self.unit_with_components["id"]) + EMPTY_QUERIES = 6 + PER_CHILD_QUERIES = 3 # There's room to optimize here - remove the componenttype lookups + with self.assertNumQueries(EMPTY_QUERIES): + result = api.get_container_children_list(empty_unit_key, published=False) + assert len(result) == 0 + with self.assertNumQueries(6): + num_children = api.get_container_children_count(unit_with_children_key) + with self.assertNumQueries(EMPTY_QUERIES + PER_CHILD_QUERIES * num_children): + result = api.get_container_children_list(unit_with_children_key, published=False) + assert len(result) == num_children + @ddt.data( "unit", "subsection", diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 0caa9c0a61a8..8089580b43e4 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -840,7 +840,7 @@ openedx-calc==5.0.0 # via # -r requirements/edx/kernel.in # xblocks-contrib -openedx-core==1.0.1 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/backport-fix-collection-events # via # -c requirements/constraints.txt # -r requirements/edx/kernel.in diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index dfdb92cc4361..4f26279e7480 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -1383,7 +1383,7 @@ openedx-calc==5.0.0 # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # xblocks-contrib -openedx-core==1.0.1 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/backport-fix-collection-events # via # -c requirements/constraints.txt # -r requirements/edx/doc.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index cb9c6045cc94..2e5bf5ebea24 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -1018,7 +1018,7 @@ openedx-calc==5.0.0 # via # -r requirements/edx/base.txt # xblocks-contrib -openedx-core==1.0.1 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/backport-fix-collection-events # via # -c requirements/constraints.txt # -r requirements/edx/base.txt diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 1cd806332445..a1372933b4a3 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -1058,7 +1058,7 @@ openedx-calc==5.0.0 # via # -r requirements/edx/base.txt # xblocks-contrib -openedx-core==1.0.1 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/backport-fix-collection-events # via # -c requirements/constraints.txt # -r requirements/edx/base.txt