Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 34 additions & 13 deletions cms/djangoapps/modulestore_migrator/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
import typing as t
from dataclasses import dataclass
from datetime import datetime, timezone
from datetime import UTC, datetime, timezone
from enum import Enum
from gettext import ngettext

Expand Down Expand Up @@ -341,6 +341,7 @@ def _import_structure(
openedx_content equivalent.
"""
migration = source_data.migration
migration_start_time = datetime.now(UTC)
migration_context = _MigrationContext(
used_component_keys=set(
LibraryUsageLocatorV2(target_library.key, block_type, block_id) # type: ignore[abstract]
Expand All @@ -367,14 +368,35 @@ def _import_structure(
repeat_handling_strategy=RepeatHandlingStrategy(migration.repeat_handling_strategy),
preserve_url_slugs=migration.preserve_url_slugs,
created_by=status.user_id,
created_at=datetime.now(timezone.utc), # noqa: UP017
created_at=migration_start_time,
)
with content_api.bulk_draft_changes_for(migration.target.id) as change_log:
with content_api.bulk_draft_changes_for(
learning_package_id=migration.target.id,
changed_by=migration_context.created_by,
changed_at=migration_start_time,
) as change_log:
root_migrated_node = _migrate_node(
context=migration_context,
source_node=root_node,
)
change_log.save()
# Publishing is not allowed inside bulk_draft_changes_for(), so publish
# everything that was modified now that the context has exited. We use the
# change log to identify which drafts to publish. If the context produced
# no records, it deletes the change log on exit (clearing its PK), in which
# case there's nothing to publish and we return None so callers don't try
# to associate the deleted change log with the migration.
if not change_log.pk:
return None, root_migrated_node
if change_log.records.exists():
drafts_to_publish = content_api.get_all_drafts(migration.target.id).filter(
entity_id__in=change_log.records.values_list("entity_id", flat=True),
)
content_api.publish_from_drafts(
migration.target.id,
draft_qset=drafts_to_publish,
published_by=migration_context.created_by,
# published_at will be later than 'migration_start_time' as _migrate_node() may have taken quite a while.
)
return change_log, root_migrated_node


Expand Down Expand Up @@ -408,7 +430,7 @@ def _populate_collection(user_id: int, migration: models.ModulestoreMigration) -
)
if block_target_pks:
content_api.add_to_collection(
learning_package_id=migration.target.pk,
learning_package_id=migration.target.id,
collection_code=migration.target_collection.collection_code,
entities_qset=PublishableEntity.objects.filter(id__in=block_target_pks),
created_by=user_id,
Expand All @@ -421,6 +443,7 @@ def _create_collection(
library_key: LibraryLocatorV2,
title: str,
course_name: str | None = None,
created_by: int | None = None,
) -> Collection:
"""
Creates a collection in the given library
Expand All @@ -446,6 +469,7 @@ def _create_collection(
collection_key=modified_key,
title=f"{title}{f'_{attempt}' if attempt > 0 else ''}",
description=description,
created_by=created_by,
)
except libraries_api.LibraryCollectionAlreadyExists:
attempt += 1
Expand Down Expand Up @@ -705,6 +729,7 @@ def bulk_migrate_from_modulestore(
library_key=target_library_locator,
title=legacy_root_list[i].display_name,
course_name=legacy_root_list[i].display_name if source_data.source.key.is_course else None,
created_by=user_id,
)
)
_populate_collection(user_id, migration)
Expand Down Expand Up @@ -881,6 +906,7 @@ def _migrate_container(
user_id=context.created_by,
)
if container_exists and context.should_skip_strategy:
assert container.draft_version_num is not None # We know it exists, this is just for mypy
return PublishableEntityVersion.objects.get(
entity_id=container.container_id,
version_num=container.draft_version_num,
Expand All @@ -894,11 +920,7 @@ def _migrate_container(
created_by=context.created_by,
).publishable_entity_version

# Publish the container
libraries_api.publish_container_changes(
container.container_key,
context.created_by,
)
# Note: Publishing happens after bulk_draft_changes_for exits, in _import_structure.
context.used_container_slugs.add(container.container_key.container_id)
return container_publishable_entity_version, None

Expand Down Expand Up @@ -969,11 +991,10 @@ def _migrate_component(

# Create the new component version for it
component_version = libraries_api.set_library_block_olx(
target_key, new_olx_str=olx, paths_to_media=paths_to_media_ids,
target_key, new_olx_str=olx, paths_to_media=paths_to_media_ids, created_by=context.created_by,
)

# Publish the component
libraries_api.publish_component_changes(target_key, context.created_by)
# Note: Publishing happens after bulk_draft_changes_for exits, in _import_structure.
context.used_component_keys.add(target_key)
return component_version.publishable_entity_version, None

Expand Down
10 changes: 6 additions & 4 deletions cms/djangoapps/modulestore_migrator/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,9 @@ def test_migrate_component_success(self):
"problem", result.componentversion.component.component_type.name
)

# The component is published
self.assertFalse(result.componentversion.component.versioning.has_unpublished_changes) # noqa: PT009
# The component is left as a draft; publishing is the caller's responsibility
# (handled in _import_structure after bulk_draft_changes_for exits).
self.assertTrue(result.componentversion.component.versioning.has_unpublished_changes) # noqa: PT009

def test_migrate_component_failure(self):
"""
Expand Down Expand Up @@ -802,8 +803,9 @@ def test_migrate_container_different_container_types(self):

container_version = result.containerversion
self.assertEqual(container_version.title, f"Test {block_type.title()}") # noqa: PT009
# The container is published
self.assertFalse(content_api.contains_unpublished_changes(container_version.container.pk)) # noqa: PT009 # pylint: disable=line-too-long
# The container is left as a draft; publishing is the caller's
# responsibility (handled in _import_structure after bulk_draft_changes_for exits).
self.assertTrue(content_api.contains_unpublished_changes(container_version.container.pk)) # noqa: PT009 # pylint: disable=line-too-long

def test_migrate_container_same_title(self):
"""
Expand Down
42 changes: 17 additions & 25 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,21 @@ def _wait_for_meili_task(info: TaskInfo) -> None:
Simple helper method to wait for a Meilisearch task to complete
This method will block until the task is completed, so it should only be used in celery tasks
or management commands.

✨ Note: "Meilisearch processes tasks in the order they were added to the queue."
per https://www.meilisearch.com/docs/capabilities/indexing/tasks_and_batches/monitor_tasks#monitoring-task-status
so if you need to wait for multiple tasks, simply wait for the final (last) task.
"""
client = _get_meilisearch_client()
# This function almost always gets called immediately after enqueing a task, and from experiments, an initial wait
# of at least 15ms is warranted, as the task is almost never done in less than 10ms. We are using 20ms which seems
# to work well without requiring an additional wait in most cases.
sleep_delay = 0.020 # Initial wait is only 20ms but we will back off exponentially
time.sleep(sleep_delay)
current_status = client.get_task(info.task_uid)
while current_status.status in ("enqueued", "processing"):
time.sleep(0.5)
time.sleep(sleep_delay)
sleep_delay = min(sleep_delay * 1.5, 2.0) # Increase delay up to 2s
current_status = client.get_task(info.task_uid)
if current_status.status != "succeeded":
try:
Expand All @@ -166,15 +176,6 @@ def _wait_for_meili_task(info: TaskInfo) -> None:
raise MeilisearchError(err_reason)


def _wait_for_meili_tasks(info_list: list[TaskInfo]) -> None:
"""
Simple helper method to wait for multiple Meilisearch tasks to complete
"""
while info_list:
info = info_list.pop()
_wait_for_meili_task(info)


def _index_exists(index_name: str) -> bool:
"""
Check if an index exists
Expand Down Expand Up @@ -324,13 +325,10 @@ def _update_index_docs(docs) -> None:
client = _get_meilisearch_client()
current_rebuild_index_name = _get_running_rebuild_index_name()

tasks = []
if current_rebuild_index_name:
# If there is a rebuild in progress, the document will also be added to the new index.
tasks.append(client.index(current_rebuild_index_name).update_documents(docs))
tasks.append(client.index(STUDIO_INDEX_NAME).update_documents(docs))

_wait_for_meili_tasks(tasks)
client.index(current_rebuild_index_name).update_documents(docs)
_wait_for_meili_task(client.index(STUDIO_INDEX_NAME).update_documents(docs))


def only_if_meilisearch_enabled(f):
Expand Down Expand Up @@ -828,13 +826,10 @@ def _delete_documents(filter_query: str) -> None:
client = _get_meilisearch_client()
current_rebuild_index_name = _get_running_rebuild_index_name()

tasks = []
if current_rebuild_index_name:
# If there is a rebuild in progress, the document will also be removed from the new index.
tasks.append(client.index(current_rebuild_index_name).delete_documents(filter=filter_query))
tasks.append(client.index(STUDIO_INDEX_NAME).delete_documents(filter=filter_query))

_wait_for_meili_tasks(tasks)
client.index(current_rebuild_index_name).delete_documents(filter=filter_query)
_wait_for_meili_task(client.index(STUDIO_INDEX_NAME).delete_documents(filter=filter_query))


def _delete_index_doc(doc_id) -> None:
Expand All @@ -849,14 +844,11 @@ def _delete_index_doc(doc_id) -> None:
client = _get_meilisearch_client()
current_rebuild_index_name = _get_running_rebuild_index_name()

tasks = []
if current_rebuild_index_name:
# If there is a rebuild in progress, the document will also be removed from the new index.
tasks.append(client.index(current_rebuild_index_name).delete_document(doc_id))

tasks.append(client.index(STUDIO_INDEX_NAME).delete_document(doc_id))
client.index(current_rebuild_index_name).delete_document(doc_id)

_wait_for_meili_tasks(tasks)
_wait_for_meili_task(client.index(STUDIO_INDEX_NAME).delete_document(doc_id))


def upsert_library_block_index_doc(usage_key: UsageKey) -> None:
Expand Down
43 changes: 11 additions & 32 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from opaque_keys.edx.keys import ContainerKey, LearningContextKey, OpaqueKey, UsageKey
from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLocator
from openedx_content import api as content_api
from openedx_content.models_api import Collection, Section, Subsection, Unit
from openedx_content.models_api import Collection
from rest_framework.exceptions import NotFound

from openedx.core.djangoapps.content.search.models import SearchAccess
Expand Down Expand Up @@ -520,7 +520,7 @@ def searchable_doc_containers(object_id: OpaqueKey, container_type: str) -> dict
else:
log.warning(f"Unexpected key type for {object_id}")

except ObjectDoesNotExist:
except (ObjectDoesNotExist, lib_api.ContentLibraryBlockNotFound):
log.warning(f"No library item found for {object_id}")

if not containers:
Expand Down Expand Up @@ -558,6 +558,9 @@ def searchable_doc_for_collection(
if collection:
assert collection.collection_code == collection_key.collection_id

# Collections themselves are not publishable entities, so don't have a "draft" or "published" version, but the
# entities they contain are publishable, so the number of entities in the collection may be different between
# draft and published views, if some draft entities are unpublished or are published but the draft is deleted.
draft_num_children = content_api.filter_publishable_entities(
collection.entities,
has_draft=True,
Expand Down Expand Up @@ -627,42 +630,21 @@ def searchable_doc_for_container(
log.error(f"Container {container_key} not found")
return doc

draft_children = lib_api.get_container_children(
container_key,
published=False,
)
draft_children = lib_api.get_container_children_list(container_key, published=False)
publish_status = PublishStatus.published
if container.last_published is None:
publish_status = PublishStatus.never
elif container.has_unpublished_changes:
publish_status = PublishStatus.modified

container_type_code = container_key.container_type

def get_child_keys(children) -> list[str]:
match container_type_code:
case Unit.type_code:
return [
str(child.usage_key)
for child in children
]
case Subsection.type_code | Section.type_code:
return [
str(child.container_key)
for child in children
]

def get_child_names(children) -> list[str]:
return [child.display_name for child in children]

doc.update({
Fields.display_name: container.display_name,
Fields.created: container.created.timestamp(),
Fields.modified: container.modified.timestamp(),
Fields.num_children: len(draft_children),
Fields.content: {
Fields.child_usage_keys: get_child_keys(draft_children),
Fields.child_display_names: get_child_names(draft_children),
Fields.child_usage_keys: [str(child.key) for child in draft_children],
Fields.child_display_names: [child.display_name for child in draft_children],
},
Fields.publish_status: publish_status,
Fields.last_published: container.last_published.timestamp() if container.last_published else None,
Expand All @@ -672,16 +654,13 @@ def get_child_names(children) -> list[str]:
doc[Fields.breadcrumbs] = [{"display_name": library.title}]

if container.published_version_num is not None:
published_children = lib_api.get_container_children(
container_key,
published=True,
)
published_children = lib_api.get_container_children_list(container_key, published=True)
doc[Fields.published] = {
Fields.published_display_name: container.published_display_name,
Fields.published_num_children: len(published_children),
Fields.published_content: {
Fields.child_usage_keys: get_child_keys(published_children),
Fields.child_display_names: get_child_names(published_children),
Fields.child_usage_keys: [str(child.key) for child in published_children],
Fields.child_display_names: [child.display_name for child in published_children],
},
}

Expand Down
21 changes: 16 additions & 5 deletions openedx/core/djangoapps/content_libraries/api/block_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,20 @@ class LibraryXBlockMetadata(PublishableItem):
usage_key: LibraryUsageLocatorV2

@classmethod
def from_component(cls, library_key, component, associated_collections=None):
def from_component(
cls,
library_key: LibraryLocatorV2,
component,
associated_collections=None,
use_published=False,
) -> LibraryXBlockMetadata:
"""
Construct a LibraryXBlockMetadata from a Component object.
Requires that the draft version of the component exists, unless you
specify use_published=True, in which case it requires that the published
version exists. The 'display_name' and 'modified' fields will depend on
which version you request.
"""
# Import content_tagging.api here to avoid circular imports
from openedx.core.djangoapps.content_tagging.api import get_object_tag_counts
Expand All @@ -61,17 +72,17 @@ def from_component(cls, library_key, component, associated_collections=None):
draft = component.versioning.draft
published = component.versioning.published
last_draft_created = draft.created if draft else None
last_draft_created_by = draft.publishable_entity_version.created_by if draft else None
last_draft_created_by = draft.publishable_entity_version.created_by if draft else ""
usage_key = library_component_usage_key(library_key, component)
tags = get_object_tag_counts(str(usage_key), count_implicit=True)

return cls(
usage_key=usage_key,
display_name=draft.title,
display_name=published.title if use_published else draft.title,
created=component.created,
created_by=component.created_by.username if component.created_by else None,
modified=draft.created,
draft_version_num=draft.version_num,
modified=published.created if use_published else draft.created,
draft_version_num=draft.version_num if draft else None,
published_version_num=published.version_num if published else None,
published_display_name=published.title if published else None,
last_published=None if last_publish_log is None else last_publish_log.published_at,
Expand Down
Loading
Loading