Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix import channel in Postgresql #12709

Open
wants to merge 4 commits into
base: release-v0.17.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,13 @@ def process_docstring(app, what, name, obj, options, lines):

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode", "m2r", "notfound.extension"]
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.viewcode",
"m2r",
"notfound.extension",
"sphinxcontrib.jquery",
]

linkcheck_ignore = [
"https://groups.google.com/a/learningequality.org/forum/#!forum/dev"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@
{
"id": "222455c2cc484298b1501a13e1c7eb5c",
"tag_name": "tag_3"
},
{
"id": "0c20e2eb254b4070a713da63380ff0a3",
"tag_name": "velocidad de reacciones químicas"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this is still plain ASCII rather than containing any multi-byte UTF-8 characters?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, this test only checks that 32-character tag names are truncated to 30. I used a real case from channel 07cd1633691b4473b6fda08caf826253 just for the pleasure of using real data.

}
],
"content_contentnode": [
Expand Down
17 changes: 17 additions & 0 deletions kolibri/core/content/test/test_channel_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import logging
import os
import tempfile
import unittest
import uuid

import pytest
from django.conf import settings
from django.core.management import call_command
from django.test import TestCase
from django.test import TransactionTestCase
Expand All @@ -30,6 +32,7 @@
from kolibri.core.content.models import AssessmentMetaData
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.content.models import ContentTag
from kolibri.core.content.models import File
from kolibri.core.content.models import Language
from kolibri.core.content.models import LocalFile
Expand Down Expand Up @@ -832,6 +835,7 @@ class ImportLongDescriptionsTestCase(ContentImportTestBase, TransactionTestCase)
data_name = "longdescriptions"

longdescription = "soverylong" * 45
long_tag_id = "0c20e2eb254b4070a713da63380ff0a3"

def test_long_descriptions(self):
self.assertEqual(
Expand All @@ -845,6 +849,19 @@ def test_long_descriptions(self):
self.longdescription,
)

@unittest.skipIf(
    # Direct attribute access is the idiomatic way to read Django settings;
    # getattr with a constant literal name (B009) adds nothing.
    settings.DATABASES["default"]["ENGINE"] != "django.db.backends.postgresql",
    "Postgresql only test",
)
def test_import_too_long_content_tags(self):
    """
    Test that importing content tags with overly long tag_name fields
    truncates them to the model's max_length.

    Relies on the fixture tag whose id is ``self.long_tag_id``: its
    tag_name is longer than ``ContentTag.tag_name.max_length``, so a
    correct import must have cut it down to exactly that length.
    """
    # Read the limit from the model so the test tracks schema changes.
    max_length = ContentTag._meta.get_field("tag_name").max_length
    long_imported_tag = ContentTag.objects.get(id=self.long_tag_id)
    # assertEqual for consistency with test_long_descriptions above.
    self.assertEqual(len(long_imported_tag.tag_name), max_length)


class Version4ImportTestCase(NaiveImportTestCase):
"""
Expand Down
40 changes: 27 additions & 13 deletions kolibri/core/content/utils/channel_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.db.models.fields.related import ForeignKey
from sqlalchemy import and_
from sqlalchemy import or_
from sqlalchemy import String as sa_String
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.exc import OperationalError
from sqlalchemy.exc import SQLAlchemyError
Expand Down Expand Up @@ -598,7 +599,14 @@ def postgres_table_import(self, model, row_mapper, table_mapper):
def generate_data_with_default(record):
    """Yield one value per destination column for ``record``.

    A ``None`` value falls back to the column default. For non-primary-key
    string columns that declare a length, the value is truncated to that
    length first — source SQLite databases may contain strings longer than
    the model allows, which Postgresql would otherwise reject.
    """
    for col_name, column_obj in columns:
        default = self.get_and_set_column_default(column_obj)
        value = row_mapper(record, col_name)
        is_bounded_string = (
            not column_obj.primary_key
            and isinstance(column_obj.type, sa_String)
            and column_obj.type.length is not None
        )
        if is_bounded_string:
            value = default if value is None else value[: column_obj.type.length]
        yield default if value is None else value

if not merge:
Expand All @@ -624,7 +632,6 @@ def generate_data_with_default(record):
)
else:
# Import here so that we don't need to depend on psycopg2 for Kolibri in general.
from psycopg2.extras import execute_values

pk_name = DestinationTable.primary_key.columns.values()[0].name

Expand All @@ -647,13 +654,23 @@ def generate_data_with_default(record):
)
)
else:
execute_values(
cursor,
# We want to overwrite new values that we are inserting here, so we use an ON CONFLICT DO UPDATE here
# for the resulting SET statement, we generate a statement for each column we are trying to update
"INSERT INTO {table} AS SOURCE ({column_names}) VALUES %s ON CONFLICT ({pk_name}) DO UPDATE SET {set_statement};".format(
list_of_values = (
tuple(datum for datum in generate_data_with_default(record))
for record in results_slice
)
values_str = ", ".join(
cursor.mogrify(
f"({', '.join(['%s'] * len(column_names))})", v
).decode("utf-8")
for v in list_of_values
)
insert_sql = (
"INSERT INTO {table} AS SOURCE ({column_names}) "
"VALUES {values_str} "
"ON CONFLICT ({pk_name}) DO UPDATE SET {set_statement};".format(
table=DestinationTable.name,
column_names=", ".join(column_names),
values_str=values_str,
pk_name=pk_name,
set_statement=", ".join(
[
Expand All @@ -670,13 +687,10 @@ def generate_data_with_default(record):
if column_name != pk_name
]
),
),
(
tuple(datum for datum in generate_data_with_default(record))
for record in results_slice
),
template="(" + "%s, " * (len(columns) - 1) + "%s)",
)
)
cursor.execute(insert_sql)

i += BATCH_SIZE
results_slice = list(islice(results, i, i + BATCH_SIZE))
cursor.close()
Expand Down
7 changes: 5 additions & 2 deletions requirements/docs.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
-r base.txt

# These are for building the docs
sphinx

# Pin Sphinx to the 6.x series: newer Sphinx releases pull in a version of requests that is incompatible with Morango
sphinx>=6,<7
sphinx-intl
sphinx-rtd-theme
# We want to ensure the latest version of sphinx-rtd-theme that has sphinxcontrib-jquery as a dependency
sphinx-rtd-theme~=2.0
sphinx-autobuild
m2r
sphinx-notfound-page