Skip to content

Commit 5704e81

Browse files
committed
🚚(back) serve legacy timed text tracks from Scaleway S3
Legacy timed text tracks are now served from the aws/ directory in Scaleway S3, through the Django storage backend already in place. As the newer timed text tracks are already stored in Scaleway, we do not rename the legacy files, and we allow them to be served without requiring signed URLs.
1 parent 8817db4 commit 5704e81

File tree

4 files changed

+35
-179
lines changed

4 files changed

+35
-179
lines changed

src/backend/marsha/core/models/video.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -623,32 +623,6 @@ class Meta:
623623
)
624624
]
625625

626-
def get_source_s3_key(self, stamp=None):
627-
"""Compute the S3 key in the source bucket.
628-
629-
It is built from the video ID + ID of the timed text track + version stamp + language +
630-
closed captioning flag.
631-
632-
Parameters
633-
----------
634-
stamp: Type[string]
635-
Passing a value for this argument will return the source S3 key for the timed text
636-
track assuming its active stamp is set to this value. This is useful to create an
637-
upload policy for this prospective version of the track, so that the client can
638-
upload the file to S3 and the confirmation lambda can set the `uploaded_on` field
639-
to this value only after the file upload and processing is successful.
640-
641-
Returns
642-
-------
643-
string
644-
The S3 key for the timed text files in the source bucket, where uploaded files are
645-
stored before they are converted and copied to the destination bucket.
646-
647-
"""
648-
stamp = stamp or self.uploaded_on_stamp()
649-
mode = f"_{self.mode}" if self.mode else ""
650-
return f"{self.video.pk}/timedtexttrack/{self.pk}/{stamp}_{self.language}{mode}"
651-
652626
def get_storage_prefix(
653627
self,
654628
stamp=None,

src/backend/marsha/core/serializers/timed_text_track.py

Lines changed: 15 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
"""Structure of TimedTextTrack related models API responses with DRF serializers."""
22

3-
from urllib.parse import quote_plus
4-
53
from django.conf import settings
6-
from django.utils.text import slugify
74

85
from rest_framework import serializers
96

10-
from marsha.core.defaults import CELERY_PIPELINE
7+
from marsha.core.defaults import AWS_STORAGE_BASE_DIRECTORY, CELERY_PIPELINE
118
from marsha.core.models import TimedTextTrack
12-
from marsha.core.serializers.base import TimestampField, get_video_cloudfront_url_params
9+
from marsha.core.serializers.base import TimestampField
1310
from marsha.core.storage.storage_class import file_storage
14-
from marsha.core.utils import cloudfront_utils, time_utils
11+
from marsha.core.utils import time_utils
1512

1613

1714
class TimedTextTrackSerializer(serializers.ModelSerializer):
@@ -80,51 +77,8 @@ def create(self, validated_data):
8077

8178
return super().create(validated_data)
8279

83-
def _sign_url(self, url, video_id):
84-
"""Generate a presigned cloudfront url.
85-
86-
Parameters
87-
----------
88-
url: string
89-
The url to sign
90-
91-
Returns:
92-
string
93-
The signed url
94-
95-
"""
96-
params = get_video_cloudfront_url_params(video_id)
97-
return cloudfront_utils.build_signed_url(url, params)
98-
99-
def _generate_url(self, obj, object_path, extension=None, content_disposition=None):
100-
"""Generate a url to fetch a timed text track file depending on the arguments passed.
101-
102-
Parameters:
103-
obj : Type[models.TimedTextTrack]
104-
The timed text track that we want to serialize
105-
106-
object_path: string
107-
The path segment under which the timed text track is stored
108-
109-
extension: string or None
110-
If the timed text track needs an extension in the url, append it to the end
111-
112-
content_disposition: string or None
113-
Add a response-content-disposition query string to url if present
114-
"""
115-
base = f"{settings.AWS_S3_URL_PROTOCOL}://{settings.CLOUDFRONT_DOMAIN}/{obj.video_id}"
116-
stamp = time_utils.to_timestamp(obj.uploaded_on)
117-
mode = f"_{obj.mode}" if obj.mode else ""
118-
url = f"{base}/{object_path}/{stamp}_{obj.language:s}{mode:s}"
119-
if extension:
120-
url = f"{url}.{extension}"
121-
122-
if content_disposition:
123-
url = f"{url}?response-content-disposition={content_disposition}"
124-
return url
125-
12680
def get_source_url(self, obj):
127-
"""Source url of the timed text track, signed with a CloudFront key if activated.
81+
"""Source url of the timed text track.
12882
12983
This is the url of the uploaded file without any modification.
13084
@@ -150,21 +104,15 @@ def get_source_url(self, obj):
150104

151105
return file_storage.url(f"{base}/source.{obj.extension}")
152106

153-
# Default AWS fallback
154-
filename = f"{slugify(obj.video.playlist.title)}_{stamp}.{obj.extension}"
155-
url = self._generate_url(
156-
obj,
157-
"timedtext/source",
158-
content_disposition=quote_plus(f"attachment; filename={filename}"),
159-
)
107+
# Default fallback to location under "aws" directory
108+
base = obj.get_storage_prefix(base_dir=AWS_STORAGE_BASE_DIRECTORY)
109+
stamp = time_utils.to_timestamp(obj.uploaded_on)
110+
mode = f"_{obj.mode}" if obj.mode else ""
160111

161-
# Sign the url only if the functionality is activated
162-
if settings.CLOUDFRONT_SIGNED_URLS_ACTIVE:
163-
url = self._sign_url(url, obj.video_id)
164-
return url
112+
return file_storage.url(f"{base}/source/{stamp}_{obj.language:s}{mode:s}")
165113

166114
def get_url(self, obj):
167-
"""Url of the timed text track, signed with a CloudFront key if activated.
115+
"""Url of the timed text track.
168116
169117
Parameters
170118
----------
@@ -187,10 +135,9 @@ def get_url(self, obj):
187135

188136
return file_storage.url(f"{base}/{stamp}.vtt")
189137

190-
# Default AWS fallback
191-
url = self._generate_url(obj, "timedtext", extension="vtt")
138+
# Default fallback to location under "aws" directory
139+
base = obj.get_storage_prefix(base_dir=AWS_STORAGE_BASE_DIRECTORY)
140+
stamp = time_utils.to_timestamp(obj.uploaded_on)
141+
mode = f"_{obj.mode}" if obj.mode else ""
192142

193-
# Sign the url only if the functionality is activated
194-
if settings.CLOUDFRONT_SIGNED_URLS_ACTIVE:
195-
url = self._sign_url(url, obj.video_id)
196-
return url
143+
return file_storage.url(f"{base}/{stamp}_{obj.language:s}{mode:s}.vtt")

src/backend/marsha/core/tests/api/timed_text_tracks/test_retrieve.py

Lines changed: 14 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,21 @@
33
from datetime import datetime, timezone as baseTimezone
44
import json
55
import random
6-
from unittest import mock
76

87
from django.test import TestCase, override_settings
98

109
from marsha.core import factories, models
11-
from marsha.core.api import timezone
1210
from marsha.core.defaults import AWS_PIPELINE
1311
from marsha.core.factories import TimedTextTrackFactory, UserFactory, VideoFactory
1412
from marsha.core.simple_jwt.factories import (
1513
InstructorOrAdminLtiTokenFactory,
1614
StudentLtiTokenFactory,
1715
UserAccessTokenFactory,
1816
)
19-
from marsha.core.tests.testing_utils import RSA_KEY_MOCK
17+
18+
19+
# flake8: noqa: E501
20+
# pylint: disable=line-too-long
2021

2122

2223
class TimedTextTrackRetrieveAPITest(TestCase):
@@ -55,7 +56,7 @@ def test_api_timed_text_track_read_detail_student(self):
5556
content, {"detail": "You do not have permission to perform this action."}
5657
)
5758

58-
@override_settings(CLOUDFRONT_SIGNED_URLS_ACTIVE=False)
59+
@override_settings(MEDIA_URL="https://abc.svc.edge.scw.cloud/")
5960
def test_api_timed_text_track_read_detail_token_user(self):
6061
"""A token user associated to a video can read a timed text track related to this video."""
6162
timed_text_track = TimedTextTrackFactory(
@@ -91,12 +92,11 @@ def test_api_timed_text_track_read_detail_token_user(self):
9192
"language": "fr",
9293
"upload_state": "ready",
9394
"source_url": (
94-
"https://abc.cloudfront.net/b8d40ed7-95b8-4848-98c9-50728dfee25d/"
95-
"timedtext/source/1533686400_fr_cc?response-content-disposition=a"
96-
"ttachment%3B+filename%3Dfoo_1533686400.srt"
95+
"https://abc.svc.edge.scw.cloud/aws/b8d40ed7-95b8-4848-98c9-50728dfee25d/"
96+
"timedtext/source/1533686400_fr_cc"
9797
),
9898
"url": (
99-
"https://abc.cloudfront.net/b8d40ed7-95b8-4848-98c9-50728dfee25d/"
99+
"https://abc.svc.edge.scw.cloud/aws/b8d40ed7-95b8-4848-98c9-50728dfee25d/"
100100
"timedtext/1533686400_fr_cc.vtt"
101101
),
102102
"video": str(timed_text_track.video.id),
@@ -116,7 +116,7 @@ def test_api_timed_text_track_read_detail_token_user(self):
116116
content, {"detail": "You do not have permission to perform this action."}
117117
)
118118

119-
@override_settings(CLOUDFRONT_SIGNED_URLS_ACTIVE=False)
119+
@override_settings(MEDIA_URL="https://abc.svc.edge.scw.cloud/")
120120
def test_api_timed_text_track_without_extension_read_detail_token_user(self):
121121
"""A timed text track without extension should return empty source url."""
122122
timed_text_track = TimedTextTrackFactory(
@@ -152,7 +152,7 @@ def test_api_timed_text_track_without_extension_read_detail_token_user(self):
152152
"upload_state": "ready",
153153
"source_url": None,
154154
"url": (
155-
"https://abc.cloudfront.net/b8d40ed7-95b8-4848-98c9-50728dfee25d/"
155+
"https://abc.svc.edge.scw.cloud/aws/b8d40ed7-95b8-4848-98c9-50728dfee25d/"
156156
"timedtext/1533686400_fr_cc.vtt"
157157
),
158158
"video": str(timed_text_track.video.id),
@@ -172,7 +172,7 @@ def test_api_timed_text_track_without_extension_read_detail_token_user(self):
172172
content, {"detail": "You do not have permission to perform this action."}
173173
)
174174

175-
@override_settings(CLOUDFRONT_SIGNED_URLS_ACTIVE=False)
175+
@override_settings(MEDIA_URL="https://abc.svc.edge.scw.cloud/")
176176
def test_api_timed_text_track_read_detail_admin_user(self):
177177
"""Admin user associated to a video can read a timed text track related to this video."""
178178
timed_text_track = TimedTextTrackFactory(
@@ -209,12 +209,11 @@ def test_api_timed_text_track_read_detail_admin_user(self):
209209
"language": "fr",
210210
"upload_state": "ready",
211211
"source_url": (
212-
"https://abc.cloudfront.net/b8d40ed7-95b8-4848-98c9-50728dfee25d/timedtext/"
213-
"source/1533686400_fr_cc?response-content-disposition=attachment%3B+filenam"
214-
"e%3Dfoo_1533686400.srt"
212+
"https://abc.svc.edge.scw.cloud/aws/b8d40ed7-95b8-4848-98c9-50728dfee25d/timedtext/"
213+
"source/1533686400_fr_cc"
215214
),
216215
"url": (
217-
"https://abc.cloudfront.net/b8d40ed7-95b8-4848-98c9-50728dfee25d/timedtext/"
216+
"https://abc.svc.edge.scw.cloud/aws/b8d40ed7-95b8-4848-98c9-50728dfee25d/timedtext/"
218217
"1533686400_fr_cc.vtt"
219218
),
220219
"video": str(timed_text_track.video.id),
@@ -258,7 +257,6 @@ def test_api_timed_text_track_read_instructor_in_read_only(self):
258257
)
259258
self.assertEqual(response.status_code, 403)
260259

261-
@override_settings(CLOUDFRONT_SIGNED_URLS_ACTIVE=False)
262260
def test_api_timed_text_track_read_detail_token_user_no_active_stamp(self):
263261
"""A timed text track with no active stamp should not fail.
264262
@@ -279,7 +277,6 @@ def test_api_timed_text_track_read_detail_token_user_no_active_stamp(self):
279277
content = json.loads(response.content)
280278
self.assertIsNone(content["url"])
281279

282-
@override_settings(CLOUDFRONT_SIGNED_URLS_ACTIVE=False)
283280
def test_api_timed_text_track_read_detail_token_user_not_ready(self):
284281
"""A timed_text_track that has never been uploaded successfully should have no url."""
285282
timed_text_track = TimedTextTrackFactory(
@@ -299,66 +296,6 @@ def test_api_timed_text_track_read_detail_token_user_not_ready(self):
299296
content = json.loads(response.content)
300297
self.assertIsNone(content["url"])
301298

302-
@override_settings(
303-
CLOUDFRONT_SIGNED_URLS_ACTIVE=True,
304-
CLOUDFRONT_SIGNED_PUBLIC_KEY_ID="cloudfront-access-key-id",
305-
)
306-
@mock.patch("builtins.open", new_callable=mock.mock_open, read_data=RSA_KEY_MOCK)
307-
def test_api_timed_text_track_read_detail_token_user_signed_urls(self, _mock_open):
308-
"""Activating signed urls should add Cloudfront query string authentication parameters."""
309-
timed_text_track = TimedTextTrackFactory(
310-
video__pk="b8d40ed7-95b8-4848-98c9-50728dfee25d",
311-
video__playlist__title="foo",
312-
mode="cc",
313-
language="fr",
314-
uploaded_on=datetime(2018, 8, 8, tzinfo=baseTimezone.utc),
315-
upload_state="ready",
316-
process_pipeline=AWS_PIPELINE,
317-
extension="srt",
318-
)
319-
jwt_token = InstructorOrAdminLtiTokenFactory(
320-
playlist=timed_text_track.video.playlist
321-
)
322-
323-
# Get the timed_text_track via the API using the JWT token
324-
# fix the time so that the url signature is deterministic and can be checked
325-
now = datetime(2018, 8, 8, tzinfo=baseTimezone.utc)
326-
with mock.patch.object(timezone, "now", return_value=now):
327-
response = self.client.get(
328-
self._get_url(timed_text_track.video, timed_text_track),
329-
HTTP_AUTHORIZATION=f"Bearer {jwt_token}",
330-
)
331-
self.assertEqual(response.status_code, 200)
332-
content = json.loads(response.content)
333-
334-
expected_cloudfront_signature = (
335-
"Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6"
336-
"Ly9hYmMuY2xvdWRmcm9udC5uZXQvYjhkNDBlZDctOTViOC00ODQ4LTk4YzktNTA3MjhkZmVlM"
337-
"jVkLyoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE1Mz"
338-
"M2OTM2MDB9fX1dfQ__&Signature=PkRZOcfOxbalcuNG9XN6wO72enDenSetWgTthNjR4Nsy"
339-
"UvCao1rZ9s4MZbqU61NDxB8Q3yDoWZUm-PP0uFa6v2Rz9g6XSTCA~-x8Yhh72-jc1J5NZOavh"
340-
"~HT6lbC2HnPAesaxbVG4EejSDuXjncE8kBiUdT6YNotAv1JzbqidXuOBdkSjR32PEav98PT0r"
341-
"UKmXohNAL-RFdwHL1cKGhy17CoxABn4ToDJ-t0Z4cT4husb5HebH~6nOmhlDDdFMSdmD7FjZ~"
342-
"qaJwagJ3sAqG1ph9NcTX45bDn2rcrDXUy0jHWxBPYUId6NGbKCITp1SFj0QAsoxsXnh90Ibkr"
343-
"GQ4XUA__&Key-Pair-Id=cloudfront-access-key-id"
344-
)
345-
346-
self.assertEqual(
347-
content["url"],
348-
(
349-
"https://abc.cloudfront.net/b8d40ed7-95b8-4848-98c9-50728dfee25d/timedtext"
350-
f"/1533686400_fr_cc.vtt?{expected_cloudfront_signature}"
351-
),
352-
)
353-
self.assertEqual(
354-
content["source_url"],
355-
(
356-
"https://abc.cloudfront.net/b8d40ed7-95b8-4848-98c9-50728dfee25d/timedtext"
357-
"/source/1533686400_fr_cc?response-content-disposition=attachment%3B+filen"
358-
f"ame%3Dfoo_1533686400.srt&{expected_cloudfront_signature}"
359-
),
360-
)
361-
362299
def test_api_timed_text_track_read_detail_staff_or_user(self):
363300
"""Users authenticated via a session are not allowed to read a timed text track detail."""
364301
for user in [UserFactory(), UserFactory(is_staff=True)]:

src/backend/marsha/core/tests/serializers/test_timed_text_track.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
class TimedTextTrackSerializerTest(TestCase):
1313
"""Test the TimedTextTrack serializer."""
1414

15+
@override_settings(MEDIA_URL="https://abc.svc.edge.scw.cloud/")
1516
def test_timed_text_track_serializer_urls_with_aws_pipeline(self):
1617
"""The TimedTextTrackSerializer should return storage URLs under the "aws" directory."""
1718
date = datetime(2022, 1, 1, tzinfo=baseTimezone.utc)
@@ -32,18 +33,15 @@ def test_timed_text_track_serializer_urls_with_aws_pipeline(self):
3233
)
3334
serializer = TimedTextTrackSerializer(timed_text_track)
3435
self.assertEqual(
35-
f"https://abc.cloudfront.net/{video.pk}/timedtext/1640995200_fr_ts.vtt",
36+
f"https://abc.svc.edge.scw.cloud/aws/{video.pk}/timedtext/1640995200_fr_ts.vtt",
3637
serializer.data["url"],
3738
)
3839
self.assertEqual(
39-
f"https://abc.cloudfront.net/{video.pk}/timedtext/source/1640995200_fr_ts"
40-
"?response-content-disposition=attachment%3B+filename%3Dplaylist-001_1640995200.srt",
40+
f"https://abc.svc.edge.scw.cloud/aws/{video.pk}/timedtext/source/1640995200_fr_ts",
4141
serializer.data["source_url"],
4242
)
4343

44-
@override_settings(
45-
MEDIA_URL="https://abc.cloudfront.net/",
46-
)
44+
@override_settings(MEDIA_URL="https://abc.svc.edge.scw.cloud/")
4745
def test_timed_text_track_serializer_urls_with_celery_pipeline(self):
4846
"""The TimedTextTrackSerializer should return videos storage URLs."""
4947
date = datetime(2022, 1, 1, tzinfo=baseTimezone.utc)
@@ -63,12 +61,12 @@ def test_timed_text_track_serializer_urls_with_celery_pipeline(self):
6361
)
6462
serializer = TimedTextTrackSerializer(timed_text_track)
6563
self.assertEqual(
66-
f"https://abc.cloudfront.net/vod/{video.pk}/timedtext/"
64+
f"https://abc.svc.edge.scw.cloud/vod/{video.pk}/timedtext/"
6765
f"{timed_text_track.pk}/1640995200/1640995200.vtt",
6866
serializer.data["url"],
6967
)
7068
self.assertEqual(
71-
f"https://abc.cloudfront.net/vod/{video.pk}/timedtext/"
69+
f"https://abc.svc.edge.scw.cloud/vod/{video.pk}/timedtext/"
7270
f"{timed_text_track.pk}/1640995200/source.srt",
7371
serializer.data["source_url"],
7472
)

0 commit comments

Comments
 (0)