Skip to content

Commit

Permalink
Merge pull request #619 from ZeitOnline/ZO-4683_article_tts_reference
Browse files Browse the repository at this point in the history
ZO-4683: Check date_last_modified instead of checksum
  • Loading branch information
stollero committed Feb 19, 2024
2 parents 2fc9501 + c23b3d2 commit 47ae963
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 33 deletions.
1 change: 1 addition & 0 deletions core/docs/changelog/ZO-4683.change
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ZO-4683: Only publish article after referencing tts audio if it is unchanged
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
<attribute ns="http://namespaces.zeit.de/CMS/workflow" name="corrected">yes</attribute>
<attribute ns="http://namespaces.zeit.de/CMS/document" name="date_first_released">2022-08-09T06:11:30+00:00</attribute>
<attribute ns="http://namespaces.zeit.de/CMS/document" name="date_last_checkout">2022-08-10T21:35:24.546250+00:00</attribute>
<attribute ns="http://namespaces.zeit.de/CMS/document" name="date_last_modified">2022-08-10T21:35:24.546250+00:00</attribute>
<attribute ns="http://namespaces.zeit.de/CMS/workflow" name="date_last_published">2022-08-10T21:35:23.950553+00:00</attribute>
<attribute ns="http://namespaces.zeit.de/CMS/workflow" name="date_last_published_semantic">2022-08-09T06:11:30+00:00</attribute>
<attribute ns="http://namespaces.zeit.de/CMS/workflow" name="edited">yes</attribute>
Expand Down
39 changes: 21 additions & 18 deletions core/src/zeit/speech/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
from zeit.cms.repository.interfaces import IFolder
from zeit.cms.workflow.interfaces import IPublish, IPublishInfo
from zeit.connector.search import SearchVar
from zeit.content.article.interfaces import IArticle, ISpeechbertChecksum
from zeit.content.article.interfaces import IArticle
from zeit.content.audio.audio import AUDIO_SCHEMA_NS, Audio
from zeit.content.audio.interfaces import IAudio, IAudioReferences, ISpeechInfo
from zeit.speech.errors import ChecksumMismatchError
from zeit.speech.errors import AudioReferenceError
import zeit.cms.interfaces
import zeit.cms.repository.folder
import zeit.speech.interfaces
Expand Down Expand Up @@ -57,7 +57,7 @@ def _create(self, data: dict) -> IAudio:
speech.audio_type = 'tts'
folder = self._get_target_folder(data['uuid'])
folder[data['uuid']] = speech
log.info('Text-to-speech was created for article uuid %s', data['uuid'])
log.info('Created %s for article uuid %s', speech, data['uuid'])
self._update(data, folder[data['uuid']])
return folder[data['uuid']]

Expand All @@ -73,15 +73,15 @@ def _update(self, data: dict, speech: IAudio):
elif audio['type'] == 'PREVIEW_TTS':
ISpeechInfo(co).preview_url = audio_entry['url']
ISemanticChange(co).last_semantic_change = datetime.now(pytz.UTC)
log.info('Text-to-speech %s was updated for article uuid %s', speech.uniqueId, data['uuid'])
log.info('Updated %s for article uuid %s', speech, data['uuid'])

# Look up the audio object stored for the given article UUID.
# Returns None when the connector search yields no result.
def _find(self, article_uuid: str) -> Optional[IAudio]:
connector = zope.component.getUtility(zeit.connector.interfaces.IConnector)
# Search on AUDIO_ID, compared against the stringified article UUID.
result = list(connector.search([AUDIO_ID], (AUDIO_ID == str(article_uuid))))
if not result:
return None
# Only the first hit is used; its first field is resolved to a content object.
content = zeit.cms.interfaces.ICMSContent(result[0][0])
# NOTE(review): the two log.debug lines below look like the pre- and post-change
# variants of one statement shown side by side in the diff view -- confirm
# which one is current.
log.debug('Text-to-speech %s found for %s.', content.uniqueId, article_uuid)
log.debug('%s found for article uuid %s.', content, article_uuid)
return content

def update(self, data: dict):
Expand All @@ -94,36 +94,39 @@ def update(self, data: dict):
self._add_audio_reference(speech)

# Publish the audio object and add it to the audio references of its article.
def _add_audio_reference(self, speech: IAudio):
# NOTE(review): this line calls _assert_checksum_matches while a later line calls
# _assert_article_unchanged; they look like the old and new variant of one
# statement shown side by side in the diff view -- confirm which one is current.
article = self._assert_checksum_matches(speech)
# The audio object is published synchronously (background=False) before the
# article is checked.
IPublish(speech).publish(background=False)

article = self._assert_article_unchanged(speech)
# Nothing left to do when the article already lists this audio object.
if speech in IAudioReferences(article).items:
log.debug('%s already references %s', article, speech)
return
# The reference is added on the checked-out copy of the article.
with checked_out(article, raise_if_error=True) as co:
references = IAudioReferences(co)
references.add(speech)
log.info('Added reference from %s to %s', article, speech)
# Publish the article synchronously (background=False).
IPublish(article).publish(background=False)

def _article(self, speech: IAudio) -> IArticle:
    """Resolve the article that the given audio object was generated for.

    The article UUID stored in the speech info of *speech* is wrapped as an
    ``IUUID`` and adapted to ``ICMSContent``. ``None`` is passed as the
    adaptation fallback; callers test the result for falsiness.
    """
    article_uuid = ISpeechInfo(speech).article_uuid
    uuid_reference = zeit.cms.content.interfaces.IUUID(article_uuid)
    return zeit.cms.interfaces.ICMSContent(uuid_reference, None)

def _assert_article_unchanged(self, speech: IAudio) -> IArticle:
    """Return the article of *speech* if its publication status is 'published'.

    The article is looked up via ``self._article(speech)`` and its
    ``IPublicationStatus.published`` value is compared against the string
    ``'published'``.

    Raises:
        AudioReferenceError: if the publication status has any other value,
            so that the caller does not add the reference and republish.
    """
    article = self._article(speech)
    status = zeit.cms.workflow.interfaces.IPublicationStatus(article).published
    if status != 'published':
        # Interpolate eagerly: unlike logging calls, exceptions do not apply
        # %-style arguments, so passing them separately would make str(exc)
        # render as a raw tuple of template and objects.
        raise AudioReferenceError(
            '%s was modified after publish. Skipped adding reference %s.' % (article, speech)
        )
    return article

def _remove_reference_from_article(self, speech: IAudio):
article = self._article(speech)
if not article:
log.warning(
'No article found for Text-to-speech %s. ' 'Maybe it was already deleted?',
'No article found for %s. Maybe it was already deleted?',
speech,
)
return
Expand All @@ -135,12 +138,12 @@ def delete(self, data: dict):
speech = self._find(data['article_uuid'])
if not speech:
log.warning(
'No Text-to-speech found for article uuid %s. '
'No audio object found for article uuid %s. '
'Maybe it was already deleted?' % data['article_uuid'],
)
return
self._remove_reference_from_article(speech)
IPublish(speech).retract(background=False)
unique_id = speech.uniqueId
del speech.__parent__[speech.__name__]
log.info('Text-to-speech %s successfully deleted.', unique_id)
log.info('Deleted %s', unique_id)
7 changes: 5 additions & 2 deletions core/src/zeit/speech/errors.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
class ChecksumMismatchError(Exception):
"""An exception raised when the checksum of the article and the speech do not match."""
class AudioReferenceError(Exception):
    """Raised when an audio reference is not added to an article.

    Signals that the article was modified after it was published; the
    reference is skipped to avoid publishing the article together with
    unreviewed changes.
    """
32 changes: 19 additions & 13 deletions core/src/zeit/speech/tests/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from zeit.content.audio.interfaces import IAudioReferences, ISpeechInfo
from zeit.content.audio.testing import AudioBuilder
from zeit.speech.connection import Speech
from zeit.speech.errors import ChecksumMismatchError
from zeit.speech.errors import AudioReferenceError
from zeit.speech.testing import TTS_CREATED, TTS_DELETED, FunctionalTestCase
import zeit.cms.checkout.interfaces
import zeit.cms.workflow.mock
Expand Down Expand Up @@ -87,23 +87,29 @@ def test_update_audio_without_touching_the_article(self):
assert zeit.cms.workflow.mock._publish_count[article.uniqueId] == 2
assert zeit.cms.workflow.mock._publish_count[audio.uniqueId] == 2

def test_if_checksum_does_not_match_do_not_add_reference(self):
tts_msg = self.setup_speech_message('checksum', 'cake')
with pytest.raises(ChecksumMismatchError):
self.create_audio(tts_msg)
# An article that was edited after being published must not get an audio reference.
def test_if_article_changed_do_not_add_reference(self):
IPublish(self.article).publish(background=False)
# Change the article after publishing by adding a paragraph.
with checked_out(self.article) as co:
paragraph = co.body.create_item('p')
paragraph.text = 'the article has changed'
with pytest.raises(AudioReferenceError):
self.create_audio(TTS_CREATED)
# Resolve the article again and check that no reference was added.
article = ICMSContent(self.article_uid)
reference = IAudioReferences(article)
assert not reference.items

# NOTE(review): the two consecutive def lines, the tts_msg assignment and the
# ChecksumMismatchError block look like pre-change lines shown next to their
# replacements in the diff view -- confirm which lines are current.
def test_update_audio_fails_because_checksum_is_not_matching(self):
def test_update_audio_fails_if_article_changed(self):
audio = self.create_audio(TTS_CREATED)
# The audio object has been published exactly once at this point.
assert zeit.cms.workflow.mock._publish_count[audio.uniqueId] == 1

tts_msg = self.setup_speech_message('checksum', 'cake')
article = ICMSContent(self.article_uid)
reference = IAudioReferences(article)
assert audio in reference.items
# Change the article after the reference exists.
with checked_out(self.article) as co:
paragraph = co.body.create_item('p')
paragraph.text = 'the article has changed'
self.repository.connector.search_result = [(self.article.uniqueId)]
# Speech._find is patched to return the audio object created above.
with mock.patch('zeit.speech.connection.Speech._find', return_value=audio):
with pytest.raises(ChecksumMismatchError):
Speech().update(tts_msg)
with pytest.raises(AudioReferenceError):
Speech().update(TTS_CREATED)

def test_handle_delete_event(self):
audio = self.create_audio(TTS_CREATED)
Expand Down Expand Up @@ -137,5 +143,5 @@ def test_unable_to_remove_anything_because_article_is_missing(self):
self.repository.connector.search_result = []
with mock.patch('zeit.speech.connection.Speech._find', return_value=audio):
Speech().delete(TTS_DELETED)
assert f'No article found for Text-to-speech {audio}' in self.caplog.text
assert not self.repository.has_key('audio')
assert f'No article found for {audio}' in self.caplog.text
assert 'audio' not in self.repository

0 comments on commit 47ae963

Please sign in to comment.