Skip to content

Commit 1c044ed

Browse files
committed
[WIP] Introduce SUMMARY_MAX_PARAGRAPHS
Let users use only the first n paragraphs of the article as the article summary. The advantage of this approach is that it avoids the arbitrary word-count ellipsis that cuts content off mid-sentence, without having to copy the first paragraph of the article into the article's summary metadata. If both SUMMARY_MAX_PARAGRAPHS and SUMMARY_MAX_LENGTH are set, then SUMMARY_MAX_LENGTH is applied to the content of the first SUMMARY_MAX_PARAGRAPHS paragraphs.
1 parent 7194cf5 commit 1c044ed

File tree

5 files changed

+75
-1
lines changed

5 files changed

+75
-1
lines changed

RELEASE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Release type: minor
2+
3+
Allow users to use the first n paragraphs as the article summary.

pelican/contents.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
sanitised_join,
2525
set_date_tzinfo,
2626
slugify,
27+
truncate_html_paragraphs,
2728
truncate_html_words,
2829
)
2930

@@ -431,8 +432,17 @@ def get_summary(self, siteurl):
431432
if "summary" in self.metadata:
432433
return self.metadata["summary"]
433434

435+
content = self.content
436+
if (
437+
"SUMMARY_MAX_PARAGRAPHS" in self.settings
438+
and self.settings["SUMMARY_MAX_PARAGRAPHS"] is not None
439+
):
440+
content = truncate_html_paragraphs(
441+
self.content, self.settings["SUMMARY_MAX_PARAGRAPHS"]
442+
)
443+
434444
if self.settings["SUMMARY_MAX_LENGTH"] is None:
435-
return self.content
445+
return content
436446

437447
return truncate_html_words(
438448
self.content,

pelican/tests/test_contents.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,31 @@ def test_summary_max_length(self):
117117
page = Page(**page_kwargs)
118118
self.assertEqual(page.summary, "")
119119

120+
def test_summary_paragraph(self):
    """Summary honours SUMMARY_MAX_PARAGRAPHS when no word limit is set."""
    # If a :SUMMARY_MAX_PARAGRAPHS: is set, the generated summary should
    # not exceed the given paragraph count.
    kwargs = self._copy_page_kwargs()
    # Remove the explicit summary metadata so that it cannot be returned
    # as-is and the summary has to be derived from the content instead.
    del kwargs["metadata"]["summary"]

    config = get_settings()
    config["SUMMARY_MAX_PARAGRAPHS"] = 1
    config["SUMMARY_MAX_LENGTH"] = None
    kwargs["settings"] = config

    # NOTE(review): expecting the full TEST_CONTENT back presumably relies on
    # TEST_CONTENT being a single paragraph -- confirm against its definition.
    self.assertEqual(Page(**kwargs).summary, TEST_CONTENT)
131+
132+
def test_summary_paragraph_max_length(self):
    """Summary honours both the paragraph limit and the word limit."""
    # If a :SUMMARY_MAX_PARAGRAPHS: and :SUMMARY_MAX_LENGTH: are set, the
    # generated summary should not exceed the given paragraph count and
    # not exceed the given length.
    kwargs = self._copy_page_kwargs()
    # Remove the explicit summary metadata so that it cannot be returned
    # as-is and the summary has to be derived from the content instead.
    del kwargs["metadata"]["summary"]

    config = get_settings()
    config["SUMMARY_MAX_PARAGRAPHS"] = 1
    config["SUMMARY_MAX_LENGTH"] = 10
    kwargs["settings"] = config

    # The expected value is TEST_CONTENT cut down to 10 words.
    expected = truncate_html_words(TEST_CONTENT, 10)
    self.assertEqual(Page(**kwargs).summary, expected)
144+
120145
def test_summary_end_suffix(self):
121146
# If a :SUMMARY_END_SUFFIX: is set, and there is no other summary,
122147
# generated summary should contain the specified marker at the end.

pelican/tests/test_utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,23 @@ def test_truncate_html_words(self):
401401
self.assertEqual(utils.truncate_html_words("&#1234 text", 20), "&#1234 text")
402402
self.assertEqual(utils.truncate_html_words("&#xabc text", 20), "&#xabc text")
403403

404+
def test_truncate_html_paragraphs(self):
    """Check truncate_html_paragraphs on one-, two- and three-paragraph HTML."""
    truncate = utils.truncate_html_paragraphs

    first = "<p>one</p>"
    first_two = first + "<p>two</p>"
    all_three = first_two + "<p>three</p>"

    # Each case is (html, requested paragraph count, expected result).
    # Asking for more paragraphs than exist must return the input unchanged.
    cases = (
        (first, 0, ""),
        (first, 1, first),
        (first, 2, first),
        (first_two, 1, first),
        (first_two, 2, first_two),
        (all_three, 1, first),
        (all_three, 2, first_two),
        (all_three, 3, all_three),
        (all_three, 4, all_three),
    )
    for html, count, expected in cases:
        self.assertEqual(truncate(html, count), expected)
420+
404421
def test_process_translations(self):
405422
fr_articles = []
406423
en_articles = []

pelican/utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,25 @@ def truncate_html_words(s, num, end_text="…"):
600600
return out
601601

602602

603+
def truncate_html_paragraphs(s, count):
    """Truncate HTML to a certain number of paragraphs.

    Scans ``s`` from left to right and collects up to ``count`` complete
    ``<p>...</p>`` elements. The result is the concatenation of those
    elements only: any text located before, between or after them is
    dropped. Newlines inside a kept paragraph are preserved.

    Only bare ``<p>`` opening tags are recognised (no attributes). Scanning
    stops as soon as no further opening tag, or no closing tag following it,
    can be found, so an unterminated paragraph is not included.

    :param s: HTML string to truncate
    :param count: number of paragraphs to keep; zero or a negative value
        yields an empty string
    :return: the first ``count`` paragraphs of ``s`` joined together, or
        fewer if ``s`` does not contain that many
    """
    open_tag = "<p>"
    close_tag = "</p>"

    paragraphs = []
    position = 0
    for _ in range(count):
        start = s.find(open_tag, position)
        if start == -1:
            # No paragraph left. Stop here rather than slicing with -1,
            # which would leak trailing characters into the result.
            break
        # Look for the closing tag *after* the opening one so that a stray
        # "</p>" earlier in the text cannot produce an inverted slice.
        end = s.find(close_tag, start + len(open_tag))
        if end == -1:
            break
        position = end + len(close_tag)
        paragraphs.append(s[start:position])

    return "".join(paragraphs)
620+
621+
603622
def process_translations(content_list, translation_id=None):
604623
"""Finds translations and returns them.
605624

0 commit comments

Comments
 (0)