diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000000..c4272ec5b0 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,3 @@ +Release type: minor + +Allow users to use the first N paragraphs of an article as its summary. diff --git a/docs/content.rst b/docs/content.rst index cacacea9aa..dcb9930968 100644 --- a/docs/content.rst +++ b/docs/content.rst @@ -162,7 +162,10 @@ author you can use ``author`` field. If you do not explicitly specify summary metadata for a given post, the ``SUMMARY_MAX_LENGTH`` setting can be used to specify how many words from the -beginning of an article are used as the summary. +beginning of an article are used as the summary. You can also use the first N +paragraphs from the post as a summary using the ``SUMMARY_MAX_PARAGRAPHS`` +setting. If both options are in use, the specified number of paragraphs will +be used but may be truncated to respect the specified max length. You can also extract any metadata from the filename through a regular expression to be set in the ``FILENAME_METADATA`` setting. All named groups diff --git a/docs/settings.rst b/docs/settings.rst index e9edffde0e..d0c14d043a 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -308,6 +308,14 @@ Basic settings does not otherwise specify a summary. Setting to ``None`` will cause the summary to be a copy of the original content. +.. data:: SUMMARY_MAX_PARAGRAPHS = 1 + + When creating a short summary of an article, this will be the number of + paragraphs to use as the summary. This only applies if your content + does not otherwise specify a summary. Setting to ``None`` will cause the + summary to use the whole text (up to ``SUMMARY_MAX_LENGTH``) instead of just + the first N paragraphs. + .. 
data:: SUMMARY_END_SUFFIX = '…' When creating a short summary of an article and the result was truncated to diff --git a/pelican/contents.py b/pelican/contents.py index 474e5bbfea..51e89123af 100644 --- a/pelican/contents.py +++ b/pelican/contents.py @@ -24,6 +24,7 @@ sanitised_join, set_date_tzinfo, slugify, + truncate_html_paragraphs, truncate_html_words, ) @@ -431,8 +432,17 @@ def get_summary(self, siteurl): if "summary" in self.metadata: return self.metadata["summary"] + content = self.content + if ( + "SUMMARY_MAX_PARAGRAPHS" in self.settings + and self.settings["SUMMARY_MAX_PARAGRAPHS"] is not None + ): + content = truncate_html_paragraphs( + self.content, self.settings["SUMMARY_MAX_PARAGRAPHS"] + ) + if self.settings["SUMMARY_MAX_LENGTH"] is None: - return self.content + return content return truncate_html_words( self.content, diff --git a/pelican/tests/test_contents.py b/pelican/tests/test_contents.py index 9dc7b70d71..96890fecca 100644 --- a/pelican/tests/test_contents.py +++ b/pelican/tests/test_contents.py @@ -117,6 +117,31 @@ def test_summary_max_length(self): page = Page(**page_kwargs) self.assertEqual(page.summary, "") + def test_summary_paragraph(self): + # If a :SUMMARY_MAX_PARAGRAPHS: is set, the generated summary should + # not exceed the given paragraph count. + page_kwargs = self._copy_page_kwargs() + settings = get_settings() + page_kwargs["settings"] = settings + del page_kwargs["metadata"]["summary"] + settings["SUMMARY_MAX_PARAGRAPHS"] = 1 + settings["SUMMARY_MAX_LENGTH"] = None + page = Page(**page_kwargs) + self.assertEqual(page.summary, TEST_CONTENT) + + def test_summary_paragraph_max_length(self): + # If a :SUMMARY_MAX_PARAGRAPHS: and :SUMMARY_MAX_LENGTH: are set, the + # generated summary should not exceed the given paragraph count and + # not exceed the given length. 
+ page_kwargs = self._copy_page_kwargs() + settings = get_settings() + page_kwargs["settings"] = settings + del page_kwargs["metadata"]["summary"] + settings["SUMMARY_MAX_PARAGRAPHS"] = 1 + settings["SUMMARY_MAX_LENGTH"] = 10 + page = Page(**page_kwargs) + self.assertEqual(page.summary, truncate_html_words(TEST_CONTENT, 10)) + def test_summary_end_suffix(self): # If a :SUMMARY_END_SUFFIX: is set, and there is no other summary, # generated summary should contain the specified marker at the end. diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py index 22dd8e38a5..1ffe44407c 100644 --- a/pelican/tests/test_utils.py +++ b/pelican/tests/test_utils.py @@ -401,6 +401,23 @@ def test_truncate_html_words(self): self.assertEqual(utils.truncate_html_words("Ӓ text", 20), "Ӓ text") self.assertEqual(utils.truncate_html_words("઼ text", 20), "઼ text") + def test_truncate_html_paragraphs(self): + one = "

one

" + + self.assertEqual(utils.truncate_html_paragraphs(one, 0), "") + self.assertEqual(utils.truncate_html_paragraphs(one, 1), one) + self.assertEqual(utils.truncate_html_paragraphs(one, 2), one) + + two = one + "

two

" + self.assertEqual(utils.truncate_html_paragraphs(two, 1), one) + self.assertEqual(utils.truncate_html_paragraphs(two, 2), two) + + three = two + "

three

def truncate_html_paragraphs(s, count):
    """Truncate HTML to its first ``count`` paragraphs.

    Scans ``s`` from left to right for ``<p>...</p>`` elements and returns the
    first ``count`` of them concatenated. The opening tag may carry
    attributes (``<p class="lead">``). Everything inside a kept paragraph,
    including newlines and nested markup, is returned verbatim; text located
    before, between or after the kept paragraphs is dropped.

    :param s: HTML string to truncate
    :param count: number of paragraphs to keep; zero or a negative value
        yields an empty string
    :return: the kept paragraphs joined together. Fewer than ``count``
        paragraphs are returned when ``s`` does not contain that many, and a
        trailing paragraph without a closing ``</p>`` is left out.
    """
    # Imported locally so the function does not depend on the module's
    # import block.
    import re

    # ``<p>`` or ``<p ...attributes...>``; the whitespace requirement keeps
    # other tags that merely start with "p" (``<pre>``, ``<param>``) out.
    opening_tag = re.compile(r"<p(?:\s[^>]*)?>")
    closing_tag = "</p>"

    paragraphs = []
    position = 0
    for _ in range(count):
        opening = opening_tag.search(s, position)
        if opening is None:
            # No paragraph left: stop instead of slicing with a -1 index.
            break

        # Look for the closing tag only after the opening one, so a stray
        # ``</p>`` earlier in the text cannot produce an inverted slice.
        closing = s.find(closing_tag, opening.end())
        if closing == -1:
            # Unterminated paragraph: its end is unknown, so it is skipped
            # rather than emitting a partial or unbalanced fragment.
            break

        position = closing + len(closing_tag)
        paragraphs.append(s[opening.start():position])

    return "".join(paragraphs)