From f19de98b9e6495bf42154ef6dd763b6b8c486fde Mon Sep 17 00:00:00 2001
From: boxydog
Date: Mon, 17 Jun 2024 08:11:21 -0500
Subject: [PATCH] Log warnings about files that would have been processed by disabled readers

---
 pelican/__init__.py           |  5 +++--
 pelican/generators.py         | 27 +++++++++++++++++++++++-
 pelican/readers.py            | 39 ++++++++++++++++++++++++++++++-----
 pelican/tests/test_pelican.py | 24 ++++++++++++++++++++-
 pelican/tests/test_readers.py | 15 +++++++++++++-
 pelican/tests/test_utils.py   |  7 +++++++
 pelican/utils.py              | 15 +++++++++++---
 7 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/pelican/__init__.py b/pelican/__init__.py
index 20d177063..96849bae2 100644
--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@@ -30,7 +30,6 @@
 )
 from pelican.plugins import signals
 from pelican.plugins._utils import get_plugin_name, load_plugins
-from pelican.readers import Readers
 from pelican.server import ComplexHTTPRequestHandler, RootedHTTPServer
 from pelican.settings import read_settings
 from pelican.utils import clean_output_dir, maybe_pluralize, wait_for_changes
@@ -126,6 +125,8 @@ def run(self):
         for p in generators:
             if hasattr(p, "generate_context"):
                 p.generate_context()
+            if hasattr(p, "check_disabled_readers"):
+                p.check_disabled_readers()
 
         # for plugins that create/edit the summary
         logger.debug("Signal all_generators_finalized.send()")
@@ -573,7 +574,7 @@ def autoreload(args, excqueue=None):
         try:
             pelican.run()
 
-            changed_files = wait_for_changes(args.settings, Readers, settings)
+            changed_files = wait_for_changes(args.settings, settings)
             changed_files = {c[1] for c in changed_files}
 
             if settings_file in changed_files:
diff --git a/pelican/generators.py b/pelican/generators.py
index 73b517132..548c494fe 100644
--- a/pelican/generators.py
+++ b/pelican/generators.py
@@ -7,6 +7,7 @@
 from functools import partial
 from itertools import chain, groupby
 from operator import attrgetter
+from typing import List, Optional, Set
 
 from jinja2 import (
     BaseLoader,
@@ -156,7 +157,9 @@ def _include_path(self, path, extensions=None):
 
         return False
 
-    def get_files(self, paths, exclude=None, extensions=None):
+    def get_files(
+        self, paths, exclude: Optional[List[str]] = None, extensions=None
+    ) -> Set[str]:
         """Return a list of files to use, based on rules
 
         :param paths: the list pf paths to search (relative to self.path)
@@ -250,6 +253,13 @@ def __str__(self):
         # return the name of the class for logging purposes
         return self.__class__.__name__
 
+    def _check_disabled_readers(self, paths, exclude: Optional[List[str]]) -> None:
+        """Log warnings for files that would have been processed by disabled readers."""
+        for fil in self.get_files(
+            paths, exclude=exclude, extensions=self.readers.disabled_extensions
+        ):
+            self.readers.check_file(fil)
+
 
 class CachingGenerator(Generator, FileStampDataCacher):
     """Subclass of Generator and FileStampDataCacher classes
@@ -643,6 +653,11 @@ def generate_pages(self, writer):
         self.generate_authors(write)
         self.generate_drafts(write)
 
+    def check_disabled_readers(self) -> None:
+        self._check_disabled_readers(
+            self.settings["ARTICLE_PATHS"], exclude=self.settings["ARTICLE_EXCLUDES"]
+        )
+
     def generate_context(self):
         """Add the articles into the shared context"""
 
@@ -849,6 +864,11 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         signals.page_generator_init.send(self)
 
+    def check_disabled_readers(self) -> None:
+        self._check_disabled_readers(
+            self.settings["PAGE_PATHS"], exclude=self.settings["PAGE_EXCLUDES"]
+        )
+
     def generate_context(self):
         all_pages = []
         hidden_pages = []
@@ -953,6 +973,11 @@ def __init__(self, *args, **kwargs):
         self.fallback_to_symlinks = False
         signals.static_generator_init.send(self)
 
+    def check_disabled_readers(self) -> None:
+        self._check_disabled_readers(
+            self.settings["STATIC_PATHS"], exclude=self.settings["STATIC_EXCLUDES"]
+        )
+
     def generate_context(self):
         self.staticfiles = []
         linked_files = set(self.context["static_links"])
diff --git a/pelican/readers.py b/pelican/readers.py
index 422f39fc2..ee7e34668 100644
--- a/pelican/readers.py
+++ b/pelican/readers.py
@@ -17,7 +17,7 @@
 from pelican.cache import FileStampDataCacher
 from pelican.contents import Author, Category, Page, Tag
 from pelican.plugins import signals
-from pelican.utils import get_date, pelican_open, posixize_path
+from pelican.utils import file_suffix, get_date, pelican_open, posixize_path
 
 try:
     from markdown import Markdown
@@ -125,6 +125,10 @@ def read(self, source_path):
         metadata = {}
         return content, metadata
 
+    def disabled_message(self) -> str:
+        """Return a message explaining why this reader is disabled."""
+        return ""
+
 
 class _FieldBodyTranslator(HTMLTranslator):
     def __init__(self, document):
@@ -347,6 +351,12 @@ def read(self, source_path):
             metadata = {}
         return content, metadata
 
+    def disabled_message(self) -> str:
+        return (
+            "Could not import markdown.Markdown. "
+            "Have you installed the markdown package?"
+        )
+
 
 class HTMLReader(BaseReader):
     """Parses HTML files as input, looking for meta, title, and body tags"""
@@ -508,17 +518,23 @@ class Readers(FileStampDataCacher):
     def __init__(self, settings=None, cache_name=""):
         self.settings = settings or {}
         self.readers = {}
+        self.disabled_readers = {}
+        # extension => reader for readers that are enabled
         self.reader_classes = {}
+        # extension => reader for readers that are not enabled
+        disabled_reader_classes = {}
 
         for cls in [BaseReader] + BaseReader.__subclasses__():
             if not cls.enabled:
                 logger.debug(
                     "Missing dependencies for %s", ", ".join(cls.file_extensions)
                 )
-                continue
 
             for ext in cls.file_extensions:
-                self.reader_classes[ext] = cls
+                if cls.enabled:
+                    self.reader_classes[ext] = cls
+                else:
+                    disabled_reader_classes[ext] = cls
 
         if self.settings["READERS"]:
             self.reader_classes.update(self.settings["READERS"])
@@ -531,6 +547,9 @@ def __init__(self, settings=None, cache_name=""):
 
             self.readers[fmt] = reader_class(self.settings)
 
+        for fmt, reader_class in disabled_reader_classes.items():
+            self.disabled_readers[fmt] = reader_class(self.settings)
+
         # set up caching
         cache_this_level = (
             cache_name != "" and self.settings["CONTENT_CACHING_LAYER"] == "reader"
@@ -541,8 +560,13 @@ def __init__(self, settings=None, cache_name=""):
 
     @property
     def extensions(self):
+        """File extensions that will be processed by a reader."""
         return self.readers.keys()
 
+    @property
+    def disabled_extensions(self):
+        return self.disabled_readers.keys()
+
     def read_file(
         self,
         base_path,
@@ -562,8 +586,7 @@ def read_file(
         logger.debug("Read file %s -> %s", source_path, content_class.__name__)
 
         if not fmt:
-            _, ext = os.path.splitext(os.path.basename(path))
-            fmt = ext[1:]
+            fmt = file_suffix(path)
         if fmt not in self.readers:
             raise TypeError("Pelican does not know how to parse %s", path)
 
@@ -654,6 +677,12 @@ def typogrify_wrapper(text):
             context=context,
         )
 
+    def check_file(self, source_path: str) -> None:
+        """Log a warning if a file would have been processed by a disabled reader."""
+        reader = self.disabled_readers.get(file_suffix(source_path), None)
+        if reader:
+            logger.warning(f"{source_path}: {reader.disabled_message()}")
+
 
 def find_empty_alt(content, path):
     """Find images with empty alt
diff --git a/pelican/tests/test_pelican.py b/pelican/tests/test_pelican.py
index add5f576e..e243be617 100644
--- a/pelican/tests/test_pelican.py
+++ b/pelican/tests/test_pelican.py
@@ -9,10 +9,11 @@
 from collections.abc import Sequence
 from shutil import rmtree
 from tempfile import TemporaryDirectory, mkdtemp
-from unittest.mock import patch
+from unittest.mock import PropertyMock, patch
 
 from rich.console import Console
 
+import pelican.readers
 from pelican import Pelican, __version__, main
 from pelican.generators import StaticGenerator
 from pelican.settings import read_settings
@@ -303,3 +304,24 @@ def test_main_on_content(self):
                     main(["-o", temp_dir, "pelican/tests/simple_content"])
             self.assertIn("Processed 1 article", out.getvalue())
             self.assertEqual("", err.getvalue())
+
+    def test_main_on_content_markdown_disabled(self):
+        """Invoke main on simple_content directory with Markdown disabled."""
+        with patch.object(
+            pelican.readers.MarkdownReader, "enabled", new_callable=PropertyMock
+        ) as attr_mock:
+            attr_mock.return_value = False
+            out, err = io.StringIO(), io.StringIO()
+            with contextlib.redirect_stdout(out), contextlib.redirect_stderr(err):
+                with TemporaryDirectory() as temp_dir:
+                    # Don't highlight anything.
+                    # See https://rich.readthedocs.io/en/stable/highlighting.html
+                    with patch("pelican.console", new=Console(highlight=False)):
+                        main(["-o", temp_dir, "pelican/tests/simple_content"])
+                self.assertIn("Processed 0 articles", out.getvalue())
+                self.assertLogCountEqual(
+                    1,
+                    ".*article_with_md_extension.md: "
+                    "Could not import markdown.Markdown. "
+                    "Have you installed the markdown package?",
+                )
diff --git a/pelican/tests/test_readers.py b/pelican/tests/test_readers.py
index ec366fa84..68938a83a 100644
--- a/pelican/tests/test_readers.py
+++ b/pelican/tests/test_readers.py
@@ -1,5 +1,5 @@
 import os
-from unittest.mock import patch
+from unittest.mock import PropertyMock, patch
 
 from pelican import readers
 from pelican.tests.support import get_settings, unittest
@@ -32,6 +32,19 @@ def assertDictHasSubset(self, dictionary, subset):
             else:
                 self.fail(f"Expected {key} to have value {value}, but was not in Dict")
 
+    def test_markdown_disabled(self):
+        with patch.object(
+            readers.MarkdownReader, "enabled", new_callable=PropertyMock
+        ) as attr_mock:
+            attr_mock.return_value = False
+            readrs = readers.Readers(settings=get_settings())
+            self.assertEqual(
+                set(readers.MarkdownReader.file_extensions),
+                readrs.disabled_readers.keys(),
+            )
+            for val in readrs.disabled_readers.values():
+                self.assertEqual(readers.MarkdownReader, val.__class__)
+
 
 class TestAssertDictHasSubset(ReaderTest):
     def setUp(self):
diff --git a/pelican/tests/test_utils.py b/pelican/tests/test_utils.py
index f7f11ffbf..0da59dd46 100644
--- a/pelican/tests/test_utils.py
+++ b/pelican/tests/test_utils.py
@@ -966,3 +966,10 @@ def get(self, key):
         container.get.cache.clear()
         self.assertEqual("bar", container.get("bar"))
         get_mock.assert_called_once_with("bar")
+
+
+class TestStringUtils(unittest.TestCase):
+    def test_file_suffix(self):
+        self.assertEqual("", utils.file_suffix(""))
+        self.assertEqual("", utils.file_suffix("foo"))
+        self.assertEqual("md", utils.file_suffix("foo.md"))
diff --git a/pelican/utils.py b/pelican/utils.py
index a29fdf815..b780ab97f 100644
--- a/pelican/utils.py
+++ b/pelican/utils.py
@@ -29,6 +29,7 @@
 )
 
 import dateutil.parser
+from watchfiles import Change
 
 try:
     from zoneinfo import ZoneInfo
@@ -39,7 +40,6 @@
 
 if TYPE_CHECKING:
     from pelican.contents import Content
-    from pelican.readers import Readers
     from pelican.settings import Settings
 
 logger = logging.getLogger(__name__)
@@ -797,9 +797,8 @@ def order_content(
 
 def wait_for_changes(
     settings_file: str,
-    reader_class: type[Readers],
     settings: Settings,
-):
+) -> set[tuple[Change, str]]:
     content_path = settings.get("PATH", "")
     theme_path = settings.get("THEME", "")
     ignore_files = {
@@ -924,3 +923,13 @@ class to use the C locale.
         locale.setlocale(lc_category, temp_locale)
     yield
     locale.setlocale(lc_category, orig_locale)
+
+
+def file_suffix(path: str) -> str:
+    """Return the suffix of a filename in a path, without the leading dot."""
+    _, ext = os.path.splitext(os.path.basename(path))
+    ret = ""
+    if len(ext) > 1:
+        # drop the ".", e.g., "exe", not ".exe"
+        ret = ext[1:]
+    return ret