Skip to content

Commit

Permalink
Log warnings about files that would have been processed by disabled r…
Browse files Browse the repository at this point in the history
…eaders
  • Loading branch information
boxydog committed Jun 17, 2024
1 parent 79d37ba commit 548b0ef
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 11 deletions.
5 changes: 3 additions & 2 deletions pelican/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
)
from pelican.plugins import signals
from pelican.plugins._utils import get_plugin_name, load_plugins
from pelican.readers import Readers
from pelican.server import ComplexHTTPRequestHandler, RootedHTTPServer
from pelican.settings import read_settings
from pelican.utils import clean_output_dir, maybe_pluralize, wait_for_changes
Expand Down Expand Up @@ -126,6 +125,8 @@ def run(self):
for p in generators:
if hasattr(p, "generate_context"):
p.generate_context()
if hasattr(p, "check_disabled_readers"):
p.check_disabled_readers()

# for plugins that create/edit the summary
logger.debug("Signal all_generators_finalized.send(<generators>)")
Expand Down Expand Up @@ -573,7 +574,7 @@ def autoreload(args, excqueue=None):
try:
pelican.run()

changed_files = wait_for_changes(args.settings, Readers, settings)
changed_files = wait_for_changes(args.settings, settings)
changed_files = {c[1] for c in changed_files}

if settings_file in changed_files:
Expand Down
27 changes: 26 additions & 1 deletion pelican/generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from functools import partial
from itertools import chain, groupby
from operator import attrgetter
from typing import List, Optional, Set

from jinja2 import (
BaseLoader,
Expand Down Expand Up @@ -156,7 +157,9 @@ def _include_path(self, path, extensions=None):

return False

def get_files(self, paths, exclude=None, extensions=None):
def get_files(
self, paths, exclude: Optional[List[str]] = None, extensions=None
) -> Set[str]:
"""Return the set of files to use, based on rules
:param paths: the list of paths to search (relative to self.path)
Expand Down Expand Up @@ -250,6 +253,13 @@ def __str__(self):
# return the name of the class for logging purposes
return self.__class__.__name__

def _check_disabled_readers(self, paths, exclude: Optional[List[str]]) -> None:
"""Log warnings for files that would have been processed by disabled readers."""
for fil in self.get_files(
paths, exclude=exclude, extensions=self.readers.disabled_extensions
):
self.readers.check_file(fil)


class CachingGenerator(Generator, FileStampDataCacher):
"""Subclass of Generator and FileStampDataCacher classes
Expand Down Expand Up @@ -643,6 +653,11 @@ def generate_pages(self, writer):
self.generate_authors(write)
self.generate_drafts(write)

def check_disabled_readers(self) -> None:
    """Check the ARTICLE_PATHS files (minus ARTICLE_EXCLUDES) for disabled readers."""
    article_paths = self.settings["ARTICLE_PATHS"]
    article_excludes = self.settings["ARTICLE_EXCLUDES"]
    self._check_disabled_readers(article_paths, exclude=article_excludes)

def generate_context(self):
"""Add the articles into the shared context"""

Expand Down Expand Up @@ -849,6 +864,11 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
signals.page_generator_init.send(self)

def check_disabled_readers(self) -> None:
    """Check the PAGE_PATHS files (minus PAGE_EXCLUDES) for disabled readers."""
    page_paths = self.settings["PAGE_PATHS"]
    page_excludes = self.settings["PAGE_EXCLUDES"]
    self._check_disabled_readers(page_paths, exclude=page_excludes)

def generate_context(self):
all_pages = []
hidden_pages = []
Expand Down Expand Up @@ -953,6 +973,11 @@ def __init__(self, *args, **kwargs):
self.fallback_to_symlinks = False
signals.static_generator_init.send(self)

def check_disabled_readers(self) -> None:
    """Check the STATIC_PATHS files (minus STATIC_EXCLUDES) for disabled readers."""
    static_paths = self.settings["STATIC_PATHS"]
    static_excludes = self.settings["STATIC_EXCLUDES"]
    self._check_disabled_readers(static_paths, exclude=static_excludes)

def generate_context(self):
self.staticfiles = []
linked_files = set(self.context["static_links"])
Expand Down
39 changes: 34 additions & 5 deletions pelican/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pelican.cache import FileStampDataCacher
from pelican.contents import Author, Category, Page, Tag
from pelican.plugins import signals
from pelican.utils import get_date, pelican_open, posixize_path
from pelican.utils import file_suffix, get_date, pelican_open, posixize_path

try:
from markdown import Markdown
Expand Down Expand Up @@ -125,6 +125,10 @@ def read(self, source_path):
metadata = {}
return content, metadata

def disabled_message(self) -> str:
    """Return the text explaining why this reader is disabled.

    This implementation returns an empty string.
    """
    return ""


class _FieldBodyTranslator(HTMLTranslator):
def __init__(self, document):
Expand Down Expand Up @@ -347,6 +351,12 @@ def read(self, source_path):
metadata = {}
return content, metadata

def disabled_message(self) -> str:
    """Return the hint shown when ``markdown.Markdown`` could not be imported."""
    fragments = (
        "Could not import markdown.Markdown. ",
        "Have you installed the markdown package?",
    )
    return "".join(fragments)


class HTMLReader(BaseReader):
"""Parses HTML files as input, looking for meta, title, and body tags"""
Expand Down Expand Up @@ -508,17 +518,23 @@ class Readers(FileStampDataCacher):
def __init__(self, settings=None, cache_name=""):
self.settings = settings or {}
self.readers = {}
self.disabled_readers = {}
# extension => reader for readers that are enabled
self.reader_classes = {}
# extension => reader for readers that are not enabled
disabled_reader_classes = {}

for cls in [BaseReader] + BaseReader.__subclasses__():
if not cls.enabled:
logger.debug(
"Missing dependencies for %s", ", ".join(cls.file_extensions)
)
continue

for ext in cls.file_extensions:
self.reader_classes[ext] = cls
if cls.enabled:
self.reader_classes[ext] = cls
else:
disabled_reader_classes[ext] = cls

if self.settings["READERS"]:
self.reader_classes.update(self.settings["READERS"])
Expand All @@ -531,6 +547,9 @@ def __init__(self, settings=None, cache_name=""):

self.readers[fmt] = reader_class(self.settings)

for fmt, reader_class in disabled_reader_classes.items():
self.disabled_readers[fmt] = reader_class(self.settings)

# set up caching
cache_this_level = (
cache_name != "" and self.settings["CONTENT_CACHING_LAYER"] == "reader"
Expand All @@ -541,8 +560,13 @@ def __init__(self, settings=None, cache_name=""):

@property
def extensions(self):
    """Keys view of ``self.readers``: the extensions that have an active reader."""
    active_readers = self.readers
    return active_readers.keys()

@property
def disabled_extensions(self):
    """Keys view of ``self.disabled_readers``: extensions whose reader is disabled."""
    inactive_readers = self.disabled_readers
    return inactive_readers.keys()

def read_file(
self,
base_path,
Expand All @@ -562,8 +586,7 @@ def read_file(
logger.debug("Read file %s -> %s", source_path, content_class.__name__)

if not fmt:
_, ext = os.path.splitext(os.path.basename(path))
fmt = ext[1:]
fmt = file_suffix(path)

if fmt not in self.readers:
raise TypeError("Pelican does not know how to parse %s", path)
Expand Down Expand Up @@ -654,6 +677,12 @@ def typogrify_wrapper(text):
context=context,
)

def check_file(self, source_path: str) -> None:
    """Warn when the suffix of ``source_path`` maps to a disabled reader.

    Nothing is logged when no disabled reader is registered for the suffix.
    """
    suffix = file_suffix(source_path)
    disabled_reader = self.disabled_readers.get(suffix)
    if not disabled_reader:
        return
    logger.warning(f"{source_path}: {disabled_reader.disabled_message()}")


def find_empty_alt(content, path):
"""Find images with empty alt
Expand Down
22 changes: 22 additions & 0 deletions pelican/tests/test_pelican.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from pelican import Pelican, __version__, main
from pelican.generators import StaticGenerator
from pelican.readers import MarkdownReader
from pelican.settings import read_settings
from pelican.tests.support import (
LoggedTestCase,
Expand Down Expand Up @@ -58,12 +59,15 @@ def setUp(self):
self.maxDiff = None
self.old_locale = locale.setlocale(locale.LC_ALL)
locale.setlocale(locale.LC_ALL, "C")
self.old_markdown_enabled = MarkdownReader.enabled

def tearDown(self):
# Undo per-test state: reset settings, remove the temporary output and
# cache directories, then restore the locale and the reader flag.
read_settings() # cleanup PYGMENTS_RST_OPTIONS
rmtree(self.temp_path)
rmtree(self.temp_cache)
# put back the locale that setUp saved before switching to "C"
locale.setlocale(locale.LC_ALL, self.old_locale)
# restore MarkdownReader.enabled setting
MarkdownReader.enabled = self.old_markdown_enabled
super().tearDown()

def assertDirsEqual(self, left_path, right_path, msg=None):
Expand Down Expand Up @@ -303,3 +307,21 @@ def test_main_on_content(self):
main(["-o", temp_dir, "pelican/tests/simple_content"])
self.assertIn("Processed 1 article", out.getvalue())
self.assertEqual("", err.getvalue())

def test_main_on_content_markdown_disabled(self):
"""Invoke main on simple_content with the Markdown reader disabled.

Expects "Processed 0 articles" in the captured stdout and one logged
message matching the disabled-reader warning for
article_with_md_extension.md.
"""
# Disable the reader on the class; tearDown restores the saved value.
MarkdownReader.enabled = False
out, err = io.StringIO(), io.StringIO()
with contextlib.redirect_stdout(out), contextlib.redirect_stderr(err):
with TemporaryDirectory() as temp_dir:
# Don't highlight anything.
# See https://rich.readthedocs.io/en/stable/highlighting.html
with patch("pelican.console", new=Console(highlight=False)):
main(["-o", temp_dir, "pelican/tests/simple_content"])
self.assertIn("Processed 0 articles", out.getvalue())
# The expected message is the file path followed by the Markdown
# reader's disabled message.
self.assertLogCountEqual(
1,
".*article_with_md_extension.md: "
"Could not import markdown.Markdown. "
"Have you installed the markdown package?",
)
16 changes: 16 additions & 0 deletions pelican/tests/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ def _path(*args):


class ReaderTest(unittest.TestCase):
def setUp(self) -> None:
    """Remember ``MarkdownReader.enabled`` so tearDown can put it back."""
    markdown_reader = readers.MarkdownReader
    self.old_markdown_enabled = markdown_reader.enabled

def tearDown(self) -> None:
    """Put back the ``MarkdownReader.enabled`` value saved in setUp."""
    saved_flag = self.old_markdown_enabled
    readers.MarkdownReader.enabled = saved_flag

def read_file(self, path, **kwargs):
# Isolate from future API changes to readers.read_file

Expand All @@ -32,6 +39,15 @@ def assertDictHasSubset(self, dictionary, subset):
else:
self.fail(f"Expected {key} to have value {value}, but was not in Dict")

def test_markdown_disabled(self):
    """A disabled MarkdownReader is registered under ``disabled_readers``.

    Checks that the disabled extensions equal the Markdown file extensions
    and that every disabled reader instance is a MarkdownReader.
    """
    readers.MarkdownReader.enabled = False
    registry = readers.Readers(settings=get_settings())

    expected_extensions = set(readers.MarkdownReader.file_extensions)
    self.assertEqual(expected_extensions, registry.disabled_readers.keys())

    for disabled_reader in registry.disabled_readers.values():
        self.assertEqual(readers.MarkdownReader, disabled_reader.__class__)


class TestAssertDictHasSubset(ReaderTest):
def setUp(self):
Expand Down
7 changes: 7 additions & 0 deletions pelican/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,3 +966,10 @@ def get(self, key):
container.get.cache.clear()
self.assertEqual("bar", container.get("bar"))
get_mock.assert_called_once_with("bar")


class TestStringUtils(unittest.TestCase):
    """Tests for the string helpers in ``utils``."""

    def test_file_suffix(self):
        """``file_suffix`` returns the extension without its leading dot."""
        # (input path, expected suffix)
        cases = (
            ("", ""),
            ("foo", ""),
            ("foo.md", "md"),
        )
        for path, expected in cases:
            self.assertEqual(expected, utils.file_suffix(path))
15 changes: 12 additions & 3 deletions pelican/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
)

import dateutil.parser
from watchfiles import Change

try:
from zoneinfo import ZoneInfo
Expand All @@ -39,7 +40,6 @@

if TYPE_CHECKING:
from pelican.contents import Content
from pelican.readers import Readers
from pelican.settings import Settings

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -797,9 +797,8 @@ def order_content(

def wait_for_changes(
settings_file: str,
reader_class: type[Readers],
settings: Settings,
):
) -> set[tuple[Change, str]]:
content_path = settings.get("PATH", "")
theme_path = settings.get("THEME", "")
ignore_files = {
Expand Down Expand Up @@ -924,3 +923,13 @@ class to use the C locale.
locale.setlocale(lc_category, temp_locale)
yield
locale.setlocale(lc_category, orig_locale)


def file_suffix(path: str) -> str:
    """Return the extension of the final path component, without the dot.

    Only the last extension is reported, so ``"a/b.tar.gz"`` gives ``"gz"``.
    The result is an empty string when the file name has no extension,
    including dotfiles such as ``".bashrc"`` and names ending in a bare dot.
    """
    filename = os.path.basename(path)
    extension = os.path.splitext(filename)[1]
    # ``extension`` is either empty or starts with "."; slicing off the first
    # character maps both "" and "." to "".
    return extension[1:]

0 comments on commit 548b0ef

Please sign in to comment.