Skip to content

Commit

Permalink
Merge pull request #68 from dhellmann/contributor-filter
Browse files Browse the repository at this point in the history
add contributor filter
  • Loading branch information
dhellmann authored Sep 5, 2020
2 parents 4077a4c + 82fbe4a commit 579e9bc
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 29 deletions.
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@

spelling_word_list_filename = [
'spelling_wordlist.txt',
'spelling_people.txt',
]

spelling_show_suggestions = True
spelling_ignore_pypi_package_names = True
spelling_ignore_contributor_names = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
Expand Down
26 changes: 0 additions & 26 deletions docs/source/spelling_people.txt

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
features:
- |
Added a new filter
(``sphinxcontrib.spelling.filters.ContributorFilter``) that treats
contributor names extracted from the git history as spelled
correctly, making it easier to refer to the names in
acknowledgments. Includes a new configuration option,
``spelling_ignore_contributor_names``, to enable it.
2 changes: 2 additions & 0 deletions sphinxcontrib/spelling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ def setup(app):
# Assume words that look like the names of importable modules are
# spelled properly
app.add_config_value('spelling_ignore_importable_modules', True, 'env')
# Treat contributor names from git history as spelled correctly
app.add_config_value('spelling_ignore_contributor_names', True, 'env')
# Add any user-defined filter classes
app.add_config_value('spelling_filters', [], 'env')
# Set a user-provided list of files to ignore
Expand Down
3 changes: 3 additions & 0 deletions sphinxcontrib/spelling/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ def init(self):
if self.config.spelling_ignore_importable_modules:
logger.info('Ignoring importable module names')
f.append(filters.ImportableModuleFilter)
if self.config.spelling_ignore_contributor_names:
logger.info('Ignoring contributor names')
f.append(filters.ContributorFilter)
f.extend(self._load_filter_classes(self.config.spelling_filters))

if not os.path.isdir(self.outdir):
Expand Down
42 changes: 40 additions & 2 deletions sphinxcontrib/spelling/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@
"""Spelling checker extension for Sphinx.
"""

# TODO - Words with multiple uppercase letters treated as classes and ignored

import builtins
import imp
import subprocess
import xmlrpc.client as xmlrpc_client

from enchant.tokenize import Filter, tokenize, unit_tokenize
from enchant.tokenize import Filter, get_tokenizer, tokenize, unit_tokenize

from sphinx.util import logging

# TODO - Words with multiple uppercase letters treated as classes and ignored

logger = logging.getLogger(__name__)


class AcronymFilter(Filter):
Expand Down Expand Up @@ -193,3 +199,35 @@ def _skip(self, word):
self.found_modules.add(word)
return True
return word in self.found_modules


class ContributorFilter(IgnoreWordsFilter):
    """Accept information about contributors as spelled correctly.

    Look in the git history for authors and committers and accept
    tokens that are in the set.
    """

    # For every commit, emit the Co-Authored-By trailers, the author
    # name (%an) and the committer name (%cn), separated by newlines
    # (%x0A).
    _pretty_format = (
        '%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn'
    )

    def __init__(self, tokenizer):
        """Scan the git history once and ignore every token found.

        :param tokenizer: the tokenizer handed on to
            ``IgnoreWordsFilter.__init__``.
        """
        contributors = self._get_contributors()
        IgnoreWordsFilter.__init__(self, tokenizer, contributors)

    def _get_contributors(self):
        """Return the set of word tokens found in the ``git log`` output.

        The scan is best effort: if ``git`` cannot be started at all
        (for example it is not installed) or exits with an error (for
        example the current directory is not a repository), a warning
        is logged and an empty set is returned.
        """
        logger.info('Scanning contributors')
        cmd = [
            'git', 'log', '--quiet', '--no-color',
            '--pretty=format:' + self._pretty_format,
        ]
        try:
            p = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
        except (subprocess.CalledProcessError, OSError) as err:
            # OSError covers FileNotFoundError / PermissionError raised
            # when the git executable cannot be launched.
            logger.warning('Called: {}'.format(' '.join(cmd)))
            logger.warning('Failed to scan contributors: {}'.format(err))
            return set()
        # Do not let stray non-UTF-8 bytes in the history abort the
        # build; undecodable bytes are replaced instead.
        output = p.stdout.decode('utf-8', errors='replace')
        tokenizer = get_tokenizer('en_US', filters=[])
        return {word for word, pos in tokenizer(output)}
35 changes: 35 additions & 0 deletions sphinxcontrib/spelling/tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,38 @@ def test_acronym_unicode():
f = filters.AcronymFilter(t)
words = [w[0] for w in f(text)]
assert u'DBM' not in words, 'Failed to filter out acronym'


def test_contributors():
    """Every known contributor name is skipped by ContributorFilter.

    The filter reads the names from ``git log``, so this test relies on
    being run from a checkout whose history contains these contributors.
    """
    f = filters.ContributorFilter(None)
    names = [
        "Alex",
        "Atlakson",
        "Avram",
        "Baumgold",
        "Berman",
        "Daniele",
        "Doug",
        "Finucane",
        "Gaynor",
        "Gonsiorowski",
        "Hong",
        "Huon",
        "Kampik",
        "Kolosov",
        "Lubkin",
        "Marti",
        "Minhee",
        "Olausson",
        "Raggam",
        "Raudsepp",
        "sdelliot",
        "Sergey",
        "Sevilla",
        "Timotheus",
        "Tobias",
        "Tricoli",
    ]
    for name in names:
        # Name the offending entry so a failure is actionable.
        assert f._skip(name), \
            'Failed to skip contributor name: {}'.format(name)

0 comments on commit 579e9bc

Please sign in to comment.