Skip to content

Commit

Permalink
feat yaml_check
Browse files Browse the repository at this point in the history
  • Loading branch information
clay-lake committed Dec 3, 2024
1 parent 047adb5 commit 86899f8
Show file tree
Hide file tree
Showing 13 changed files with 349 additions and 0 deletions.
3 changes: 3 additions & 0 deletions yaml_checker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# YAML Checker


13 changes: 13 additions & 0 deletions yaml_checker/demo/basic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# python3 -m yaml_checker demo/basic.yaml

hello:

world:
- 1
- 2 # inline comment
- 3

# a comment heading a field
foo:
bar
baz: "This is a really long string in this document. By the end I hope to reach over 150 characters in total. But I need a few more words to reach that goal. Done"
9 changes: 9 additions & 0 deletions yaml_checker/demo/factory.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# python3 -m yaml_checker --config=OCIFactory demo/factory.yaml

hello:
world: "42"
foo:
- "bar"
- "baz"
- "etc"
tricky_string: "it's a trap!"
33 changes: 33 additions & 0 deletions yaml_checker/demo/slice.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# python3 -m yaml_checker --config=Chisel demo/slice.yaml

package: grep

essential:
- grep_copyright

# hello: world

slices:
bins:
essential:
- libpcre2-8-0_libs # tests
- libc6_libs
contents:
/usr/bin/grep:

deprecated:
# These are shell scripts requiring a symlink from /usr/bin/dash to
# /usr/bin/sh.
# See: https://manpages.ubuntu.com/manpages/noble/en/man1/grep.1.html
essential: ["dash_bins", "grep_bins"]
contents:
/usr/bin/rgrep:

# tests
/usr/bin/fgrep:

/usr/bin/egrep: # tests1

copyright:
contents:
/usr/share/doc/grep/copyright:
2 changes: 2 additions & 0 deletions yaml_checker/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ruyaml
pytest
8 changes: 8 additions & 0 deletions yaml_checker/sample.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# comment

test:
test2: "1" # another comment
test3: [a, b, c]

# more comments
test4: [1, 2, 3]
17 changes: 17 additions & 0 deletions yaml_checker/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from pathlib import Path
from setuptools import setup, find_packages


def read(filename):
    """Return the text of *filename*, resolved against this file's directory.

    The file is read with ``Path.read_text`` so the handle is closed before
    returning (a bare ``open(...).read()`` leaves that to the garbage
    collector), and it is decoded as UTF-8 instead of the platform's locale
    encoding.
    """
    return (Path(__file__).parent / filename).read_text(encoding="utf-8")


# Package metadata. README.md and requirements.txt are read from the directory
# that contains this file via read().
setup(
    name="yaml_checker",
    version="0.1.0",
    long_description=read("README.md"),
    # README.md is Markdown; without an explicit content type the long
    # description is treated as reStructuredText.
    long_description_content_type="text/markdown",
    packages=find_packages(),
    # The raw file content is passed through: one requirement per line.
    install_requires=read("requirements.txt"),
)
Empty file.
53 changes: 53 additions & 0 deletions yaml_checker/yaml_checker/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from .config.base import YAMLCheckConfigBase
from pathlib import Path
import logging

import argparse

def _build_parser():
    """Create the argument parser used by the command-line entry point."""
    cli = argparse.ArgumentParser()

    # One (flags, options) pair per argument, registered in this order.
    argument_specs = (
        (
            ("-v", "--verbose"),
            {"action": "store_true", "help": "Enable verbose output."},
        ),
        (
            ("-w", "--write"),
            {"action": "store_true", "help": "Write yaml output to disk."},
        ),
        (
            ("--config",),
            {
                "type": str,
                "default": "YAMLCheckConfigBase",
                "help": "CheckYAML subclass to load",
            },
        ),
        (
            ("files",),
            {
                "type": Path,
                "nargs": "*",
                "help": "Additional files to process (optional).",
            },
        ),
    )

    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    return cli


parser = _build_parser()


def main():
    """Check every file named on the command line.

    Each file is loaded, passed through the selected configuration's rules,
    validated against its model and dumped back to YAML. The result is written
    back to the file when ``--write`` is given and printed to stdout otherwise.

    An unknown ``--config`` name ends the program through ``parser.error``
    (usage message, exit status 2) instead of an unhandled ``KeyError``.
    """
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    try:
        check_yaml_config = YAMLCheckConfigBase.configs[args.config]
    except KeyError:
        available = ", ".join(sorted(YAMLCheckConfigBase.configs))
        parser.error(
            f"Unknown config {args.config!r}. Available configs: {available}"
        )

    yaml = check_yaml_config()

    for file in args.files:
        # Read and write with an explicit encoding so the result does not
        # depend on the platform's locale.
        data = yaml.load(file.read_text(encoding="utf-8"))
        data = yaml.apply_rules(data)
        yaml.validate_model(data)

        output = yaml.dump(data)

        if args.write:
            file.write_text(output, encoding="utf-8")
        else:
            print(output)


if __name__ == "__main__":
    main()
14 changes: 14 additions & 0 deletions yaml_checker/yaml_checker/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from pathlib import Path
from importlib import import_module

# Directory that holds this package's modules, and the package's dotted name.
submodule_root = Path(__file__).parent
package_name = __name__


# Import every sibling module whose name does not start with an underscore,
# so that importing the package also executes each of those modules.
for submodule in submodule_root.glob("*.py"):
    submodule_name = submodule.stem

    if not submodule_name.startswith("_"):
        import_module(f"{package_name}.{submodule_name}")
98 changes: 98 additions & 0 deletions yaml_checker/yaml_checker/config/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import fnmatch
from io import StringIO
import logging
from pathlib import Path
from pydantic import BaseModel
from ruyaml import YAML
from typing import Any


class YAMLCheckConfigReg(type):
    """Metaclass that records every class it creates in that class's ``configs`` dict."""

    def __init__(cls, *args, **kwargs):
        """Register ``cls`` under its class name so the CLI can look it up."""
        super().__init__(*args, **kwargs)
        # setdefault leaves an existing entry alone, so the first class
        # registered under a given name is the one that stays.
        cls.configs.setdefault(cls.__name__, cls)


class YAMLCheckConfigBase(metaclass=YAMLCheckConfigReg):
    """Base checker configuration: ruyaml settings, path rules and a model.

    Subclasses customise behaviour by overriding:

    * ``rules``  - maps a glob pattern to the name of a method taking
      ``(path, data)`` and returning the (possibly replaced) data;
    * ``Config`` - attributes copied onto the ``ruyaml.YAML`` instance;
    * ``Model``  - pydantic model the loaded document is validated against.
    """

    configs = {}  # Store configs for access from CLI
    rules = {}  # map glob strings to class method names

    class Model(BaseModel):
        """Pydantic BaseModel to provide validation"""

        # pydantic v2 style configuration (the inner ``class Config`` form is
        # deprecated there); unknown keys are accepted by the base model.
        model_config = {"extra": "allow"}

    class Config:
        """ruyaml.YAML configuration set before loading."""

        preserve_quotes = True
        width = 80
        map_indent = 2
        sequence_indent = 4
        sequence_dash_offset = 2

    def __init__(self):
        """Create the ruyaml.YAML instance and apply every ``Config`` attribute.

        Raises:
            AttributeError: if ``Config`` declares a name that the YAML
                instance does not have.
        """
        self.yaml = YAML()

        # load Config into yaml
        for attr in dir(self.Config):
            # skip dunder attributes that every class carries
            if attr.startswith("__"):
                continue

            attr_val = getattr(self.Config, attr)

            if not hasattr(self.yaml, attr):
                raise AttributeError(f"Invalid ruyaml.YAML attribute: {attr}")

            setattr(self.yaml, attr, attr_val)

    def load(self, yaml_str: str):
        """Load YAML data from string"""
        return self.yaml.load(yaml_str)

    def dump(self, data: Any):
        """Dump data to YAML string"""
        with StringIO() as sio:
            self.yaml.dump(data, sio)
            # getvalue() returns the whole buffer regardless of position
            return sio.getvalue()

    def validate_model(self, data: Any):
        """Validate data against ``Model`` by instantiating it with ``**data``.

        Any exception raised by the model constructor propagates to the
        caller. ``data`` must be a mapping because it is unpacked as keyword
        arguments.
        """
        if issubclass(self.Model, BaseModel):
            _ = self.Model(**data)

    def _apply_rules(self, path: Path, data: Any):
        """Recursively apply rules to ``data`` and return the result.

        Children are processed before the rules for ``path`` itself run. Dict
        values and list items are replaced in place by whatever the recursive
        call returns.
        """
        logging.debug("Walking path %s.", path)

        # recurse over dicts and lists
        if isinstance(data, dict):
            for key, value in data.items():
                # NOTE(review): joining an absolute key (for example
                # "/usr/bin/grep") onto ``path`` discards ``path`` entirely
                # under pathlib's join semantics - confirm this is intended.
                data[key] = self._apply_rules(path / str(key), value)

        elif isinstance(data, list):
            for index, item in enumerate(data):
                # NOTE(review): list items are addressed by their string
                # value, not by ``index`` - confirm this is intended.
                data[index] = self._apply_rules(path / str(item), item)

        # scan for applicable rules at each directory
        # TODO: selection of rules here does not scale well and should be improved
        path_str = str(path)
        for pattern, method_name in self.rules.items():
            if fnmatch.fnmatch(path_str, pattern):
                logging.debug('Applying rule "%s" at %s', method_name, path)
                rule = getattr(self, method_name)
                data = rule(path, data)

        return data

    def apply_rules(self, data: Any):
        """Walk all objects in data and apply rules where applicable."""
        return self._apply_rules(Path("/"), data)
78 changes: 78 additions & 0 deletions yaml_checker/yaml_checker/config/chisel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from ruyaml.scalarstring import PlainScalarString
from ruyaml.comments import CommentedMap

from pydantic import BaseModel, Field, RootModel
from typing import List, Dict, Any


from .base import YAMLCheckConfigBase


class Slices(RootModel):
    """Root model whose value is a mapping from string keys to arbitrary values."""

    # The validated value itself: a dict with string keys and unconstrained values.
    root: Dict[str, Any]


class SDF(BaseModel):
    """Top-level document schema with ``package``, ``essential`` and ``slices``.

    All three keys are required, and any other top-level key is rejected.
    """

    # Reject keys that are not declared below.
    model_config = {"extra": "forbid"}

    package: str
    essential: List[str]
    slices: Slices


class Chisel(YAMLCheckConfigBase):
    """Checker configuration for documents validated against ``SDF``.

    The ``essential`` and ``contents`` entries of every slice are sorted, and
    string items of a slice's ``essential`` list are emitted without quotes.
    """

    rules = {
        "/slices/*/essential": "sort_content",
        "/slices/*/essential/*": "no_quotes",
        "/slices/*/contents": "sort_content",
    }

    def sort_content(self, path, data):
        """Return ``data`` sorted: mappings by key, sequences by item.

        A mapping is rebuilt as a new ``CommentedMap`` with its keys in sorted
        order, carrying over the comment entry recorded for each key. A list
        is sorted in place and returned. Any other value is returned
        unchanged.
        """
        if isinstance(data, dict):
            # Per-key comments live in ``data.ca.items``; a plain dict has no
            # ``ca`` attribute, in which case there is nothing to carry over.
            key_comments = getattr(getattr(data, "ca", None), "items", {})

            sorted_dict = CommentedMap()
            for key in sorted(data):
                sorted_dict[key] = data[key]
                # NOTE(review): assumes copying the ``ca.items`` entry is
                # enough to keep a key's comments attached - confirm against
                # ruyaml's comment handling.
                if key in key_comments:
                    sorted_dict.ca.items[key] = key_comments[key]

            return sorted_dict

        if isinstance(data, list):
            data.sort()
            return data

        return data

    def no_quotes(self, path, data):
        """Return string ``data`` as a ``PlainScalarString``; pass others through."""
        if isinstance(data, str):
            return PlainScalarString(data)

        return data

    Model = SDF
21 changes: 21 additions & 0 deletions yaml_checker/yaml_checker/config/oci_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import logging
from ruyaml.scalarstring import SingleQuotedScalarString, DoubleQuotedScalarString

from .base import YAMLCheckConfigBase


class OCIFactory(YAMLCheckConfigBase):
    """Checker configuration that turns double-quoted strings into single-quoted ones."""

    rules = {"/**": "convert_to_single_quotes"}

    def convert_to_single_quotes(self, path, data):
        """Return ``data`` as a ``SingleQuotedScalarString`` where possible.

        Only ``DoubleQuotedScalarString`` values are converted. One that
        contains a "'" character is reported with a warning and returned
        unchanged, as is every value of any other type.
        """
        # values that were not double quoted are passed straight through
        if not isinstance(data, DoubleQuotedScalarString):
            return data

        # safe to convert: the text holds no "'" character
        if "'" not in data:
            return SingleQuotedScalarString(data)

        logging.warning(f'Cannot convert {path}, contains "\'" character.')
        return data

0 comments on commit 86899f8

Please sign in to comment.