Skip to content

Commit 08d3850

Browse files
committed
Add git lfs pre-commit check
1 parent d26f50b commit 08d3850

File tree

2 files changed

+127
-1
lines changed

2 files changed

+127
-1
lines changed

.pre-commit-config.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
exclude: (\.git/|\.tox/|\.venv/|build/|static/|dist/|node_modules/|kolibripip\.pex)
1+
exclude: (\.git/|\.tox/|\.venv/|build/|static/(?!assets/fonts)|dist/|node_modules/|kolibripip\.pex)
22
repos:
33
- repo: https://github.com/pre-commit/pre-commit-hooks
44
rev: v4.6.0
@@ -90,6 +90,11 @@ repos:
9090
language: pygrep
9191
exclude: (^packages/kolibri-common/|moduleMapping\.js|(__tests__|__fixtures__))
9292
files: ^packages/.*\.(js|vue)$
93+
- id: check-lfs-pointers
94+
name: Check LFS files are pointers not binary data
95+
description: Prevents accidentally committing binary files that should be LFS pointers (issue #7099)
96+
entry: python .pre-commit-hooks/check_lfs_pointers.py
97+
language: python
9398
- repo: https://github.com/isidentical/teyit
9499
rev: 0.4.3
95100
hooks:
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Pre-commit hook to ensure files designated for Git LFS are actually LFS pointers.
4+
5+
This prevents accidentally committing binary files that should be managed by Git LFS.
6+
See: https://github.com/learningequality/kolibri/issues/7099
7+
8+
Checks staged content (what will be committed) to verify it's an LFS pointer,
9+
not binary data. This works whether or not Git LFS is installed in the environment
10+
running the check.
11+
"""
12+
import logging
13+
import subprocess
14+
import sys
15+
16+
17+
LFS_POINTER_HEADER = b"version https://git-lfs.github.com/spec/v1"
18+
19+
20+
def has_lfs_filter(filepath):
    """
    Check if a file has the LFS filter attribute set.

    Asks ``git check-attr`` for the ``filter`` attribute of ``filepath`` and
    compares the reported value exactly against ``lfs``.

    Args:
        filepath: Path to the file to check

    Returns:
        bool: True if file has filter=lfs attribute; False if the attribute
        has any other value, is unset/unspecified, or git exits with an error
    """
    try:
        result = subprocess.run(
            # -z makes the output machine-parsable: NUL-separated
            # "<path>\0<attribute>\0<value>\0". Comparing the value field
            # exactly avoids false positives from substring matching (a
            # filter named e.g. "lfs-custom", or a path that itself contains
            # "filter: lfs"). "--" stops a path that begins with "-" from
            # being interpreted as a command-line option.
            ["git", "check-attr", "-z", "filter", "--", filepath],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        return False

    fields = result.stdout.split("\0")
    return len(fields) >= 3 and fields[1] == "filter" and fields[2] == "lfs"
40+
41+
42+
def is_lfs_pointer(filepath):
    """
    Report whether the staged version of a file is a Git LFS pointer.

    The staged content (what will be committed) is read with
    ``git show :<filepath>``. Its first line, stripped of surrounding
    whitespace, must begin with the LFS pointer header.

    Args:
        filepath: Path to the file to check

    Returns:
        bool: True if the staged content starts with the LFS pointer header,
        False otherwise (including when the git command exits with an error)
    """
    staged_blob_cmd = ["git", "show", f":{filepath}"]
    try:
        # No text=True: the staged content may be binary, so keep it as bytes.
        completed = subprocess.run(staged_blob_cmd, capture_output=True, check=True)
    except subprocess.CalledProcessError:
        return False

    header_line, _, _ = completed.stdout.partition(b"\n")
    return header_line.strip().startswith(LFS_POINTER_HEADER)
65+
66+
67+
def main(filenames):
    """
    Check the given files and report any LFS-tracked file that is not a pointer.

    A file fails when it has the ``filter=lfs`` attribute but its staged
    content is not an LFS pointer. Failures are reported on stderr through
    the logging module.

    Args:
        filenames: List of filenames to check (passed by pre-commit)

    Returns:
        int: Exit code (0 for success, 1 for failure)
    """
    # Route error output to stderr with bare messages (no level/name prefix).
    logging.basicConfig(level=logging.ERROR, format="%(message)s", stream=sys.stderr)
    log = logging.getLogger(__name__)

    # The pointer check only runs for files that carry the LFS filter.
    offenders = [
        path
        for path in filenames
        if has_lfs_filter(path) and not is_lfs_pointer(path)
    ]

    if not offenders:
        return 0

    heavy_rule = "=" * 80
    light_rule = "-" * 80

    banner = (
        "\n" + heavy_rule,
        "ERROR: LFS Pointer Check Failed",
        heavy_rule,
        "",
        "The following files should be LFS pointers but contain binary data:",
        "",
    )
    remediation = (
        "",
        light_rule,
        "These files match LFS patterns in .gitattributes but were not",
        "committed through Git LFS.",
        "",
        "To fix this issue:",
        " 1. Ensure Git LFS is installed: git lfs install",
        " 2. Remove the binary files from staging: git rm --cached <file>",
        " 3. Re-add the files (Git LFS will handle them): git add <file>",
        heavy_rule,
        "",
    )

    for line in banner:
        log.error(line)

    for path in offenders:
        # Red cross marker (ANSI escape) in front of each offending path.
        log.error(" \033[31m✗\033[0m %s", path)

    for line in remediation:
        log.error(line)

    return 1
118+
119+
120+
if __name__ == "__main__":
    # Command-line arguments are the filenames to check; main()'s return
    # value becomes the process exit code.
    raise SystemExit(main(sys.argv[1:]))

0 commit comments

Comments
 (0)