-
Notifications
You must be signed in to change notification settings - Fork 1
/
exceldiff
executable file
·91 lines (70 loc) · 2.95 KB
/
exceldiff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#! /usr/bin/python3
import xlrd
import sys
import argparse
missing_sheets = []
missing_rows = []
missing_cols = []
diffs = []
parser = argparse.ArgumentParser(description='Find differences between Excel files')
parser.add_argument('paths', metavar='PATH', type=str, nargs=2,
help='files paths')
parser.add_argument('-n', '--names', action='store_true',
help='match sheets by names (default: by positions)')
args = parser.parse_args()
paths = args.paths
books = [xlrd.open_workbook(path) for path in paths]
if args.names:
names = [b.sheet_names() for b in books]
sets = [set(n) for n in names]
assert([len(n) for n in names] == [len(s) for s in sets]), \
('Matching on names is unsupported if they are repeated in a same'
'workbook')
common_idces = sets[0] & sets[1]
extra = [s - common_idces for s in sets]
for idx in range(2):
for name in extra[idx]:
missing_sheets.append((1-idx, name))
sheets_map = [b.sheet_by_name for b in books]
else:
# Assume book 0 has less (or equal) sheets...
less_sheets = 0
if books[0].nsheets > books[1].nsheets:
# ... until proven otherwise
less_sheets = 1
for s_idx in range(books[less_sheets].nsheets,
books[1 - less_sheets].nsheets):
# (empty range if they have the same number of sheets)
missing_sheets.append((less_sheets, s_idx))
common_idces = range(books[less_sheets].nsheets)
sheets_map = [b.sheet_by_index for b in books]
# Cycle on common sheets:
for s_id in common_idces:
sheets = [m(s_id) for m in sheets_map]
less_rows = 0
if sheets[0].nrows > sheets[1].nrows:
less_rows = 1
for r_idx in range(sheets[less_rows].nrows,
sheets[1 - less_rows].nrows):
missing_rows.append((less_rows, s_id, r_idx))
less_cols = 0
if sheets[0].ncols > sheets[1].ncols:
less_cols = 1
for c_idx in range(sheets[less_cols].ncols,
sheets[1 - less_cols].ncols):
missing_cols.append((less_cols, s_id, c_idx))
for r_idx in range(sheets[less_rows].nrows):
for c_idx in range(sheets[less_cols].ncols):
values = [s.cell_value(r_idx, c_idx) for s in sheets]
if values[0] != values[1]:
diffs.append((s_id, r_idx, c_idx, values[0], values[1]))
# "sheet" can be both an integer and a string, depending on the flag "--names"
for book, sheet in missing_sheets:
print("Sheet %s missing from book %d" % (sheet, book+1))
for book, sheet, row in missing_rows:
print("Row %s missing in sheet %s of book %d" % (row + 1, sheet, book+1))
for book, sheet, col in missing_cols:
print("Col %s missing in sheet %s of book %d" % (xlrd.colname(col), sheet, book+1))
for sheet, row, col, val0, val1 in diffs:
print("Cell %s in sheet %s changed from \"%s\" to \"%s\"."
% (xlrd.cellname(row, col), sheet, val0, val1))