Skip to content

Commit 47668cc

Browse files
committed
clean-tsv-metadata: add an explanatory error message when --n-cols exceeds the number of columns in the TSV
1 parent 86c271b commit 47668cc

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

bin/clean-tsv

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,18 @@ def clean_tsv_file(input_file, output_file, n_cols, header, sort_col):
11 11
"""
12 12
if n_cols:
13 13
# if --n-cols is passed, read in and back out to assert n columns
14-
data = pd.read_csv(input_file,
15-
sep="\t",
16-
header=None,
17-
names=list(range(n_cols)), # overriding column names; this adds missing tabs when there are too few
18-
usecols=list(range(n_cols))) # using first n only; this removes extra tabs
14+
try:
15+
data = pd.read_csv(input_file,
16+
sep="\t",
17+
header=None,
18+
names=list(range(n_cols)), # overriding column names; this adds missing tabs when there are too few
19+
usecols=list(range(n_cols))) # using first n only; this removes extra tabs
20+
except pd.errors.ParserError as e:
21+
if "Too many columns specified" in str(e):
22+
print(e)
23+
print(f"--n-cols {n_cols} was passed, but there are not this many columns in {input_file}. --n-cols can't add extra columns, it just enforces up to the existing number of columns in the tsv.")
24+
exit()
25+
raise e
19 26
data.to_csv(output_file, sep="\t", index=False, header=False, quoting=csv.QUOTE_NONE)
20 27
input_file = output_file # we now want to read in the column-corrected version
21 28
data = pd.read_csv(input_file,

0 commit comments

Comments
 (0)