Skip to content

Commit 6b82c85

Browse files
committed
BUG: Raise MergeError when suffixes result in duplicate column names (GH#61402)
1 parent 9c5b9ee commit 6b82c85

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

pandas/core/reshape/merge.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -3058,16 +3058,19 @@ def renamer(x, suffix: str | None):
30583058
llabels = left._transform_index(lrenamer)
30593059
rlabels = right._transform_index(rrenamer)
30603060

3061-
dups = []
3061+
dups = set()
30623062
if not llabels.is_unique:
30633063
# Only raise when duplicates are caused by the suffixes; already duplicated
30643064
# columns in the original should not raise
3065-
dups = llabels[(llabels.duplicated()) & (~left.duplicated())].tolist()
3065+
dups.update(llabels[(llabels.duplicated()) & (~left.duplicated())])
30663066
if not rlabels.is_unique:
3067-
dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())].tolist())
3067+
dups.update(rlabels[(rlabels.duplicated()) & (~right.duplicated())])
3068+
# Adding a suffix can create a duplicate of a pre-existing column name
3069+
dups.update(llabels.intersection(right.difference(to_rename)))
3070+
dups.update(rlabels.intersection(left.difference(to_rename)))
30683071
if dups:
30693072
raise MergeError(
3070-
f"Passing 'suffixes' which cause duplicate columns {set(dups)} is "
3073+
f"Passing 'suffixes' which cause duplicate columns {dups} is "
30713074
f"not allowed.",
30723075
)
30733076

pandas/tests/reshape/merge/test_merge.py

+16
Original file line numberDiff line numberDiff line change
@@ -3060,3 +3060,19 @@ def test_merge_on_all_nan_column():
30603060
{"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan], "z": [4, 5, 6], "zz": [4, 5, 6]}
30613061
)
30623062
tm.assert_frame_equal(result, expected)
3063+
3064+
3065+
def test_merge_for_suffix_collisions():
3066+
# GH#61402
3067+
# Case 1: suffixes=("_dup", "") renames left "col2" to "col2_dup", colliding with right's "col2_dup"
3068+
df1 = DataFrame({"col1": [1], "col2": [2]})
3069+
df2 = DataFrame({"col1": [1], "col2": [2], "col2_dup": [3]})
3070+
with pytest.raises(MergeError, match="duplicate columns"):
3071+
merge(df1, df2, on="col1", suffixes=("_dup", ""))
3072+
3073+
# Case 2: suffixes=("", "_dup") renames right "col2" to "col2_dup", colliding with right's "col2_dup"
3074+
df1 = DataFrame({"col1": [1], "col2": [2]})
3075+
df2 = DataFrame({"col1": [1], "col2": [2], "col2_dup": [3]})
3076+
with pytest.raises(MergeError, match="duplicate columns"):
3077+
merge(df1, df2, on="col1", suffixes=("", "_dup"))
3078+

0 commit comments

Comments
 (0)