From 7d3b7cc4b13a57c21c4b973c855bf8b1fcfbc8c3 Mon Sep 17 00:00:00 2001
From: Lee Iverson <leei@sociologi.ca>
Date: Mon, 31 Jul 2023 10:30:21 -0700
Subject: [PATCH] Overload makeunique to allow true/false, certain keywords
 (:update, :ignore) and a combine function to combine columns.

---
 src/abstractdataframe/abstractdataframe.jl | 106 ++++++++++++---------
 src/abstractdataframe/reshape.jl           |   8 +-
 src/dataframe/dataframe.jl                 |  65 ++++++++-----
 src/join/composer.jl                       |  99 ++++++++++++-------
 src/join/inplace.jl                        |  41 ++++++--
 src/other/index.jl                         |  47 ++++++---
 src/other/metadata.jl                      |  18 ++++
 src/other/tables.jl                        |   2 +-
 src/other/utils.jl                         |  36 ++++---
 test/cat.jl                                |  13 +++
 test/dataframe.jl                          |  20 +++-
 test/join.jl                               |  83 ++++++++++++++--
 12 files changed, 396 insertions(+), 142 deletions(-)

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
index a40627c6a4..0a8d18b6db 100644
--- a/src/abstractdataframe/abstractdataframe.jl
+++ b/src/abstractdataframe/abstractdataframe.jl
@@ -117,9 +117,9 @@ Compat.hasproperty(df::AbstractDataFrame, s::AbstractString) = haskey(index(df),
 
 """
     rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol};
-            makeunique::Bool=false)
+            makeunique=false)
     rename!(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};
-            makeunique::Bool=false)
+            makeunique=false)
     rename!(df::AbstractDataFrame, (from => to)::Pair...)
     rename!(df::AbstractDataFrame, d::AbstractDict)
     rename!(df::AbstractDataFrame, d::AbstractVector{<:Pair})
@@ -138,7 +138,8 @@ Each name is changed at most once. Permutation of names is allowed.
   of the same length as the number of columns in `df`
 - `makeunique` : if `false` (the default), an error will be raised
   if duplicate names are found; if `true`, duplicate names will be suffixed
-  with `_i` (`i` starting at 1 for the first duplicate).
+  with `_i` (`i` starting at 1 for the first duplicate). If a `Function` of two
+  arguments is passed, it is used to combine each duplicate column with the original.
 
 If pairs are passed to `rename!` (as positional arguments or in a dictionary or
 a vector) then:
@@ -197,7 +198,7 @@ julia> rename!(uppercase, df)
 ```
 """
 function rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol};
-                 makeunique::Bool=false)
+                 makeunique=false)
     rename!(index(df), vals, makeunique=makeunique)
     # renaming columns of SubDataFrame has to clean non-note metadata in its parent
     _drop_all_nonnote_metadata!(parent(df))
@@ -205,7 +206,7 @@ function rename!(df::AbstractDataFrame, vals::AbstractVector{Symbol};
 end
 
 function rename!(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};
-                 makeunique::Bool=false)
+                 makeunique=false)
     rename!(index(df), Symbol.(vals), makeunique=makeunique)
     # renaming columns of SubDataFrame has to clean non-note metadata in its parent
     _drop_all_nonnote_metadata!(parent(df))
@@ -261,9 +262,9 @@ end
 
 """
     rename(df::AbstractDataFrame, vals::AbstractVector{Symbol};
-           makeunique::Bool=false)
+           makeunique=false)
     rename(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};
-           makeunique::Bool=false)
+           makeunique=false)
     rename(df::AbstractDataFrame, (from => to)::Pair...)
     rename(df::AbstractDataFrame, d::AbstractDict)
     rename(df::AbstractDataFrame, d::AbstractVector{<:Pair})
@@ -353,9 +354,9 @@ julia> rename(uppercase, df)
 ```
 """
 rename(df::AbstractDataFrame, vals::AbstractVector{Symbol};
-       makeunique::Bool=false) = rename!(copy(df), vals, makeunique=makeunique)
+       makeunique=false) = rename!(copy(df), vals, makeunique=makeunique)
 rename(df::AbstractDataFrame, vals::AbstractVector{<:AbstractString};
-       makeunique::Bool=false) = rename!(copy(df), vals, makeunique=makeunique)
+       makeunique=false) = rename!(copy(df), vals, makeunique=makeunique)
 rename(df::AbstractDataFrame, args...) = rename!(copy(df), args...)
 rename(f::Function, df::AbstractDataFrame) = rename!(f, copy(df))
 
@@ -1536,13 +1537,15 @@ end
 
 """
     hcat(df::AbstractDataFrame...;
-         makeunique::Bool=false, copycols::Bool=true)
+         makeunique=false, copycols::Bool=true)
 
 Horizontally concatenate data frames.
 
 If `makeunique=false` (the default) column names of passed objects must be unique.
 If `makeunique=true` then duplicate column names will be suffixed
 with `_i` (`i` starting at 1 for the first duplicate).
+If `makeunique` is a `Function` of two arguments then it is used to combine the
+left-hand values with the right-hand values of columns with duplicate names.
 
 If `copycols=true` (the default) then the `DataFrame` returned by `hcat` will
 contain copied columns from the source data frames.
@@ -1593,24 +1596,23 @@ julia> df3.A === df1.A
 true
 ```
 """
-function Base.hcat(df::AbstractDataFrame; makeunique::Bool=false, copycols::Bool=true)
+function Base.hcat(df::AbstractDataFrame; makeunique=false, copycols::Bool=true)
     df = DataFrame(df, copycols=copycols)
     _drop_all_nonnote_metadata!(df)
     return df
 end
 
 # TODO: after deprecation remove AbstractVector methods
-Base.hcat(df::AbstractDataFrame, x::AbstractVector; makeunique::Bool=false, copycols::Bool=true) =
+Base.hcat(df::AbstractDataFrame, x::AbstractVector; makeunique=false, copycols::Bool=true) =
     hcat!(DataFrame(df, copycols=copycols), x, makeunique=makeunique, copycols=copycols)
-Base.hcat(x::AbstractVector, df::AbstractDataFrame; makeunique::Bool=false, copycols::Bool=true) =
+Base.hcat(x::AbstractVector, df::AbstractDataFrame; makeunique=false, copycols::Bool=true) =
     hcat!(x, df, makeunique=makeunique, copycols=copycols)
 Base.hcat(df1::AbstractDataFrame, df2::AbstractDataFrame;
-          makeunique::Bool=false, copycols::Bool=true) =
-    hcat!(DataFrame(df1, copycols=copycols), df2,
-          makeunique=makeunique, copycols=copycols)
+          makeunique=false, copycols::Bool=true) =
+    hcat!(DataFrame(df1, copycols=copycols), df2, makeunique=makeunique, copycols=copycols)
 Base.hcat(df::AbstractDataFrame, x::Union{AbstractVector, AbstractDataFrame},
           y::Union{AbstractVector, AbstractDataFrame}...;
-          makeunique::Bool=false, copycols::Bool=true) =
+          makeunique=false, copycols::Bool=true) =
     hcat!(hcat(df, x, makeunique=makeunique, copycols=copycols), y...,
           makeunique=makeunique, copycols=copycols)
 
@@ -2869,7 +2871,8 @@ const INSERTCOLS_ARGUMENTS =
     - `after` : if `true` columns are inserted after `col`
     - `makeunique` : defines what to do if `name` already exists in `df`;
       if it is `false` an error will be thrown; if it is `true` a new unique name will
-      be generated by adding a suffix
+      be generated by adding a suffix; if it is a `Function` then the two duplicate
+      columns are combined by applying it to the left-hand and right-hand values.
     - `copycols` : whether vectors passed as columns should be copied
 
     If `val` is an `AbstractRange` then the result of `collect(val)` is inserted.
@@ -2891,7 +2894,7 @@ const INSERTCOLS_ARGUMENTS =
 
 """
     insertcols(df::AbstractDataFrame[, col], (name=>val)::Pair...;
-               after::Bool=false, makeunique::Bool=false, copycols::Bool=true)
+               after::Bool=false, makeunique=false, copycols::Bool=true)
 
 Insert a column into a copy of `df` data frame using the [`insertcols!`](@ref)
 function and return the newly created data frame.
@@ -2942,13 +2945,13 @@ julia> insertcols(df, :a, :d => 7:9, after=true)
 ```
 """
 insertcols(df::AbstractDataFrame, args...;
-           after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
+           after::Bool=false, makeunique=false, copycols::Bool=true) =
     insertcols!(copy(df), args...;
                 after=after, makeunique=makeunique, copycols=copycols)
 
 """
     insertcols!(df::AbstractDataFrame[, col], (name=>val)::Pair...;
-                after::Bool=false, makeunique::Bool=false, copycols::Bool=true)
+                after::Bool=false, makeunique=false, copycols::Bool=true)
 
 Insert a column into a data frame in place. Return the updated data frame.
 
@@ -2979,7 +2982,7 @@ julia> insertcols!(df, 1, :b => 'a':'c')
    2 │ b         2
    3 │ c         3
 
-julia> insertcols!(df, 2, :c => 2:4, :c => 3:5, makeunique=true)
+julia> insertcols!(df, 2, :c => 2:4, :c => 3:5, makeunique=true)
 3×4 DataFrame
  Row │ b     c      c_1    a
      │ Char  Int64  Int64  Int64
@@ -2999,7 +3002,9 @@ julia> insertcols!(df, :b, :d => 7:9, after=true)
 ```
 """
 function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Symbol}...;
-                     after::Bool=false, makeunique::Bool=false, copycols::Bool=true)
+                     after::Bool=false, makeunique=false, copycols::Bool=true)
+    makeunique = _makeunique_normalize(makeunique)
+
     if !is_column_insertion_allowed(df)
         throw(ArgumentError("insertcols! is only supported for DataFrame, or for " *
                             "SubDataFrame created with `:` as column selector"))
@@ -3025,15 +3030,15 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
                             "$(ncol(df)) columns at index $col_ind"))
     end
 
-    if !makeunique
+    if makeunique == false
         if !allunique(first.(name_cols))
             throw(ArgumentError("Names of columns to be inserted into a data frame " *
-                                "must be unique when `makeunique=true`"))
+                                "must be unique when `makeunique=false`"))
         end
         for (n, _) in name_cols
             if hasproperty(df, n)
                 throw(ArgumentError("Column $n is already present in the data frame " *
-                                    "which is not allowed when `makeunique=true`"))
+                                    "which is not allowed when `makeunique=false`"))
             end
         end
     end
@@ -3103,19 +3108,11 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
             dfp[!, name] = item_new
         else
             if hasproperty(dfp, name)
-                @assert makeunique
-                k = 1
-                while true
-                    nn = Symbol("$(name)_$k")
-                    if !hasproperty(dfp, nn)
-                        name = nn
-                        break
-                    end
-                    k += 1
-                end
+                col_ind = insert_unique(dfp, name, col_ind, item_new, makeunique)
+            else
+                insert!(index(dfp), col_ind, name)
+                insert!(_columns(dfp), col_ind, item_new)
             end
-            insert!(index(dfp), col_ind, name)
-            insert!(_columns(dfp), col_ind, item_new)
         end
         col_ind += 1
     end
@@ -3134,22 +3131,22 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{Sy
 end
 
 insertcols!(df::AbstractDataFrame, col::ColumnIndex, name_cols::Pair{<:AbstractString}...;
-            after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
+            after::Bool=false, makeunique=false, copycols::Bool=true) =
     insertcols!(df, col, (Symbol(n) => v for (n, v) in name_cols)...,
                 after=after, makeunique=makeunique, copycols=copycols)
 
 insertcols!(df::AbstractDataFrame, name_cols::Pair{Symbol}...;
-            after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
+            after::Bool=false, makeunique=false, copycols::Bool=true) =
     insertcols!(df, ncol(df)+1, name_cols..., after=after,
                 makeunique=makeunique, copycols=copycols)
 
 insertcols!(df::AbstractDataFrame, name_cols::Pair{<:AbstractString}...;
-            after::Bool=false, makeunique::Bool=false, copycols::Bool=true) =
+            after::Bool=false, makeunique=false, copycols::Bool=true) =
     insertcols!(df, (Symbol(n) => v for (n, v) in name_cols)...,
                 after=after, makeunique=makeunique, copycols=copycols)
 
 function insertcols!(df::AbstractDataFrame, col::ColumnIndex; after::Bool=false,
-                     makeunique::Bool=false, copycols::Bool=true)
+                     makeunique=false, copycols::Bool=true)
     if col isa SymbolOrString
         col_ind = Int(columnindex(df, col))
         if col_ind == 0
@@ -3173,11 +3170,34 @@ function insertcols!(df::AbstractDataFrame, col::ColumnIndex; after::Bool=false,
 end
 
 function insertcols!(df::AbstractDataFrame; after::Bool=false,
-                     makeunique::Bool=false, copycols::Bool=true)
+                     makeunique=false, copycols::Bool=true)
     _drop_all_nonnote_metadata!(parent(df))
     return df
 end
 
+# Insert `item_new` into `dfp` at position `col_ind` when `name` is already taken:
+# the column is stored under the first free `name_k` (k = 1, 2, ...).
+# Returns `col_ind`, the position at which the column was inserted.
+function insert_unique(dfp, name, col_ind, item_new, makeunique::Bool=false)
+    # `name` clashes with an existing column, so with `makeunique=false` there is
+    # no valid name to insert under; fail loudly instead of dropping `item_new`
+    makeunique || throw(ArgumentError("Column $name is already present in the data " *
+                                      "frame which is not allowed when `makeunique=false`"))
+    k = 1
+    while hasproperty(dfp, Symbol("$(name)_$k"))
+        k += 1
+    end
+    insert!(index(dfp), col_ind, Symbol("$(name)_$k"))
+    insert!(_columns(dfp), col_ind, item_new)
+    return col_ind
+end
+
+function insert_unique(dfp, name, col_ind, item_new, makeunique::Function)
+    # No column is added: fold `item_new` into the existing column elementwise.
+    dfp[!, name] = broadcast(makeunique, dfp[!, name], item_new)
+    return col_ind - 1  # cancels the caller's subsequent `col_ind += 1`
+end
+
 """
     Iterators.partition(df::AbstractDataFrame, n::Integer)
 
diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index 2effb6f2fd..88726676e5 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -723,7 +723,7 @@ Base.transpose(::AbstractDataFrame, args...; kwargs...) =
     permutedims(df::AbstractDataFrame,
                 [src_namescol::Union{Int, Symbol, AbstractString}],
                 [dest_namescol::Union{Symbol, AbstractString}];
-                makeunique::Bool=false, strict::Bool=true)
+                makeunique=false, strict::Bool=true)
 
 Turn `df` on its side such that rows become columns
 and values in the column indexed by `src_namescol` become the names of new columns.
@@ -823,7 +823,7 @@ julia> permutedims(df2, 1, "different_name")
 """
 function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex,
                           dest_namescol::Union{Symbol, AbstractString};
-                          makeunique::Bool=false, strict::Bool=true)
+                          makeunique=false, strict::Bool=true)
 
     if src_namescol isa Integer
         1 <= src_namescol <= ncol(df) || throw(BoundsError(index(df), src_namescol))
@@ -865,7 +865,7 @@ function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex,
 end
 
 function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex;
-                          makeunique::Bool=false, strict::Bool=true)
+                          makeunique=false, strict::Bool=true)
     if src_namescol isa Integer
         1 <= src_namescol <= ncol(df) || throw(BoundsError(index(df), src_namescol))
         dest_namescol = _names(df)[src_namescol]
@@ -883,7 +883,7 @@ function Base.permutedims(df::AbstractDataFrame)
 end
 
 function Base.permutedims(df::AbstractDataFrame, cnames::AbstractVector;
-                          makeunique::Bool=false)
+                          makeunique=false)
     out_df = DataFrame(permutedims(Matrix(df)), cnames, makeunique=makeunique)
     _copy_table_note_metadata!(out_df, df)
     return out_df
diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl
index 3f4afafecf..5c4b110d76 100755
--- a/src/dataframe/dataframe.jl
+++ b/src/dataframe/dataframe.jl
@@ -8,16 +8,16 @@ particularly a `Vector`, `PooledVector` or `CategoricalVector`.
 
 # Constructors
 ```julia
-DataFrame(pairs::Pair...; makeunique::Bool=false, copycols::Bool=true)
-DataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false, copycols::Bool=true)
+DataFrame(pairs::Pair...; makeunique=false, copycols::Bool=true)
+DataFrame(pairs::AbstractVector{<:Pair}; makeunique=false, copycols::Bool=true)
 DataFrame(ds::AbstractDict; copycols::Bool=true)
 DataFrame(; kwargs..., copycols::Bool=true)
 
 DataFrame(table; copycols::Union{Bool, Nothing}=nothing)
 DataFrame(table, names::AbstractVector;
-          makeunique::Bool=false, copycols::Union{Bool, Nothing}=nothing)
+          makeunique=false, copycols::Union{Bool, Nothing}=nothing)
 DataFrame(columns::AbstractVecOrMat, names::AbstractVector;
-          makeunique::Bool=false, copycols::Bool=true)
+          makeunique=false, copycols::Bool=true)
 
 DataFrame(::DataFrameRow; copycols::Bool=true)
 DataFrame(::GroupedDataFrame; copycols::Bool=true, keepkeys::Bool=true)
@@ -88,6 +88,9 @@ By default an error will be raised if duplicates in column names are found. Pass
 in which case they will be suffixed with `_i` (`i` starting at 1 for the first
 duplicate).
 
+If duplicate column names are found and `makeunique` is a `Function` then the left-hand column is
+updated with the output of the function applied to the values of the left-hand and right-hand columns.
+
 If an `AbstractRange` is passed to a `DataFrame` constructor as a column it is
 always collected to a `Vector` (even if `copycols=false`). As a general rule
 `AbstractRange` values are always materialized to a `Vector` by all functions in
@@ -194,7 +197,7 @@ mutable struct DataFrame <: AbstractDataFrame
                        colindex::Index; copycols::Bool=true)
         if length(columns) == length(colindex) == 0
             return new(AbstractVector[], Index(), nothing, nothing, true)
-        elseif length(columns) != length(colindex)
+        elseif length(columns) != column_length(colindex)
             throw(DimensionMismatch("Number of columns ($(length(columns))) and number of " *
                                     "column names ($(length(colindex))) are not equal"))
         end
@@ -232,6 +235,22 @@ mutable struct DataFrame <: AbstractDataFrame
             firstindex(col) != 1 && _onebased_check_error(i, col)
         end
 
+        # process updates if they exist
+        if !isempty(colindex.updates)
+            updated = Vector{Any}(nothing, length(colindex.names))
+            for src in eachindex(colindex.updates)
+                name = colindex.updates[src]
+                dst = colindex.lookup[name]
+                if isnothing(updated[dst])
+                    updated[dst] = columns[src]
+                else
+                    updated[dst] = colindex.updatefun.(updated[dst], columns[src])
+                end
+            end
+            columns = updated
+            colindex = Index(colindex.lookup, colindex.names)
+        end
+
         return new(convert(Vector{AbstractVector}, columns), colindex, nothing, nothing, true)
     end
 end
@@ -254,7 +273,8 @@ end
 
 DataFrame(df::DataFrame; copycols::Bool=true) = copy(df, copycols=copycols)
 
-function DataFrame(pairs::Pair{Symbol, <:Any}...; makeunique::Bool=false,
+function DataFrame(pairs::Pair{Symbol, <:Any}...; 
+                   makeunique=false,
                    copycols::Bool=true)::DataFrame
     colnames = [Symbol(k) for (k, v) in pairs]
     columns = Any[v for (k, v) in pairs]
@@ -262,7 +282,8 @@ function DataFrame(pairs::Pair{Symbol, <:Any}...; makeunique::Bool=false,
                      copycols=copycols)
 end
 
-function DataFrame(pairs::Pair{<:AbstractString, <:Any}...; makeunique::Bool=false,
+function DataFrame(pairs::Pair{<:AbstractString, <:Any}...; 
+                   makeunique=false,
                    copycols::Bool=true)::DataFrame
     colnames = [Symbol(k) for (k, v) in pairs]
     columns = Any[v for (k, v) in pairs]
@@ -271,8 +292,8 @@ function DataFrame(pairs::Pair{<:AbstractString, <:Any}...; makeunique::Bool=fal
 end
 
 # this is needed as a workaround for Tables.jl dispatch
-function DataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false,
-                   copycols::Bool=true)
+function DataFrame(pairs::AbstractVector{<:Pair}; 
+                   makeunique=false, copycols::Bool=true)
     if isempty(pairs)
         return DataFrame()
     else
@@ -334,7 +355,7 @@ function DataFrame(; kwargs...)
 end
 
 function DataFrame(columns::AbstractVector, cnames::AbstractVector{Symbol};
-                   makeunique::Bool=false, copycols::Bool=true)::DataFrame
+                   makeunique=false, copycols::Bool=true)::DataFrame
     if !(eltype(columns) <: AbstractVector) && !all(col -> isa(col, AbstractVector), columns)
         return rename!(DataFrame(columns, copycols=copycols), cnames, makeunique=makeunique)
     end
@@ -351,17 +372,17 @@ function _name2symbol(str::AbstractVector)
 end
 
 DataFrame(columns::AbstractVector, cnames::AbstractVector;
-          makeunique::Bool=false, copycols::Bool=true) =
+          makeunique=false, copycols::Bool=true) =
     DataFrame(columns, _name2symbol(cnames), makeunique=makeunique, copycols=copycols)
 
 DataFrame(columns::AbstractVector{<:AbstractVector}, cnames::AbstractVector{Symbol};
-          makeunique::Bool=false, copycols::Bool=true)::DataFrame =
+          makeunique=false, copycols::Bool=true)::DataFrame =
     DataFrame(collect(AbstractVector, columns),
               Index(convert(Vector{Symbol}, cnames), makeunique=makeunique),
               copycols=copycols)
 
 DataFrame(columns::AbstractVector{<:AbstractVector}, cnames::AbstractVector;
-          makeunique::Bool=false, copycols::Bool=true) =
+          makeunique=false, copycols::Bool=true) =
     DataFrame(columns, _name2symbol(cnames); makeunique=makeunique, copycols=copycols)
 
 function DataFrame(columns::AbstractVector, cnames::Symbol; copycols::Bool=true)
@@ -375,14 +396,14 @@ function DataFrame(columns::AbstractVector, cnames::Symbol; copycols::Bool=true)
 end
 
 function DataFrame(columns::AbstractMatrix, cnames::AbstractVector{Symbol};
-                   makeunique::Bool=false, copycols::Bool=true)
+                   makeunique=false, copycols::Bool=true)
     getter = copycols ? getindex : view
     return DataFrame(AbstractVector[getter(columns, :, i) for i in 1:size(columns, 2)],
                      cnames, makeunique=makeunique, copycols=false)
 end
 
 DataFrame(columns::AbstractMatrix, cnames::AbstractVector;
-          makeunique::Bool=false, copycols::Bool=true) =
+          makeunique=false, copycols::Bool=true) =
     DataFrame(columns, _name2symbol(cnames); makeunique=makeunique, copycols=copycols)
 
 function DataFrame(columns::AbstractMatrix, cnames::Symbol; copycols::Bool=true)
@@ -408,13 +429,13 @@ DataFrame(vecs::Vector{<:AbstractVector}) =
                         "generate column names: `DataFrame(vecs, :auto)`"))
 
 DataFrame(column_eltypes::AbstractVector{<:Type}, cnames::AbstractVector{Symbol},
-          nrows::Integer=0; makeunique::Bool=false) =
+          nrows::Integer=0; makeunique=false) =
     throw(ArgumentError("`DataFrame` constructor with passed eltypes is " *
                         "not supported. Pass explicitly created columns to a " *
                         "`DataFrame` constructor instead."))
 
 DataFrame(column_eltypes::AbstractVector{<:Type}, cnames::AbstractVector{<:AbstractString},
-          nrows::Integer=0; makeunique::Bool=false) =
+          nrows::Integer=0; makeunique=false) =
     throw(ArgumentError("`DataFrame` constructor with passed eltypes is " *
                         "not supported. Pass explicitly created columns to a " *
                         "`DataFrame` constructor instead."))
@@ -1202,7 +1223,7 @@ end
 
 # hcat! for 2 arguments, only a vector or a data frame is allowed
 function hcat!(df1::DataFrame, df2::AbstractDataFrame;
-               makeunique::Bool=false, copycols::Bool=true)
+               makeunique=false, copycols::Bool=true)
     u = add_names(index(df1), index(df2), makeunique=makeunique)
 
     _drop_all_nonnote_metadata!(df1)
@@ -1217,14 +1238,14 @@ end
 
 # TODO: after deprecation remove AbstractVector methods
 
-function hcat!(df::DataFrame, x::AbstractVector; makeunique::Bool=false, copycols::Bool=true)
+function hcat!(df::DataFrame, x::AbstractVector; makeunique=false, copycols::Bool=true)
     Base.depwarn("horizontal concatenation of data frame with a vector is deprecated. " *
                  "Pass DataFrame(x1=x) instead.", :hcat!)
     return hcat!(df, DataFrame(AbstractVector[x], [:x1], copycols=false),
                  makeunique=makeunique, copycols=copycols)
 end
 
-function hcat!(x::AbstractVector, df::DataFrame; makeunique::Bool=false, copycols::Bool=true)
+function hcat!(x::AbstractVector, df::DataFrame; makeunique=false, copycols::Bool=true)
     Base.depwarn("horizontal concatenation of data frame with a vector is deprecated. " *
                  "Pass DataFrame(x1=x) instead.", :hcat!)
     return hcat!(DataFrame(AbstractVector[x], [:x1], copycols=copycols), df,
@@ -1232,14 +1253,14 @@ function hcat!(x::AbstractVector, df::DataFrame; makeunique::Bool=false, copycol
 end
 
 # hcat! for 1-n arguments
-function hcat!(df::DataFrame; makeunique::Bool=false, copycols::Bool=true)
+function hcat!(df::DataFrame; makeunique=false, copycols::Bool=true)
     _drop_all_nonnote_metadata!(df)
     return df
 end
 
 hcat!(a::DataFrame, b::Union{AbstractDataFrame, AbstractVector},
       c::Union{AbstractDataFrame, AbstractVector}...;
-      makeunique::Bool=false, copycols::Bool=true) =
+      makeunique=false, copycols::Bool=true) =
     hcat!(hcat!(a, b, makeunique=makeunique, copycols=copycols),
           c..., makeunique=makeunique, copycols=copycols)
 
diff --git a/src/join/composer.jl b/src/join/composer.jl
index 3cd4e90b3e..c3a50209f5 100644
--- a/src/join/composer.jl
+++ b/src/join/composer.jl
@@ -118,8 +118,10 @@ _rename_cols(old_names::AbstractVector{Symbol},
            for n in old_names]
 
 function _propagate_join_metadata!(joiner::DataFrameJoiner, dfr_noon::AbstractDataFrame,
-                                   res::DataFrame, kind::Symbol)
+                                   res::DataFrame, kind::Symbol;
+                                   makeunique=false, names=nothing)
     @assert kind == :left || kind == :right || kind == :outer || kind == :inner
+    makeunique = _makeunique_normalize(makeunique)
 
     # The steps taken in this function are (all applies only to :note-style metadata):
     # We initially copy metadata from left table as left table is always used
@@ -174,8 +176,17 @@ function _propagate_join_metadata!(joiner::DataFrameJoiner, dfr_noon::AbstractDa
         end
     end
 
-    for i in 1:ncol(dfr_noon)
-        _copy_col_note_metadata!(res, ncol(joiner.dfl) + i, dfr_noon, i)
+    if makeunique isa Bool
+        for i in 1:ncol(dfr_noon)
+            _copy_col_note_metadata!(res, ncol(joiner.dfl) + i, dfr_noon, i)
+        end
+    else
+        map = Index(names, makeunique=makeunique)
+        for i in 1:ncol(dfr_noon)
+            name = map.updates[ncol(joiner.dfl) + i]
+            dst = map.lookup[name]
+            _merge_col_note_metadata!(res, dst, dfr_noon, i)
+        end
     end
 
     if kind == :outer || kind == :inner
@@ -234,8 +245,7 @@ function _count_sortperm!(input::Vector{Int}, count::Vector,
     return output
 end
 
-function compose_inner_table(joiner::DataFrameJoiner,
-                             makeunique::Bool,
+function compose_inner_table(joiner::DataFrameJoiner, makeunique,
                              left_rename::Union{Function, AbstractString, Symbol},
                              right_rename::Union{Function, AbstractString, Symbol},
                              order::Symbol)
@@ -280,7 +290,7 @@ function compose_inner_table(joiner::DataFrameJoiner,
                      _rename_cols(_names(dfr_noon), right_rename))
     res = DataFrame(cols, new_names, makeunique=makeunique, copycols=false)
 
-    _propagate_join_metadata!(joiner, dfr_noon, res, :inner)
+    _propagate_join_metadata!(joiner, dfr_noon, res, :inner, makeunique=makeunique, names=new_names)
     return res
 end
 
@@ -292,7 +302,7 @@ function find_missing_idxs(present::Vector{Int}, target_len::Int)
     return _findall(not_seen)
 end
 
-function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol, makeunique::Bool,
+function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol, makeunique,
                               left_rename::Union{Function, AbstractString, Symbol},
                               right_rename::Union{Function, AbstractString, Symbol},
                               indicator::Union{Nothing, Symbol, AbstractString},
@@ -319,7 +329,7 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol, makeunique:
                                  leftonly_ixs, rightonly_ixs, order)
 end
 
-function _compose_joined_table(joiner::DataFrameJoiner, kind::Symbol, makeunique::Bool,
+function _compose_joined_table(joiner::DataFrameJoiner, kind::Symbol, makeunique,
                                left_rename::Union{Function, AbstractString, Symbol},
                                right_rename::Union{Function, AbstractString, Symbol},
                                indicator::Union{Nothing, Symbol, AbstractString},
@@ -447,7 +457,7 @@ function _compose_joined_table(joiner::DataFrameJoiner, kind::Symbol, makeunique
         permute!(res, new_order)
     end
 
-    _propagate_join_metadata!(joiner, dfr_noon, res, kind)
+    _propagate_join_metadata!(joiner, dfr_noon, res, kind, makeunique=makeunique, names=new_names)
 
     return res, src_indicator
 end
@@ -484,7 +494,7 @@ function _sort_compose_helper(fillval::Int, # value to use to fill unused indice
 end
 
 function _join(df1::AbstractDataFrame, df2::AbstractDataFrame;
-               on::Union{<:OnType, AbstractVector}, kind::Symbol, makeunique::Bool,
+               on::Union{<:OnType, AbstractVector}, kind::Symbol, makeunique,
                indicator::Union{Nothing, Symbol, AbstractString},
                validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}},
                left_rename::Union{Function, AbstractString, Symbol},
@@ -604,9 +614,10 @@ function _join(df1::AbstractDataFrame, df2::AbstractDataFrame;
                                        "both" => 3)
         indicatorcol = PooledArray(PooledArrays.RefArray(src_indicator),
                                    invpool, pool)
-
+                                   
+        makeunique = _makeunique_normalize(makeunique)
         unique_indicator = indicator
-        if makeunique
+        if makeunique == true
             try_idx = 0
             while hasproperty(joined, unique_indicator)
                 try_idx += 1
@@ -614,12 +625,16 @@ function _join(df1::AbstractDataFrame, df2::AbstractDataFrame;
             end
         end
 
-        if hasproperty(joined, unique_indicator)
-            throw(ArgumentError("joined data frame already has column " *
-                                ":$unique_indicator. Pass makeunique=true to " *
-                                "make it unique using a suffix automatically."))
+        if !isa(makeunique, Bool) && hasproperty(joined, indicator)
+            # combine function and a real name clash: merge into the existing column
+            joined[!, indicator] = makeunique.(joined[!, indicator], indicatorcol)
+        elseif hasproperty(joined, unique_indicator)
+            throw(ArgumentError("joined data frame already has column " *
+                                ":$unique_indicator. Pass makeunique=true to " *
+                                "make it unique using a suffix automatically."))
+        else
+            joined[!, unique_indicator] = indicatorcol
         end
-        joined[!, unique_indicator] = indicatorcol
     else
         @assert isnothing(src_indicator)
     end
@@ -755,7 +770,7 @@ julia> innerjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase =
 """
 function innerjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
                    on::Union{<:OnType, AbstractVector} = Symbol[],
-                   makeunique::Bool=false,
+                   makeunique=false,
                    validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
                    renamecols::Pair=identity => identity,
                    matchmissing::Symbol=:error,
@@ -772,7 +787,7 @@ end
 
 function innerjoin(df1::AbstractDataFrame, df2::AbstractDataFrame, dfs::AbstractDataFrame...;
                    on::Union{<:OnType, AbstractVector} = Symbol[],
-                   makeunique::Bool=false,
+                   makeunique=false,
                    validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
                    matchmissing::Symbol=:error,
                    order::Symbol=:undefined)
@@ -813,8 +828,10 @@ change in future releases.
   `isequal`. `on` is a required argument.
 - `makeunique` : if `false` (the default), an error will be raised
   if duplicate names are found in columns not joined on;
-  if `true`, duplicate names will be suffixed with `_i`
+  if `true` (deprecated), duplicate names will be suffixed with `_i`
   (`i` starting at 1 for the first duplicate).
+  If a `Function`, the values of each duplicated column are combined by calling
+  it with the left-hand value and the right-hand value as inputs.
 - `source` : Default: `nothing`. If a `Symbol` or string, adds indicator
   column with the given name, for whether a row appeared in only `df1` (`"left_only"`)
   or in both (`"both"`). If the name is already in use,
@@ -915,12 +932,14 @@ julia> leftjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase =>
 ```
 """
 function leftjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
-                  on::Union{<:OnType, AbstractVector} = Symbol[], makeunique::Bool=false,
+                  on::Union{<:OnType, AbstractVector} = Symbol[], 
+                  makeunique=false,
                   source::Union{Nothing, Symbol, AbstractString}=nothing,
                   indicator::Union{Nothing, Symbol, AbstractString}=nothing,
                   validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
                   renamecols::Pair=identity => identity, matchmissing::Symbol=:error,
                   order::Symbol=:undefined)
+
     if !all(x -> x isa Union{Function, AbstractString, Symbol}, renamecols)
         throw(ArgumentError("renamecols keyword argument must be a `Pair` " *
                             "containing functions, strings, or `Symbol`s"))
@@ -970,7 +989,9 @@ change in future releases.
 - `makeunique` : if `false` (the default), an error will be raised
   if duplicate names are found in columns not joined on;
   if `true`, duplicate names will be suffixed with `_i`
-  (`i` starting at 1 for the first duplicate).
+  (`i` starting at 1 for the first duplicate); if a `Function`, it is called
+  with the left-hand side value and the right-hand side value of each
+  duplicated column to produce the output value.
 - `source` : Default: `nothing`. If a `Symbol` or string, adds indicator
   column with the given name for whether a row appeared in only `df2` (`"right_only"`)
   or in both (`"both"`). If the name is already in use,
@@ -1071,7 +1092,8 @@ julia> rightjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase =
 ```
 """
 function rightjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
-                   on::Union{<:OnType, AbstractVector} = Symbol[], makeunique::Bool=false,
+                   on::Union{<:OnType, AbstractVector} = Symbol[], 
+                   makeunique=false,
                    source::Union{Nothing, Symbol, AbstractString}=nothing,
                    indicator::Union{Nothing, Symbol, AbstractString}=nothing,
                    validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
@@ -1102,7 +1124,7 @@ end
 """
     outerjoin(df1, df2; on, makeunique=false, source=nothing, validate=(false, false),
               renamecols=(identity => identity), matchmissing=:error, order=:undefined)
-    outerjoin(df1, df2, dfs...; on, makeunique = false,
+    outerjoin(df1, df2, dfs...; on, makeunique=false, 
               validate = (false, false), matchmissing=:error, order=:undefined)
 
 Perform an outer join of two or more data frame objects and return a `DataFrame`
@@ -1128,7 +1150,9 @@ This behavior may change in future releases.
 - `makeunique` : if `false` (the default), an error will be raised
   if duplicate names are found in columns not joined on;
   if `true`, duplicate names will be suffixed with `_i`
-  (`i` starting at 1 for the first duplicate).
+  (`i` starting at 1 for the first duplicate); if a `Function`, it is called
+  with the values from the left-hand side column and the right-hand side column
+  as inputs to create the output value.
 - `source` : Default: `nothing`. If a `Symbol` or string, adds indicator
   column with the given name for whether a row appeared in only `df1` (`"left_only"`),
   only `df2` (`"right_only"`) or in both (`"both"`). If the name is already in use,
@@ -1240,7 +1264,8 @@ julia> outerjoin(name, job2, on = [:ID => :identifier], renamecols = uppercase =
 ```
 """
 function outerjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
-                   on::Union{<:OnType, AbstractVector} = Symbol[], makeunique::Bool=false,
+                   on::Union{<:OnType, AbstractVector} = Symbol[], 
+                   makeunique=false,
                    source::Union{Nothing, Symbol, AbstractString}=nothing,
                    indicator::Union{Nothing, Symbol, AbstractString}=nothing,
                    validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
@@ -1269,7 +1294,8 @@ function outerjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
 end
 
 function outerjoin(df1::AbstractDataFrame, df2::AbstractDataFrame, dfs::AbstractDataFrame...;
-                   on::Union{<:OnType, AbstractVector} = Symbol[], makeunique::Bool=false,
+                   on::Union{<:OnType, AbstractVector} = Symbol[], 
+                   makeunique=false,
                    validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
                    matchmissing::Symbol=:error, order::Symbol=:undefined)
     res = outerjoin(df1, df2, on=on, makeunique=makeunique, validate=validate,
@@ -1384,7 +1410,7 @@ julia> semijoin(name, job2, on = [:ID => :identifier])
 ```
 """
 semijoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
-         on::Union{<:OnType, AbstractVector} = Symbol[], makeunique::Bool=false,
+         on::Union{<:OnType, AbstractVector} = Symbol[], makeunique=false,
          validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
          matchmissing::Symbol=:error) =
     _join(df1, df2, on=on, kind=:semi, makeunique=makeunique,
@@ -1488,7 +1514,7 @@ julia> antijoin(name, job2, on = [:ID => :identifier])
 ```
 """
 antijoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
-         on::Union{<:OnType, AbstractVector} = Symbol[], makeunique::Bool=false,
+         on::Union{<:OnType, AbstractVector} = Symbol[], makeunique=false,
          validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false),
          matchmissing::Symbol=:error) =
     _join(df1, df2, on=on, kind=:anti, makeunique=makeunique,
@@ -1499,7 +1525,7 @@ antijoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
 
 """
     crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
-              makeunique::Bool=false, renamecols=identity => identity)
+              makeunique=false, renamecols=identity => identity)
     crossjoin(df1, df2, dfs...; makeunique = false)
 
 Perform a cross join of two or more data frame objects and return a `DataFrame`
@@ -1565,10 +1591,11 @@ julia> crossjoin(df1, df2)
 ```
 """
 function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
-                   makeunique::Bool=false, renamecols::Pair=identity => identity)
+                   makeunique=false, renamecols::Pair=identity => identity)
     _check_consistency(df1)
     _check_consistency(df2)
     r1, r2 = size(df1, 1), size(df2, 1)
+    makeunique = _makeunique_normalize(makeunique)
 
     new_names = vcat(_rename_cols(_names(df1), first(renamecols)),
                      _rename_cols(_names(df2), last(renamecols)))
@@ -1579,8 +1606,10 @@ function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
     for i in 1:ncol(df1)
         _copy_col_note_metadata!(res, i, df1, i)
     end
-    for i in 1:ncol(df2)
-        _copy_col_note_metadata!(res, ncol(df1) + i, df2, i)
+    if makeunique isa Bool
+        for i in 1:ncol(df2)
+            _copy_col_note_metadata!(res, ncol(df1) + i, df2, i)
+        end
     end
 
     _merge_matching_table_note_metadata!(res, (df1, df2))
@@ -1589,13 +1618,13 @@ function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame;
 end
 
 crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame, dfs::AbstractDataFrame...;
-          makeunique::Bool=false) =
+          makeunique=false) =
     crossjoin(crossjoin(df1, df2, makeunique=makeunique), dfs..., makeunique=makeunique)
 
 # an explicit error is thrown as join was supported in the past
 Base.join(df1::AbstractDataFrame, df2::AbstractDataFrame, dfs::AbstractDataFrame...;
           on::Union{<:OnType, AbstractVector} = Symbol[],
-          kind::Symbol = :inner, makeunique::Bool=false,
+          kind::Symbol = :inner, makeunique=false,
           indicator::Union{Nothing, Symbol} = nothing,
           validate::Union{Pair{Bool, Bool}, Tuple{Bool, Bool}}=(false, false)) =
     throw(ArgumentError("join function for data frames is not supported. Use innerjoin, " *
diff --git a/src/join/inplace.jl b/src/join/inplace.jl
index 9f1a9e0c6c..1cec6b76e7 100644
--- a/src/join/inplace.jl
+++ b/src/join/inplace.jl
@@ -24,7 +24,10 @@ added to `df1`.
 - `makeunique` : if `false` (the default), an error will be raised
   if duplicate names are found in columns not joined on;
   if `true`, duplicate names will be suffixed with `_i`
-  (`i` starting at 1 for the first duplicate).
+  (`i` starting at 1 for the first duplicate); if a `Function`,
+  it is applied to the values from the first duplicate column
+  and the second duplicate column to produce the output
+  values.
 - `source` : Default: `nothing`. If a `Symbol` or string, adds indicator
   column with the given name, for whether a row appeared in only `df1` (`"left_only"`)
   or in both (`"both"`). If the name is already in use,
@@ -95,12 +98,14 @@ julia> leftjoin!(name, job2, on = :ID => :identifier, makeunique=true, source=:s
 ```
 """
 function leftjoin!(df1::AbstractDataFrame, df2::AbstractDataFrame;
-                   on::Union{<:OnType, AbstractVector}=Symbol[], makeunique::Bool=false,
+                   on::Union{<:OnType, AbstractVector}=Symbol[], 
+                   makeunique=false,
                    source::Union{Nothing, Symbol, AbstractString}=nothing,
                    matchmissing::Symbol=:error)
 
     _check_consistency(df1)
     _check_consistency(df2)
+    makeunique = _makeunique_normalize(makeunique)
 
     if !is_column_insertion_allowed(df1)
         throw(ArgumentError("leftjoin! is only supported if `df1` is a `DataFrame`, " *
@@ -114,7 +119,7 @@ function leftjoin!(df1::AbstractDataFrame, df2::AbstractDataFrame;
     joiner = DataFrameJoiner(df1, df2, on, matchmissing, :left)
 
     right_noon_names = names(joiner.dfr, Not(joiner.right_on))
-    if !(makeunique || isempty(intersect(right_noon_names, names(df1))))
+    if makeunique == false && !isempty(intersect(right_noon_names, names(df1)))
         throw(ArgumentError("the following columns are present in both " *
                             "left and right data frames but not listed in `on`: " *
                             join(intersect(right_noon_names, names(df1)), ", ") *
@@ -149,7 +154,7 @@ function leftjoin!(df1::AbstractDataFrame, df2::AbstractDataFrame;
                                    invpool, pool)
 
         unique_indicator = source
-        if makeunique
+        if makeunique == true
             try_idx = 0
             while hasproperty(df1, unique_indicator)
                 try_idx += 1
@@ -158,11 +163,18 @@ function leftjoin!(df1::AbstractDataFrame, df2::AbstractDataFrame;
         end
 
         if hasproperty(df1, unique_indicator)
-            throw(ArgumentError("joined data frame already has column " *
-                                ":$unique_indicator. Pass makeunique=true to " *
-                                "make it unique using a suffix automatically."))
+            if makeunique isa Bool
+                throw(ArgumentError("joined data frame already has column " *
+                ":$unique_indicator. Pass makeunique=true to " *
+                "make it unique using a suffix automatically or a makeunique function " *
+                "to combine left-hand column and right-hand column values."))
+            else
+                df1[!, unique_indicator] = makeunique.(df1[!, unique_indicator], indicatorcol)
+            end
+        else
+            df1[!, unique_indicator] = indicatorcol
         end
-        df1[!, unique_indicator] = indicatorcol
+        
     end
 
     return df1
@@ -192,3 +204,16 @@ function compose_joined_rcol!(rcol::AbstractVector,
     end
     return rcol_joined
 end
+
+function outerjoin!(df1::AbstractDataFrame, df2::AbstractDataFrame;
+    on::Union{<:OnType, AbstractVector}=Symbol[], makeunique=false,
+    source::Union{Nothing, Symbol, AbstractString}=nothing,
+    matchmissing::Symbol=:error)
+    # in-place outer join: left-join df2 into df1, then append the antijoin of df2 against df1
+    leftjoin!(df1, df2, on=on, makeunique=makeunique, source=source, matchmissing=matchmissing)
+    # NOTE(review): `on` is reused unchanged with df2 as the left table; confirm for `l => r` pairs
+    aj = antijoin(df2, df1, on=on, makeunique=makeunique, matchmissing=matchmissing)
+    append!(df1, aj)
+    # NOTE(review): default `append!` column matching; confirm when df1 has columns `aj` lacks
+    return df1
+end
diff --git a/src/other/index.jl b/src/other/index.jl
index 51aa3a31cc..805e65d7d6 100644
--- a/src/other/index.jl
+++ b/src/other/index.jl
@@ -21,18 +21,30 @@ const MULTICOLUMNINDEX_STR = "`:`, `Cols`, `All`, `Between`, `Not`, a regular ex
 struct Index <: AbstractIndex   # an OrderedDict would be nice here...
     lookup::Dict{Symbol, Int}      # name => names array position
     names::Vector{Symbol}
+    updates::Vector{Symbol}
+    updatefun::Function
 end
 
-function Index(names::AbstractVector{Symbol}; makeunique::Bool=false)
-    u = make_unique(names, makeunique=makeunique)
-    lookup = Dict{Symbol, Int}(zip(u, 1:length(u)))
-    return Index(lookup, u)
+Index(l, u) = Index(l, u, Symbol[], () -> nothing)  # empty `updates`, placeholder `updatefun`
+
+function Index(names::AbstractVector{Symbol}; makeunique=false)
+    makeunique = _makeunique_normalize(makeunique)  # keyword alias -> Bool or Function
+    if makeunique isa Bool
+        u = make_unique(names, makeunique=makeunique)
+        lookup = Dict{Symbol, Int}(zip(u, 1:length(u)))
+        return Index(lookup, u)
+    else  # reversed zip: each name ends up mapped to its first position in `names`
+        lookup = Dict{Symbol, Int}(zip(reverse(names), length(names):-1:1))
+        return Index(lookup, unique(names), names, makeunique)  # NOTE(review): positions refer to `names`, not `unique(names)` - confirm
+    end
+end
 
 Index() = Index(Dict{Symbol, Int}(), Symbol[])
 Base.length(x::Index) = length(x.names)
 Base.names(x::Index) = string.(x.names)
 
+column_length(x::Index) = isempty(x.updates) ? length(x.names) : length(x.updates)
+
 # _names returns Vector{Symbol}
 _names(x::Index) = x.names
 
@@ -41,8 +53,9 @@ Base.isequal(x::AbstractIndex, y::AbstractIndex) = _names(x) == _names(y) # it i
 Base.:(==)(x::AbstractIndex, y::AbstractIndex) = isequal(x, y)
 
 
-function rename!(x::Index, nms::AbstractVector{Symbol}; makeunique::Bool=false)
-    if !makeunique
+function rename!(x::Index, nms::AbstractVector{Symbol}; makeunique=false)
+    makeunique = _makeunique_normalize(makeunique)
+    if makeunique == false
         if length(unique(nms)) != length(nms)
             dup = unique(nms[nonunique(DataFrame(nms=nms))])
             dupstr = join(string.(':', dup), ", ", " and ")
@@ -128,7 +141,7 @@ function Base.push!(x::Index, nm::Symbol)
     return x
 end
 
-function Base.merge!(x::Index, y::AbstractIndex; makeunique::Bool=false)
+function Base.merge!(x::Index, y::AbstractIndex; makeunique=false)
     adds = add_names(x, y, makeunique=makeunique)
     i = length(x)
     for add in adds
@@ -139,7 +152,7 @@ function Base.merge!(x::Index, y::AbstractIndex; makeunique::Bool=false)
     return x
 end
 
-Base.merge(x::AbstractIndex, y::AbstractIndex; makeunique::Bool=false) =
+Base.merge(x::AbstractIndex, y::AbstractIndex; makeunique=false) =
     merge!(copy(x), y, makeunique=makeunique)
 
 function Base.delete!(x::Index, idx::Integer)
@@ -432,8 +445,9 @@ end
 
 # return Vector{Symbol} of names from add_ind that do not clash with `ind`.
 # if `makeunique=false` error on collision
-# if `makeunique=false` generate new names that are deduplicated
-function add_names(ind::Index, add_ind::AbstractIndex; makeunique::Bool=false)
+# if `makeunique=true` generate new names that are deduplicated
+# if `makeunique` is a Function just return the names including duplicates
+function add_names(ind::Index, add_ind::AbstractIndex; makeunique=false)
     u = copy(_names(add_ind))
 
     seen = Set(_names(ind))
@@ -443,6 +457,12 @@ function add_names(ind::Index, add_ind::AbstractIndex; makeunique::Bool=false)
         name = u[i]
         in(name, seen) ? push!(dups, i) : push!(seen, name)
     end
+
+    makeunique = _makeunique_normalize(makeunique)
+    return nondup_names(u, dups, seen, makeunique)
+end
+
+function nondup_names(u, dups, seen, makeunique::Bool)
     if length(dups) > 0
         if !makeunique
             dupstr = join(string.(':', unique(u[dups])), ", ", " and ")
@@ -451,6 +471,7 @@ function add_names(ind::Index, add_ind::AbstractIndex; makeunique::Bool=false)
             throw(ArgumentError(msg))
         end
     end
+
     for i in dups
         nm = u[i]
         k = 1
@@ -468,6 +489,10 @@ function add_names(ind::Index, add_ind::AbstractIndex; makeunique::Bool=false)
     return u
 end
 
+function nondup_names(u, dups, seen, makeunique::Function)
+    return u  # combine-function case: names are returned unchanged, duplicates included
+end
+
 @inline parentcols(ind::Index) = Base.OneTo(length(ind))
 @inline parentcols(ind::Index, cols) = ind[cols]
 
@@ -578,7 +603,7 @@ function Base.getindex(x::SubIndex, idx::Union{AbstractVector{Symbol},
     return [x[i] for i in idx]
 end
 
-rename!(x::SubIndex, nms::AbstractVector{Symbol}; makeunique::Bool=false) =
+rename!(x::SubIndex, nms::AbstractVector{Symbol}; makeunique=false) =
     throw(ArgumentError("rename! is not supported for views other than created " *
                         "with Colon as a column selector"))
 
diff --git a/src/other/metadata.jl b/src/other/metadata.jl
index 60a283d5a0..38ac848769 100644
--- a/src/other/metadata.jl
+++ b/src/other/metadata.jl
@@ -705,6 +705,24 @@ function _copy_col_note_metadata!(dst::DataFrame, dst_col, src, src_col)
     return nothing
 end
 
+# merge column-level :note-style metadata from column src_col of table src
+# into column dst_col of dst; metadata already present on dst_col is kept
+# and a key that dst_col already has is never overwritten
+function _merge_col_note_metadata!(dst::DataFrame, dst_col, src, src_col)
+    # dst_col's existing metadata is not emptied here; it is read once up front
+    metadata = colmetadata(dst, dst_col)
+    if DataAPI.colmetadatasupport(typeof(src)).read
+        for key in colmetadatakeys(src, src_col)
+            val, style = colmetadata(src, src_col, key, style=true)
+            # only :note-style entries whose key is absent from dst_col are written
+            if style === :note && !haskey(metadata, key)
+                colmetadata!(dst, dst_col, key, val, style=:note)
+            end
+        end
+    end
+    return nothing
+end
+
 # this is a function used to copy table-level and column-level :note-style metadata
 # from Tables.jl table src to dst, discarding previous metadata contents of dst
 function _copy_all_note_metadata!(dst::DataFrame, src)
diff --git a/src/other/tables.jl b/src/other/tables.jl
index 4213c98881..2fdb9a559e 100644
--- a/src/other/tables.jl
+++ b/src/other/tables.jl
@@ -64,7 +64,7 @@ end
 
 # the logic here relies on the fact that Tables.CopiedColumns
 # is the only exception for default copycols value 
-DataFrame(x, cnames::AbstractVector; makeunique::Bool=false,
+DataFrame(x, cnames::AbstractVector; makeunique=false,
           copycols::Union{Nothing, Bool}=nothing) =
     rename!(DataFrame(x, copycols=something(copycols, !(x isa Tables.CopiedColumns))),
             _name2symbol(cnames),
diff --git a/src/other/utils.jl b/src/other/utils.jl
index 455c406f46..7a4fe8a446 100644
--- a/src/other/utils.jl
+++ b/src/other/utils.jl
@@ -72,10 +72,20 @@ struct AsTable
     end
 end
 
+# combine functions for duplicate columns: `update` takes the right value unless it is missing
+makeunique_update(v1, v2) = ismissing(v2) ? v1 : v2
+makeunique_ignore(v1, v2) = v1  # always keep the left value
+# keyword aliases accepted for `makeunique`; `const` keeps the global type-stable
+const _makeunique_keys = Dict{Symbol, Any}(:update => makeunique_update,
+    :ignore => makeunique_ignore, :error => false, :makeunique => true)
+
+# translate a keyword alias to its Bool/Function; any other value is returned unchanged
+_makeunique_normalize(makeunique) = get(_makeunique_keys, makeunique, makeunique)
+
 Base.broadcastable(x::AsTable) = Ref(x)
 
 function make_unique!(names::Vector{Symbol}, src::AbstractVector{Symbol};
-                      makeunique::Bool=false)
+                      makeunique=false)
     if length(names) != length(src)
         throw(DimensionMismatch("Length of src doesn't match length of names."))
     end
@@ -92,7 +102,7 @@ function make_unique!(names::Vector{Symbol}, src::AbstractVector{Symbol};
     end
 
     if length(dups) > 0
-        if !makeunique
+        if makeunique == false
             dupstr = join(string.(':', unique(src[dups])), ", ", " and ")
             msg = "Duplicate variable names: $dupstr. Pass makeunique=true " *
                   "to make them unique using a suffix automatically."
@@ -102,22 +112,26 @@ function make_unique!(names::Vector{Symbol}, src::AbstractVector{Symbol};
 
     for i in dups
         nm = src[i]
-        k = 1
-        while true
-            newnm = Symbol("$(nm)_$k")
-            if !in(newnm, seen)
-                names[i] = newnm
-                push!(seen, newnm)
-                break
+        if makeunique == true
+            k = 1
+            while true
+                newnm = Symbol("$(nm)_$k")
+                if !in(newnm, seen)
+                    names[i] = newnm
+                    push!(seen, newnm)
+                    break
+                end
+                k += 1
             end
-            k += 1
+        else
+            names[i] = nm
         end
     end
 
     return names
 end
 
-function make_unique(names::AbstractVector{Symbol}; makeunique::Bool=false)
+function make_unique(names::AbstractVector{Symbol}; makeunique=false)
     make_unique!(similar(names), names, makeunique=makeunique)
 end
 
diff --git a/test/cat.jl b/test/cat.jl
index b5aa1cfd9b..888ff3a92f 100644
--- a/test/cat.jl
+++ b/test/cat.jl
@@ -20,6 +20,12 @@ const ≅ = isequal
     @test dfh[!, :x1] ≅ df3[!, :x1]
     @test dfh ≅ DataFrames.hcat!(DataFrame(), df3, df4, makeunique=true)
 
+    dfhu = hcat(df3, df4, makeunique=:update)
+    @test ref_df ≅ df3 # make sure that df3 is not mutated by hcat
+    @test size(dfhu, 2) == 2
+    @test names(dfhu) ≅ ["x1", "x2"]
+    @test ! (dfhu[!, :x1] ≅ df3[!, :x1])
+
     dfa = DataFrame(a=[1, 2])
     dfb = DataFrame(b=[3, missing])
     @test hcat(dfa, dfb) ≅ [dfa dfb]
@@ -30,6 +36,11 @@ const ≅ = isequal
     @test dfh3 ≅ DataFrames.hcat!(DataFrame(), df3, df4, df5, makeunique=true)
 
     @test df2 ≅ DataFrames.hcat!(df2, makeunique=true)
+
+    dfh3 = hcat(df3, df4, df5, makeunique=:update)
+    @test names(dfh3) == ["x1", "x2"]
+    @test dfh3 ≅ hcat(dfhu, df5, makeunique=:update)
+    @test dfh3 ≅ DataFrames.hcat!(DataFrame(), df3, df4, df5, makeunique=:update)
 end
 
 @testset "hcat: copying" begin
@@ -56,6 +67,8 @@ end
     @test hdf[!, 1] !== hdf[!, 3]
     @test hdf[!, 2] == hdf[!, 3]
     @test hdf[!, 2] !== hdf[!, 3]
+    hdf = hcat(df, df, makeunique=:update)
+    @test hdf ≅ df
 end
 
 @testset "hcat ::AbstractDataFrame" begin
diff --git a/test/dataframe.jl b/test/dataframe.jl
index 971d7626dd..340beff446 100644
--- a/test/dataframe.jl
+++ b/test/dataframe.jl
@@ -152,6 +152,7 @@ end
 
     @test names(rename(df, [:f, :g])) == ["f", "g"]
     @test names(rename(df, [:f, :f], makeunique=true)) == ["f", "f_1"]
+    @test names(rename(df, [:f, :f], makeunique=:update)) == ["f", "f"]
     @test names(df) == ["a", "b"]
 
     rename!(df, [:f, :g])
@@ -253,11 +254,18 @@ end
 
     df = DataFrame(a=[1, 2], a_1=[3, 4])
     @test_throws ArgumentError insertcols!(df, 1, :a => [11, 12])
-    @test df == DataFrame(a=[1, 2], a_1=[3, 4])
+
+    df = DataFrame(a=[1, 2], a_1=[3, 4])
     insertcols!(df, 1, :a => [11, 12], makeunique=true)
     @test propertynames(df) == [:a_2, :a, :a_1]
     insertcols!(df, 4, :a => [11, 12], makeunique=true)
     @test propertynames(df) == [:a_2, :a, :a_1, :a_3]
+
+    df = DataFrame(a=[1, 2], a_1=[3, 4])
+    insertcols!(df, 1, :a => [11, 12], makeunique=:update)
+    @test propertynames(df) == [:a, :a_1]
+    @test df == DataFrame(a=[11, 12], a_1=[3, 4])
+
     @test_throws ArgumentError insertcols!(df, 10, :a => [11, 12], makeunique=true)
 
     dfc = copy(df)
@@ -303,6 +311,11 @@ end
     @test df.a_1 === v2
     @test df.a_2 === v3
 
+    df = DataFrame()
+    @test insertcols!(df, 1, :a=>v1, :a=>v2, :a=>v3, makeunique=:update, copycols=false) ==
+          DataFrame(a=v3)
+    @test df.a isa Vector{Int}
+
     df = DataFrame(p='a':'b', q='r':'s')
     @test insertcols!(df, 2, :a=>v1, :b=>v2, :c=>v3) ==
           DataFrame(p='a':'b', a=v1, b=v2, c=v3, q='r':'s')
@@ -319,6 +332,11 @@ end
     @test df.q_1 !== v2
     @test df.p_2 !== v3
 
+    df = DataFrame(p='a':'b', q='r':'s')
+    @test_throws ArgumentError insertcols!(df, 2, :p=>v1, :q=>v2, :p=>v3)
+    @test insertcols!(df, 2, :p=>v1, :q=>v2, :p=>v3, makeunique=:update, copycols=true) ==
+          DataFrame(p=v3, q=v2)
+
     df = DataFrame(a=1:3, b=4:6)
     @test insertcols!(copy(df), :c=>7:9) == insertcols!(copy(df), 3, :c=>7:9)
     df = DataFrame()
diff --git a/test/join.jl b/test/join.jl
index 478cca98d3..81b76d052f 100644
--- a/test/join.jl
+++ b/test/join.jl
@@ -182,12 +182,12 @@ end
     @test typeof.(eachcol(crossjoin(df1, df2, makeunique=true))) ==
         [Vector{Int}, Vector{Float64}, Vector{Int}, Vector{Float64}]
 
-    i(on) = innerjoin(df1, df2, on=on, makeunique=true)
-    l(on) = leftjoin(df1, df2, on=on, makeunique=true)
-    r(on) = rightjoin(df1, df2, on=on, makeunique=true)
-    o(on) = outerjoin(df1, df2, on=on, makeunique=true)
-    s(on) = semijoin(df1, df2, on=on, makeunique=true)
-    a(on) = antijoin(df1, df2, on=on, makeunique=true)
+    i(on,makeunique=true) = innerjoin(df1, df2, on=on, makeunique=makeunique)
+    l(on,makeunique=true) = leftjoin(df1, df2, on=on, makeunique=makeunique)
+    r(on,makeunique=true) = rightjoin(df1, df2, on=on, makeunique=makeunique)
+    o(on,makeunique=true) = outerjoin(df1, df2, on=on, makeunique=makeunique)
+    s(on,makeunique=true) = semijoin(df1, df2, on=on, makeunique=makeunique)
+    a(on,makeunique=true) = antijoin(df1, df2, on=on, makeunique=makeunique)
 
     @test s(:id) ==
           s(:fid) ==
@@ -251,6 +251,77 @@ end
     @test typeof.(eachcol(o(on))) == [Vector{Int}, Vector{Float64}]
 end
 
+@testset "update joins" begin
+    df1 = DataFrame(Any[[1, 3, 5], [1.0, 3.0, 5.0]], [:id, :fid])
+    df2 = DataFrame(Any[[0, 1, 2, 3, 4], [0.0, 1.0, 2.0, 3.0, 4.0]], [:id, :fid])
+
+    update = DataFrames.makeunique_update
+
+    @test crossjoin(df1, df2, makeunique=update) ==
+        DataFrame(Any[repeat([0, 1, 2, 3, 4], outer=3),
+                      repeat([0.0, 1.0, 2.0, 3.0, 4.0], outer=3)],
+                  [:id, :fid])
+    @test crossjoin(df1, df2, makeunique=:update) ==
+        DataFrame(Any[repeat([0, 1, 2, 3, 4], outer=3),
+                      repeat([0.0, 1.0, 2.0, 3.0, 4.0], outer=3)],
+                  [:id, :fid])
+
+    i(on,makeunique=:update) = innerjoin(df1, df2, on=on, makeunique=makeunique)
+    l(on,makeunique=:update) = leftjoin(df1, df2, on=on, makeunique=makeunique)
+    r(on,makeunique=:update) = rightjoin(df1, df2, on=on, makeunique=makeunique)
+    o(on,makeunique=:update) = outerjoin(df1, df2, on=on, makeunique=makeunique)
+    s(on,makeunique=:update) = semijoin(df1, df2, on=on, makeunique=makeunique)
+    a(on,makeunique=:update) = antijoin(df1, df2, on=on, makeunique=makeunique)
+
+    @test s(:id) ==
+          s(:fid) ==
+          s([:id, :fid]) == DataFrame([[1, 3], [1, 3]], [:id, :fid])
+    @test typeof.(eachcol(s(:id))) ==
+          typeof.(eachcol(s(:fid))) ==
+          typeof.(eachcol(s([:id, :fid]))) == [Vector{Int}, Vector{Float64}]
+    @test a(:id) ==
+          a(:fid) ==
+          a([:id, :fid]) == DataFrame([[5], [5]], [:id, :fid])
+    @test typeof.(eachcol(a(:id))) ==
+          typeof.(eachcol(a(:fid))) ==
+          typeof.(eachcol(a([:id, :fid]))) == [Vector{Int}, Vector{Float64}]
+
+    on = :id
+    @test i(on) == DataFrame([[1, 3], [1, 3]], [:id, :fid])
+    @test typeof.(eachcol(i(on))) == [Vector{Int}, Vector{Float64}]
+    @test l(on) ≅ DataFrame(id=[1, 3, 5], fid=[1, 3, 5])
+    @test typeof.(eachcol(l(on))) == [Vector{Int}, Vector{Float64}]
+    @test r(on) ≅ DataFrame(id=[1, 3, 0, 2, 4], fid=[1, 3, 0, 2, 4])
+    @test typeof.(eachcol(r(on))) == [Vector{Int}, Vector{Float64}]
+    @test o(on) ≅ DataFrame(id=[1, 3, 5, 0, 2, 4],
+                            fid=[1, 3, 5, 0, 2, 4])
+    @test typeof.(eachcol(o(on))) == [Vector{Int}, Vector{Float64}]
+
+    on = :fid
+    df1.id = [1, missing, 5]
+    @test i(on) == DataFrame([[1, 3], [1.0, 3.0]], [:id, :fid])
+    @test typeof.(eachcol(i(on))) == [Vector{Int}, Vector{Float64}]
+    @test l(on) ≅ DataFrame(id=[1, 3, 5], fid=[1, 3, 5])
+    @test typeof.(eachcol(l(on))) == [Vector{Int}, Vector{Float64}]
+    @test r(on) ≅ DataFrame(id=[1, 3, 0, 2, 4],
+                            fid=[1, 3, 0, 2, 4])
+    @test typeof.(eachcol(r(on))) == [Vector{Int}, Vector{Float64}]
+    @test o(on) ≅ DataFrame(id=[1, 3, 5, 0, 2, 4],
+                            fid=[1, 3, 5, 0, 2, 4])
+    @test typeof.(eachcol(o(on))) == [Vector{Int}, Vector{Float64}]
+
+    on = [:id, :fid]
+    df1.id = [1, 3, 5]
+    @test i(on) == DataFrame([[1, 3], [1, 3]], [:id, :fid])
+    @test typeof.(eachcol(i(on))) == [Vector{Int}, Vector{Float64}]
+    @test l(on) == DataFrame(id=[1, 3, 5], fid=[1, 3, 5])
+    @test typeof.(eachcol(l(on))) == [Vector{Int}, Vector{Float64}]
+    @test r(on) == DataFrame(id=[1, 3, 0, 2, 4], fid=[1, 3, 0, 2, 4])
+    @test typeof.(eachcol(r(on))) == [Vector{Int}, Vector{Float64}]
+    @test o(on) == DataFrame(id=[1, 3, 5, 0, 2, 4], fid=[1, 3, 5, 0, 2, 4])
+    @test typeof.(eachcol(o(on))) == [Vector{Int}, Vector{Float64}]
+end
+
 @testset "all joins with CategoricalArrays" begin
     df1 = DataFrame(Any[CategoricalArray([1, 3, 5]),
                         CategoricalArray([1.0, 3.0, 5.0])], [:id, :fid])