
Adding support for DataFrameWriterV2 #677
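This PR adds C# bindings for the DataFrameWriterV2 API introduced in Spark 3.0: DataFrame.WriteTo plus the builder methods (Using, Option, Options, TableProperty, PartitionedBy) and the terminal operations (Create, Replace, CreateOrReplace, Append, Overwrite, OverwritePartitions). A minimal usage sketch of the surface added here, assuming an existing SparkSession named spark; the file path, option values, and table name are illustrative only:

using Microsoft.Spark.Sql;

// Read some sample data (schema and path are placeholders).
DataFrame df = spark
    .Read()
    .Schema("age INT, name STRING")
    .Json("people.json");

// WriteTo returns a DataFrameWriterV2; Using/Option/TableProperty/PartitionedBy
// each return the same writer, so the configuration chains fluently until a
// terminal verb such as Create() runs the write.
df.WriteTo("people_table")
    .Using("json")
    .Option("compression", "gzip")
    .TableProperty("owner", "spark-dotnet")
    .PartitionedBy(df.Col("age"))
    .Create();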


Merged on Oct 2, 2020
69 commits
03b7939
Adding section for UDF serialization
Niharikadutta Apr 20, 2020
4ef693d
removing guides from master
Niharikadutta Apr 20, 2020
81145ca
Merge latest from master
Niharikadutta May 6, 2020
e4b81af
merging latest from master
Niharikadutta May 7, 2020
4c32173
Merge remote-tracking branch 'upstream/master'
Niharikadutta Jun 2, 2020
4987a09
Merge remote-tracking branch 'upstream/master'
Niharikadutta Jun 14, 2020
ca9612e
Merge remote-tracking branch 'upstream/master'
Niharikadutta Jun 16, 2020
f581c86
Merge remote-tracking branch 'upstream/master'
Niharikadutta Jun 20, 2020
086b325
Merge remote-tracking branch 'upstream/master'
Niharikadutta Jun 23, 2020
2f72907
Merge remote-tracking branch 'upstream/master'
Niharikadutta Jul 25, 2020
6bab996
CountVectorizer
Jul 27, 2020
e2a566b
moving private methods to bottom
Jul 27, 2020
5f682a6
changing wrap method
Jul 28, 2020
31371db
setting min version required
Jul 31, 2020
60eb82f
undoing csproj change
Jul 31, 2020
ed36375
member doesnt need to be internal
Jul 31, 2020
c7baf72
too many lines
Jul 31, 2020
d13303c
removing whitespace change
Jul 31, 2020
f5b477c
removing whitespace change
Jul 31, 2020
73db52b
ionide
Jul 31, 2020
98f5e4d
Merge remote-tracking branch 'upstream/master'
Niharikadutta Aug 7, 2020
4c5d502
Merge remote-tracking branch 'upstream/master'
Niharikadutta Aug 10, 2020
a766146
Merge branch 'master' into ml/countvectorizer
GoEddie Aug 12, 2020
ad6bced
Merge branch 'ml/countvectorizer' of https://github.com/GoEddie/spark
Niharikadutta Aug 13, 2020
8e1685c
Revert "Merge branch 'master' into ml/countvectorizer"
Niharikadutta Aug 13, 2020
255515e
Revert "Merge branch 'ml/countvectorizer' of https://github.com/GoEdd…
Niharikadutta Aug 13, 2020
a44c882
Merge remote-tracking branch 'upstream/master'
Niharikadutta Aug 14, 2020
3c2c936
fixing merge errors
Niharikadutta Aug 14, 2020
88e834d
removing ionid
Niharikadutta Aug 20, 2020
a13de2d
Merge branch 'master' of github.com:Niharikadutta/spark
Niharikadutta Aug 21, 2020
13d0e4a
Merge remote-tracking branch 'upstream/master'
Niharikadutta Aug 24, 2020
595b141
Merge remote-tracking branch 'upstream/master'
Niharikadutta Aug 29, 2020
decfa48
Merge remote-tracking branch 'upstream/master'
Niharikadutta Sep 2, 2020
ce694ff
Merge remote-tracking branch 'upstream/master'
Niharikadutta Sep 8, 2020
8128ba0
Merge remote-tracking branch 'upstream/master'
Niharikadutta Sep 12, 2020
52f0a74
Merge remote-tracking branch 'upstream/master'
Niharikadutta Sep 19, 2020
2cd9a2a
First commit
Niharikadutta Sep 19, 2020
abea46a
exposing public APIs
Niharikadutta Sep 19, 2020
07fbfaa
changes
Niharikadutta Sep 19, 2020
f541348
Adding DataFrameWriterV2 test file
Niharikadutta Sep 20, 2020
9016635
changes
Niharikadutta Sep 20, 2020
436a519
changes
Niharikadutta Sep 22, 2020
6a89f01
Merge remote-tracking branch 'upstream/master'
Niharikadutta Sep 24, 2020
8f30d95
Commenting out tests
Niharikadutta Sep 24, 2020
5f4a294
Merge branch 'master' into nidutta/spark3.0readiness_part3
Niharikadutta Sep 24, 2020
684dd90
changes
Niharikadutta Sep 25, 2020
4b1de41
Merge remote-tracking branch 'upstream/master'
Niharikadutta Sep 25, 2020
030f920
Merge branch 'master' into nidutta/spark3.0readiness_part3
Niharikadutta Sep 25, 2020
c76aec7
Dropping test table if exists
Niharikadutta Sep 25, 2020
c69fac3
Merge branch 'master' into nidutta/spark3.0readiness_part3
Niharikadutta Sep 25, 2020
76b205a
PR comments
Niharikadutta Sep 25, 2020
02bf2ac
Merge branch 'nidutta/spark3.0readiness_part3' of github.com:Niharika…
Niharikadutta Sep 25, 2020
419b084
Adding tests for new APIs
Niharikadutta Sep 25, 2020
b76fb9c
PR comments
Niharikadutta Sep 27, 2020
b08b027
nit
Niharikadutta Sep 27, 2020
9fbfdaa
Merge branch 'master' into nidutta/spark3.0readiness_part3
Niharikadutta Sep 27, 2020
ef4808f
Merge branch 'master' into nidutta/spark3.0readiness_part3
imback82 Sep 28, 2020
c93b72f
Merge branch 'master' into nidutta/spark3.0readiness_part3
imback82 Sep 28, 2020
c3de5c0
PR comments
Niharikadutta Sep 29, 2020
156b207
Merge branch 'nidutta/spark3.0readiness_part3' of github.com:Niharika…
Niharikadutta Sep 29, 2020
355d1ae
Merge branch 'master' into nidutta/spark3.0readiness_part3
imback82 Sep 29, 2020
8605a71
testing hanging after `OverWritePartitions` call
Niharikadutta Sep 29, 2020
18c46f8
Merge branch 'nidutta/spark3.0readiness_part3' of github.com:Niharika…
Niharikadutta Sep 29, 2020
54034bb
testing
Niharikadutta Sep 29, 2020
da76001
testing `OverwritePartitions()` hang
Niharikadutta Sep 30, 2020
31bd1f7
reverting change
Niharikadutta Sep 30, 2020
9ee7938
Merge branch 'master' into nidutta/spark3.0readiness_part3
Niharikadutta Oct 1, 2020
2603938
PR comments
Niharikadutta Oct 1, 2020
c162894
Merge branch 'nidutta/spark3.0readiness_part3' of github.com:Niharika…
Niharikadutta Oct 1, 2020
@@ -0,0 +1,79 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.Sql;
using Xunit;

namespace Microsoft.Spark.E2ETest.IpcTests
{
    [Collection("Spark E2E Tests")]
    public class DataFrameWriterV2Tests
    {
        private readonly SparkSession _spark;

        public DataFrameWriterV2Tests(SparkFixture fixture)
        {
            _spark = fixture.Spark;
        }

        /// <summary>
        /// Test signatures for APIs introduced in Spark 3.*.
        /// </summary>
        [SkipIfSparkVersionIsLessThan(Versions.V3_0_0)]
        public void TestSignaturesV3_0_X()
        {
            DataFrame df = _spark
                .Read()
                .Schema("age INT, name STRING")
                .Json($"{TestEnvironment.ResourceDirectory}people.json");

            DataFrameWriterV2 dfwV2 = df.WriteTo("testtable");

            Assert.IsType<DataFrameWriterV2>(dfwV2.Using("json"));

            Assert.IsType<DataFrameWriterV2>(dfwV2.Option("key1", "value"));
            Assert.IsType<DataFrameWriterV2>(dfwV2.Option("key2", true));
            Assert.IsType<DataFrameWriterV2>(dfwV2.Option("key3", 1L));
            Assert.IsType<DataFrameWriterV2>(dfwV2.Option("key4", 2D));

            Assert.IsType<DataFrameWriterV2>(dfwV2.Options(
                new Dictionary<string, string>() { { "key", "value" } }));

            Assert.IsType<DataFrameWriterV2>(dfwV2.TableProperty("prop", "value"));

            _spark.Sql("DROP TABLE IF EXISTS default.testtable");
            dfwV2.Create();

            Assert.IsType<DataFrameWriterV2>(dfwV2.PartitionedBy(df.Col("age")));

            // Throws the following exception:
            // org.apache.spark.sql.AnalysisException: REPLACE TABLE AS SELECT is only supported
            // with v2 tables.
            Assert.Throws<Exception>(() => dfwV2.Replace());

            // Throws the following exception:
            // org.apache.spark.sql.AnalysisException: REPLACE TABLE AS SELECT is only supported
            // with v2 tables.
            Assert.Throws<Exception>(() => dfwV2.CreateOrReplace());

            // Throws the following exception:
            // org.apache.spark.sql.AnalysisException: Table default.testtable does not support
            // append in batch mode.
            Assert.Throws<Exception>(() => dfwV2.Append());

            // Throws the following exception:
            // org.apache.spark.sql.AnalysisException: Table default.testtable does not support
            // overwrite by filter in batch mode.
            Assert.Throws<Exception>(() => dfwV2.Overwrite(df.Col("age")));

            // Throws the following exception:
            // org.apache.spark.sql.AnalysisException: Table default.testtable does not support
            // dynamic overwrite in batch mode.
            Assert.Throws<Exception>(() => dfwV2.OverwritePartitions());
        }
    }
}
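Replace, CreateOrReplace, Append, Overwrite, and OverwritePartitions throw in the test above because default.testtable is resolved through the built-in session catalog, which does not provide a v2 table implementation for those operations (hence the "only supported with v2 tables" and "does not support ... in batch mode" messages). A minimal sketch of targeting a catalog that does implement the v2 TableCatalog API instead; the catalog name, implementation class, and table identifier below are placeholders and are not part of this PR:

using Microsoft.Spark.Sql;

// Register a v2 catalog plugin under "mycatalog" (hypothetical class;
// it must be available on the JVM classpath).
SparkSession spark = SparkSession
    .Builder()
    .Config("spark.sql.catalog.mycatalog", "com.example.MyV2Catalog")
    .GetOrCreate();

DataFrame df = spark
    .Read()
    .Schema("age INT, name STRING")
    .Json("people.json");

// Tables addressed through the v2 catalog go down the v2 write path, so the
// verbs that throw against the session catalog are expected to work here,
// subject to the capabilities the catalog reports.
df.WriteTo("mycatalog.db.people").Using("json").CreateOrReplace();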
9 changes: 9 additions & 0 deletions src/csharp/Microsoft.Spark/Sql/DataFrame.cs
@@ -535,6 +535,15 @@ public DataFrame Agg(Column expr, params Column[] exprs) =>
        public DataFrame Observe(string name, Column expr, params Column[] exprs) =>
            WrapAsDataFrame(_jvmObject.Invoke("observe", name, expr, exprs));

        /// <summary>
        /// Create a write configuration builder for v2 sources.
        /// </summary>
        /// <param name="table">Name of table to write to</param>
        /// <returns>DataFrameWriterV2 object</returns>
        [Since(Versions.V3_0_0)]
        public DataFrameWriterV2 WriteTo(string table) =>
            new DataFrameWriterV2((JvmObjectReference)_jvmObject.Invoke("writeTo", table));

        /// <summary>
        /// Returns a new `DataFrame` by taking the first `number` rows.
        /// </summary>
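WriteTo complements the existing DataFrame.Write() entry point: the v1 writer is configured with Format/Mode and saved by name, while the v2 writer added in this PR selects the operation through an explicit verb. A minimal side-by-side sketch, assuming a DataFrame df; the table names and format are illustrative:

// v1 path (existing): DataFrameWriter, configured with Format/Mode.
df.Write()
    .Format("parquet")
    .Mode("overwrite")
    .SaveAsTable("people_v1");

// v2 path (this PR): DataFrameWriterV2, with an explicit Create/Replace/Append verb.
df.WriteTo("people_v2")
    .Using("parquet")
    .Create();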
153 changes: 153 additions & 0 deletions src/csharp/Microsoft.Spark/Sql/DataFrameWriterV2.cs
@@ -0,0 +1,153 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using Microsoft.Spark.Interop.Ipc;

namespace Microsoft.Spark.Sql
{
    /// <summary>
    /// Interface used to write a <see cref="DataFrame"/> to external storage using the v2
    /// API.
    /// </summary>
    [Since(Versions.V3_0_0)]
    public sealed class DataFrameWriterV2 : IJvmObjectReferenceProvider
    {
        private readonly JvmObjectReference _jvmObject;

        internal DataFrameWriterV2(JvmObjectReference jvmObject) => _jvmObject = jvmObject;

        JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject;

        /// <summary>
        /// Specifies a provider for the underlying output data source. Spark's default catalog
        /// supports "parquet", "json", etc.
        /// </summary>
        /// <param name="provider">Provider name</param>
        /// <returns>This DataFrameWriterV2 object</returns>
        public DataFrameWriterV2 Using(string provider)
        {
            _jvmObject.Invoke("using", provider);
            return this;
        }

        /// <summary>
        /// Adds an output option for the underlying data source.
        /// </summary>
        /// <param name="key">Name of the option</param>
        /// <param name="value">string value of the option</param>
        /// <returns>This DataFrameWriterV2 object</returns>
        public DataFrameWriterV2 Option(string key, string value)
        {
            _jvmObject.Invoke("option", key, value);
            return this;
        }

        /// <summary>
        /// Adds an output option for the underlying data source.
        /// </summary>
        /// <param name="key">Name of the option</param>
        /// <param name="value">bool value of the option</param>
        /// <returns>This DataFrameWriterV2 object</returns>
        public DataFrameWriterV2 Option(string key, bool value)
        {
            _jvmObject.Invoke("option", key, value);
            return this;
        }

        /// <summary>
        /// Adds an output option for the underlying data source.
        /// </summary>
        /// <param name="key">Name of the option</param>
        /// <param name="value">Long value of the option</param>
        /// <returns>This DataFrameWriterV2 object</returns>
        public DataFrameWriterV2 Option(string key, long value)
        {
            _jvmObject.Invoke("option", key, value);
            return this;
        }

        /// <summary>
        /// Adds an output option for the underlying data source.
        /// </summary>
        /// <param name="key">Name of the option</param>
        /// <param name="value">Double value of the option</param>
        /// <returns>This DataFrameWriterV2 object</returns>
        public DataFrameWriterV2 Option(string key, double value)
        {
            _jvmObject.Invoke("option", key, value);
            return this;
        }

        /// <summary>
        /// Adds output options for the underlying data source.
        /// </summary>
        /// <param name="options">Key/value options</param>
        /// <returns>This DataFrameWriterV2 object</returns>
        public DataFrameWriterV2 Options(Dictionary<string, string> options)
        {
            _jvmObject.Invoke("options", options);
            return this;
        }

        /// <summary>
        /// Add a table property.
        /// </summary>
        /// <param name="property">Name of property</param>
        /// <param name="value">Value of the property</param>
        /// <returns>This DataFrameWriterV2 object</returns>
        public DataFrameWriterV2 TableProperty(string property, string value)
        {
            _jvmObject.Invoke("tableProperty", property, value);
            return this;
        }

        /// <summary>
        /// Partition the output table created by <see cref="Create"/>,
        /// <see cref="CreateOrReplace"/>, or <see cref="Replace"/> using the given columns or
        /// transforms.
        /// </summary>
        /// <param name="column">Column name to partition on</param>
        /// <param name="columns">Columns to partition on</param>
        /// <returns>This DataFrameWriterV2 object</returns>
        public DataFrameWriterV2 PartitionedBy(Column column, params Column[] columns)
        {
            _jvmObject.Invoke("partitionedBy", column, columns);
            return this;
        }

        /// <summary>
        /// Create a new table from the contents of the data frame.
        /// </summary>
        public void Create() => _jvmObject.Invoke("create");

        /// <summary>
        /// Replace an existing table with the contents of the data frame.
        /// </summary>
        public void Replace() => _jvmObject.Invoke("replace");

        /// <summary>
        /// Create a new table or replace an existing table with the contents of the data frame.
        /// </summary>
        public void CreateOrReplace() => _jvmObject.Invoke("createOrReplace");

        /// <summary>
        /// Append the contents of the data frame to the output table.
        /// </summary>
        public void Append() => _jvmObject.Invoke("append");

        /// <summary>
        /// Overwrite rows matching the given filter condition with the contents of the data frame
        /// in the output table.
        /// </summary>
        /// <param name="condition">Condition filter to overwrite based on</param>
        public void Overwrite(Column condition) => _jvmObject.Invoke("overwrite", condition);

        /// <summary>
        /// Overwrite all partitions for which the data frame contains at least one row with the
        /// contents of the data frame in the output table.
        /// </summary>
        public void OverwritePartitions() => _jvmObject.Invoke("overwritePartitions");
    }
}