
Commit 0fca059

Redshift batch inserts using COPY FROM operation
1 parent 3da3772 commit 0fca059

13 files changed: +956 -12 lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 0 deletions
@@ -737,6 +737,7 @@ jobs:
          REDSHIFT_VPC_SECURITY_GROUP_IDS: ${{ vars.REDSHIFT_VPC_SECURITY_GROUP_IDS }}
          REDSHIFT_S3_TPCH_TABLES_ROOT: ${{ vars.REDSHIFT_S3_TPCH_TABLES_ROOT }}
          REDSHIFT_S3_UNLOAD_ROOT: ${{ vars.REDSHIFT_S3_UNLOAD_ROOT }}
+         REDSHIFT_S3_COPY_ROOT: ${{ vars.REDSHIFT_S3_COPY_ROOT }}
        if: >-
          contains(matrix.modules, 'trino-redshift') &&
          (contains(matrix.profile, 'cloud-tests') || contains(matrix.profile, 'fte-tests')) &&
@@ -750,6 +751,7 @@ jobs:
            -Dtest.redshift.jdbc.endpoint="${REDSHIFT_ENDPOINT}:${REDSHIFT_PORT}/" \
            -Dtest.redshift.s3.tpch.tables.root="${REDSHIFT_S3_TPCH_TABLES_ROOT}" \
            -Dtest.redshift.s3.unload.root="${REDSHIFT_S3_UNLOAD_ROOT}" \
+           -Dtest.redshift.s3.copy.root="${REDSHIFT_S3_COPY_ROOT}" \
            -Dtest.redshift.iam.role="${REDSHIFT_IAM_ROLES}" \
            -Dtest.redshift.aws.region="${AWS_REGION}" \
            -Dtest.redshift.aws.access-key="${AWS_ACCESS_KEY_ID}" \

docs/src/main/sphinx/connector/redshift.md

Lines changed: 32 additions & 0 deletions
@@ -262,3 +262,35 @@ potentially re-activate it again afterward.
 Additionally, define further required [S3 configuration such as IAM key, role,
 or region](/object-storage/file-system-s3), except `fs.native-s3.enabled`,
 
+### Batch inserts from S3 using Redshift `COPY FROM`
+
+The connector supports the Redshift `COPY FROM` command to
+efficiently write large batches of data to Redshift by staging them as
+Parquet files on Amazon S3. This method significantly improves sink
+performance compared to the default JDBC batch inserts for Redshift.
+
+To enable this feature, configure a writeable S3 location with the
+following configuration properties:
+
+:::{list-table} Parallel write configuration properties
+:widths: 30, 60
+:header-rows: 1
+
+* - Property
+  - Description
+* - `redshift.batched-inserts-copy-location`
+  - A writeable location in Amazon S3 in the same AWS region as the
+    Redshift cluster. Used for temporary Parquet staging files during
+    insert operations. These files are automatically cleaned up after the load.
+* - `redshift.batched-inserts-copy-iam-role`
+  - Fully specified ARN of the IAM role to use for the `COPY FROM`
+    command. This role must have read access to the S3 bucket.
+
+:::
+
+Use the `batched_inserts_copy_enabled` [catalog session property](/sql/set-session) to
+deactivate batch inserts using `COPY FROM` for a specific query, and
+potentially re-activate them afterward.
+
+Additionally, define further required [S3 configuration such as IAM key, role,
+or region](/object-storage/file-system-s3).
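
For orientation, a catalog configuration enabling this write path might look like the following sketch; the bucket, prefix, role ARN, and catalog name are placeholders for illustration, not values taken from this commit:

    redshift.batched-inserts-copy-location=s3://example-bucket/trino-copy-staging/
    redshift.batched-inserts-copy-iam-role=arn:aws:iam::123456789012:role/ExampleRedshiftCopyRole

The feature can then be toggled per query with the catalog session property documented above, for example `SET SESSION example_redshift.batched_inserts_copy_enabled = false` before an insert, and set back to `true` afterward.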

plugin/trino-redshift/pom.xml

Lines changed: 15 additions & 0 deletions
@@ -74,6 +74,10 @@
        <groupId>io.trino</groupId>
        <artifactId>trino-filesystem-s3</artifactId>
    </dependency>
+   <dependency>
+       <groupId>io.trino</groupId>
+       <artifactId>trino-hive</artifactId>
+   </dependency>

    <dependency>
        <groupId>io.trino</groupId>
@@ -110,6 +114,17 @@
        <artifactId>parquet-column</artifactId>
    </dependency>

+   <dependency>
+       <groupId>org.apache.parquet</groupId>
+       <artifactId>parquet-format-structures</artifactId>
+       <exclusions>
+           <exclusion>
+               <groupId>javax.annotation</groupId>
+               <artifactId>javax.annotation-api</artifactId>
+           </exclusion>
+       </exclusions>
+   </dependency>
+
    <dependency>
        <groupId>org.jdbi</groupId>
        <artifactId>jdbi3-core</artifactId>

plugin/trino-redshift/src/main/java/io/trino/plugin/redshift/RedshiftBatchedInsertsCopyPageSink.java

Lines changed: 225 additions & 0 deletions

@@ -0,0 +1,225 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.redshift;

import com.google.common.collect.ImmutableList;
import io.airlift.log.Logger;
import io.airlift.slice.Slice;
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;
import io.trino.filesystem.TrinoFileSystemFactory;
import io.trino.parquet.writer.ParquetSchemaConverter;
import io.trino.parquet.writer.ParquetWriterOptions;
import io.trino.plugin.hive.parquet.ParquetFileWriter;
import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorPageSink;
import io.trino.spi.connector.ConnectorPageSinkId;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.type.ArrayType;
import io.trino.spi.type.MapType;
import io.trino.spi.type.RowType;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;
import io.trino.spi.type.TypeOperators;
import org.apache.parquet.format.CompressionCodec;

import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

public class RedshiftBatchedInsertsCopyPageSink
        implements ConnectorPageSink
{
    private static final Logger log = Logger.get(RedshiftBatchedInsertsCopyPageSink.class);

    private static final int MAX_ROWS_PER_FILE = 100_000;

    private final TrinoFileSystem fileSystem;
    private final TypeOperators typeOperators;
    private final Location copyLocation;
    private final List<Type> columnTypes;
    private final String trinoVersion;
    private final ConnectorPageSinkId pageSinkId;
    private ParquetFileWriter parquetWriter;
    private long rowsInCurrentFile;
    private int filePartNumber;

    public RedshiftBatchedInsertsCopyPageSink(
            ConnectorSession session,
            ConnectorPageSinkId pageSinkId,
            TrinoFileSystemFactory fileSystemFactory,
            TypeOperators typeOperators,
            Location copyLocationWithPrefix,
            List<String> columnNames,
            List<Type> columnTypes,
            String trinoVersion)
    {
        this.pageSinkId = requireNonNull(pageSinkId, "pageSinkId is null");
        this.fileSystem = requireNonNull(fileSystemFactory, "fileSystemFactory is null").create(session);
        this.typeOperators = requireNonNull(typeOperators, "typeOperators is null");
        this.copyLocation = copyLocationWithPrefix;
        this.columnTypes = ImmutableList.copyOf(requireNonNull(columnTypes, "columnTypes is null"));
        this.trinoVersion = trinoVersion;
        checkArgument(columnNames.size() == columnTypes.size(), "columnNames and columnTypes must have the same size");

        startNewPart();
    }

    @Override
    public CompletableFuture<?> appendPage(Page page)
    {
        long positionCount = page.getPositionCount();
        if (positionCount == 0) {
            return NOT_BLOCKED;
        }

        parquetWriter.appendRows(page);
        rowsInCurrentFile += positionCount;

        if (rowsInCurrentFile >= MAX_ROWS_PER_FILE) {
            flushCurrentFile();
            startNewPart();
            rowsInCurrentFile = 0;
        }

        return NOT_BLOCKED;
    }

    private void startNewPart()
    {
        Location objectKey = copyLocation.appendPath(Long.toHexString(pageSinkId.getId())).appendPath(format("part-%d.parquet", filePartNumber++));
        this.parquetWriter = createParquetFileWriter(objectKey);
    }

    @Override
    public CompletableFuture<Collection<Slice>> finish()
    {
        if (rowsInCurrentFile > 0) {
            flushCurrentFile();
        }

        return CompletableFuture.completedFuture(List.of());
    }

    @Override
    public void abort()
    {
        cleanupFiles();
    }

    public void cleanupFiles()
    {
        try {
            fileSystem.deleteDirectory(copyLocation);
        }
        catch (IOException e) {
            log.warn("Unable to clean up location %s: %s", copyLocation, e.getMessage());
            // We don't want to rethrow here, as the query has already completed successfully
        }
    }

    private ParquetFileWriter createParquetFileWriter(Location path)
    {
        log.debug("Creating parquet file at location: %s", path.toString());
        ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder().build();
        CompressionCodec compressionCodec = CompressionCodec.SNAPPY;

        try {
            Closeable rollbackAction = this::abort;

            // According to Redshift docs, COPY inserts columns in the same order as the columns in the parquet. We
            // don't need the column names to match. We will instead create arbitrary column names to avoid any of the
            // parquet restrictions on column names.
            // See: https://docs.aws.amazon.com/redshift/latest/dg/copy-usage_notes-copy-from-columnar.html
            List<String> columnNames = new ArrayList<>();
            for (int i = 0; i < columnTypes.size(); i++) {
                columnNames.add(String.format("col%d", i));
            }

            ParquetSchemaConverter converter = new ParquetSchemaConverter(
                    columnTypes,
                    columnNames,
                    false,
                    false);

            List<Type> parquetTypes = columnTypes.stream()
                    .map(type -> RedshiftParquetTypes.toParquetType(typeOperators, type))
                    .collect(toImmutableList());

            // We use identity column mapping; the input page already contains only data columns per
            // DataLagePageSink.getDataPage()
            int[] identityMapping = new int[columnTypes.size()];
            for (int i = 0; i < identityMapping.length; ++i) {
                identityMapping[i] = i;
            }

            return new ParquetFileWriter(
                    fileSystem.newOutputFile(path),
                    rollbackAction,
                    parquetTypes,
                    columnNames,
                    converter.getMessageType(),
                    converter.getPrimitiveTypes(),
                    parquetWriterOptions,
                    identityMapping,
                    compressionCodec,
                    trinoVersion,
                    Optional.empty(),
                    Optional.empty());
        }
        catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    private void flushCurrentFile()
    {
        parquetWriter.commit();
    }

    public static final class RedshiftParquetTypes
    {
        public static Type toParquetType(TypeOperators typeOperators, Type type)
        {
            if (type instanceof TimestampWithTimeZoneType timestamp) {
                verify(timestamp.getPrecision() == 3, "Unsupported type: %s", type);
                return TIMESTAMP_MILLIS;
            }
            if (type instanceof ArrayType arrayType) {
                return new ArrayType(toParquetType(typeOperators, arrayType.getElementType()));
            }
            if (type instanceof MapType mapType) {
                return new MapType(toParquetType(typeOperators, mapType.getKeyType()), toParquetType(typeOperators, mapType.getValueType()), typeOperators);
            }
            if (type instanceof RowType rowType) {
                return RowType.from(rowType.getFields().stream()
                        .map(field -> RowType.field(field.getName().orElseThrow(), toParquetType(typeOperators, field.getType())))
                        .collect(toImmutableList()));
            }
            return type;
        }
    }
}
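
The staged Parquet parts written by this sink are subsequently loaded into Redshift with a `COPY` statement; that statement is built elsewhere in this commit and is not shown in this section. As a rough sketch of such a load, following the AWS documentation linked in the code comment above, with hypothetical table, bucket, prefix, and role values:

    COPY example_schema.example_table
    FROM 's3://example-bucket/trino-copy-staging/1a2b3c/'
    IAM_ROLE 'arn:aws:iam::123456789012:role/ExampleRedshiftCopyRole'
    FORMAT AS PARQUET;

Because `COPY` matches Parquet columns by position rather than by name, the arbitrary `col0`, `col1`, ... names written by the sink are sufficient.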
