From bdcb2c5d0ed9bc77a887eedc31467673696c4d1f Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Mon, 8 May 2023 20:40:35 -0700 Subject: [PATCH 01/31] Change PrimitiveDataFrameColumnComputations to use Generic Math --- .../AssemblyAttributes.cs | 7 + .../DecimalMathComputation.cs | 23 + .../FloatingPointMathComputation.cs | 24 + .../Microsoft.Data.Analysis.csproj | 15 +- .../NumberMathComputation.cs | 318 + .../PrimitiveDataFrameColumnComputations.cs | 5634 +---------------- .../PrimitiveDataFrameColumnComputations.tt | 293 - .../AssemblyAttributes.cs | 7 + .../DataFrameTests.cs | 5 +- .../Microsoft.Data.Analysis.Tests.csproj | 2 + 10 files changed, 401 insertions(+), 5927 deletions(-) create mode 100644 src/Microsoft.Data.Analysis/AssemblyAttributes.cs create mode 100644 src/Microsoft.Data.Analysis/DecimalMathComputation.cs create mode 100644 src/Microsoft.Data.Analysis/FloatingPointMathComputation.cs create mode 100644 src/Microsoft.Data.Analysis/NumberMathComputation.cs delete mode 100644 src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.tt create mode 100644 test/Microsoft.Data.Analysis.Tests/AssemblyAttributes.cs diff --git a/src/Microsoft.Data.Analysis/AssemblyAttributes.cs b/src/Microsoft.Data.Analysis/AssemblyAttributes.cs new file mode 100644 index 0000000000..b9a8a4daa9 --- /dev/null +++ b/src/Microsoft.Data.Analysis/AssemblyAttributes.cs @@ -0,0 +1,7 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Runtime.Versioning; + +[assembly: RequiresPreviewFeatures] diff --git a/src/Microsoft.Data.Analysis/DecimalMathComputation.cs b/src/Microsoft.Data.Analysis/DecimalMathComputation.cs new file mode 100644 index 0000000000..92db1d40dc --- /dev/null +++ b/src/Microsoft.Data.Analysis/DecimalMathComputation.cs @@ -0,0 +1,23 @@ + + +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Generated from PrimitiveColumnComputations.tt. Do not modify directly + +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; + +namespace Microsoft.Data.Analysis +{ + [RequiresPreviewFeatures] + internal class DecimalMathComputation : NumberMathComputation + { + public override void Round(PrimitiveColumnContainer column) + { + Apply(column, decimal.Round); + } + } +} diff --git a/src/Microsoft.Data.Analysis/FloatingPointMathComputation.cs b/src/Microsoft.Data.Analysis/FloatingPointMathComputation.cs new file mode 100644 index 0000000000..e01de94750 --- /dev/null +++ b/src/Microsoft.Data.Analysis/FloatingPointMathComputation.cs @@ -0,0 +1,24 @@ + + +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Generated from PrimitiveColumnComputations.tt. 
Do not modify directly + +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; + +namespace Microsoft.Data.Analysis +{ + [RequiresPreviewFeatures] + internal class FloatingPointMathComputation : NumberMathComputation + where T : unmanaged, INumber, IFloatingPoint + { + public override void Round(PrimitiveColumnContainer column) + { + Apply(column, T.Round); + } + } +} diff --git a/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj b/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj index acfcd62199..b1c59a28ee 100644 --- a/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj +++ b/src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj @@ -1,13 +1,16 @@  - netstandard2.0 + net6.0 + True true false This package contains easy-to-use and high-performance libraries for data analysis and transformation. Initial preview of robust and extensible types and algorithms for manipulating structured data that supports aggregations, statistical funtions, sorting, grouping, joins, merges, handling missing values and more. 
ML.NET ML Machine Learning Data Science DataFrame Preparation DataView Analytics Exploration true + + - 2.0.0 + 11.0.0 3.19.6 2.3.1 3.3.0 diff --git a/src/Microsoft.Data.Analysis/DataFrame.Arrow.cs b/src/Microsoft.Data.Analysis/DataFrame.Arrow.cs index 2938413459..270cfff63b 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.Arrow.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.Arrow.cs @@ -101,10 +101,18 @@ private static void AppendDataFrameColumnFromArrowArray(Field field, IArrowArray AppendDataFrameColumnFromArrowArray(fieldsEnumerator.Current, structArrayEnumerator.Current, ret, field.Name + "_"); } break; - case ArrowTypeId.Decimal: + case ArrowTypeId.Date64: + Date64Array arrowDate64Array = (Date64Array)arrowArray; + dataFrameColumn = new PrimitiveDataFrameColumn(fieldName, arrowDate64Array.Data.Length); + for (int i = 0; i < arrowDate64Array.Data.Length; i++) + { + dataFrameColumn[i] = arrowDate64Array.GetDateTime(i); + } + break; + case ArrowTypeId.Decimal128: + case ArrowTypeId.Decimal256: case ArrowTypeId.Binary: case ArrowTypeId.Date32: - case ArrowTypeId.Date64: case ArrowTypeId.Dictionary: case ArrowTypeId.FixedSizedBinary: case ArrowTypeId.HalfFloat: @@ -114,6 +122,7 @@ private static void AppendDataFrameColumnFromArrowArray(Field field, IArrowArray case ArrowTypeId.Null: case ArrowTypeId.Time32: case ArrowTypeId.Time64: + case ArrowTypeId.Timestamp: default: throw new NotImplementedException($"{fieldType.Name}"); } @@ -145,7 +154,7 @@ public static DataFrame FromArrowRecordBatch(RecordBatch recordBatch) } /// - /// Returns an without copying data + /// Returns an mostly without copying data /// public IEnumerable ToArrowRecordBatches() { diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index 92996b136b..d65255d5be 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -374,18 +374,6 @@ internal 
int MaxRecordBatchLength(long startIndex) return Buffers[arrayIndex].Length - (int)startIndex; } - internal ReadOnlyMemory GetValueBuffer(long startIndex) - { - int arrayIndex = GetArrayContainingRowIndex(startIndex); - return Buffers[arrayIndex].ReadOnlyBuffer; - } - - internal ReadOnlyMemory GetNullBuffer(long startIndex) - { - int arrayIndex = GetArrayContainingRowIndex(startIndex); - return NullBitMapBuffers[arrayIndex].ReadOnlyBuffer; - } - public IReadOnlyList this[long startIndex, int length] { get diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index c251c802ec..304d542c54 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -7,6 +7,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using Apache.Arrow; using Apache.Arrow.Types; using Microsoft.ML; @@ -104,6 +105,8 @@ private IArrowType GetArrowType() return UInt64Type.Default; else if (typeof(T) == typeof(ushort)) return UInt16Type.Default; + else if (typeof(T) == typeof(DateTime)) + return Date64Type.Default; else throw new NotImplementedException(nameof(T)); } @@ -127,36 +130,64 @@ protected internal override Apache.Arrow.Array ToArrowArray(long startIndex, int { int arrayIndex = numberOfRows == 0 ? 0 : _columnContainer.GetArrayContainingRowIndex(startIndex); int offset = (int)(startIndex - arrayIndex * ReadOnlyDataFrameBuffer.MaxCapacity); + if (numberOfRows != 0 && numberOfRows > _columnContainer.Buffers[arrayIndex].Length - offset) { throw new ArgumentException(Strings.SpansMultipleBuffers, nameof(numberOfRows)); } - ArrowBuffer valueBuffer = numberOfRows == 0 ? ArrowBuffer.Empty : new ArrowBuffer(_columnContainer.GetValueBuffer(startIndex)); - ArrowBuffer nullBuffer = numberOfRows == 0 ? 
ArrowBuffer.Empty : new ArrowBuffer(_columnContainer.GetNullBuffer(startIndex)); + int nullCount = GetNullCount(startIndex, numberOfRows); + + //DateTime requires conversion + if (this.DataType == typeof(DateTime)) + { + if (numberOfRows == 0) + return new Date64Array(ArrowBuffer.Empty, ArrowBuffer.Empty, numberOfRows, nullCount, offset); + + ReadOnlyDataFrameBuffer valueBuffer = (numberOfRows == 0) ? null : _columnContainer.Buffers[arrayIndex]; + ReadOnlyDataFrameBuffer nullBuffer = (numberOfRows == 0) ? null : _columnContainer.NullBitMapBuffers[arrayIndex]; + + ReadOnlySpan valueSpan = MemoryMarshal.Cast(valueBuffer.ReadOnlySpan); + Date64Array.Builder builder = new Date64Array.Builder().Reserve(valueBuffer.Length); + + for (int i = 0; i < valueBuffer.Length; i++) + { + if (BitUtility.GetBit(nullBuffer.ReadOnlySpan, i)) + builder.Append(valueSpan[i]); + else + builder.AppendNull(); + } + + return builder.Build(); + } + + //No conversion + ArrowBuffer arrowValueBuffer = numberOfRows == 0 ? ArrowBuffer.Empty : new ArrowBuffer(_columnContainer.Buffers[arrayIndex].ReadOnlyBuffer); + ArrowBuffer arrowNullBuffer = numberOfRows == 0 ? 
ArrowBuffer.Empty : new ArrowBuffer(_columnContainer.NullBitMapBuffers[arrayIndex].ReadOnlyBuffer); + Type type = this.DataType; if (type == typeof(bool)) - return new BooleanArray(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new BooleanArray(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(double)) - return new DoubleArray(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new DoubleArray(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(float)) - return new FloatArray(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new FloatArray(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(int)) - return new Int32Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new Int32Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(long)) - return new Int64Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new Int64Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(sbyte)) - return new Int8Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new Int8Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(short)) - return new Int16Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new Int16Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(uint)) - return new UInt32Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new UInt32Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(ulong)) - return new UInt64Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new UInt64Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); 
else if (type == typeof(ushort)) - return new UInt16Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new UInt16Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else if (type == typeof(byte)) - return new UInt8Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset); + return new UInt8Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset); else throw new NotImplementedException(type.ToString()); } diff --git a/test/Microsoft.Data.Analysis.Tests/ArrowIntegrationTests.cs b/test/Microsoft.Data.Analysis.Tests/ArrowIntegrationTests.cs index dacf43a8db..185ab835bb 100644 --- a/test/Microsoft.Data.Analysis.Tests/ArrowIntegrationTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/ArrowIntegrationTests.cs @@ -48,6 +48,7 @@ public void TestArrowIntegration() .Append("ULongColumn", false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, 10)).Build()) .Append("ByteColumn", false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, 10)).Build()) .Append("UByteColumn", false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, 10)).Build()) + .Append("Date64Column", false, new Date64Array.Builder().AppendRange(Enumerable.Repeat(DateTime.Now, 10)).Build()) .Build(); DataFrame df = DataFrame.FromArrowRecordBatch(originalBatch); From 4ebddeb1b5ae9fedaba7d1a95c46471fac24801d Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Sun, 21 May 2023 12:19:56 +0300 Subject: [PATCH 20/31] Fix DataFrame Merge issue # Conflicts: # src/Microsoft.Data.Analysis/BooleanDataFrameColumn.cs # src/Microsoft.Data.Analysis/ByteDataFrameColumn.cs # src/Microsoft.Data.Analysis/CharDataFrameColumn.cs # src/Microsoft.Data.Analysis/DateTimeDataFrameColumn.cs # src/Microsoft.Data.Analysis/DecimalDataFrameColumn.cs # src/Microsoft.Data.Analysis/DoubleDataFrameColumn.cs # src/Microsoft.Data.Analysis/Int16DataFrameColumn.cs # src/Microsoft.Data.Analysis/Int32DataFrameColumn.cs # 
src/Microsoft.Data.Analysis/Int64DataFrameColumn.cs # src/Microsoft.Data.Analysis/SByteDataFrameColumn.cs # src/Microsoft.Data.Analysis/SingleDataFrameColumn.cs # src/Microsoft.Data.Analysis/UInt16DataFrameColumn.cs # src/Microsoft.Data.Analysis/UInt32DataFrameColumn.cs # src/Microsoft.Data.Analysis/UInt64DataFrameColumn.cs # test/Microsoft.Data.Analysis.Tests/DataFrame.IOTests.cs --- .../DataFrameColumnCollection.cs | 17 +++++++ .../PrimitiveDataFrameColumn.cs | 17 +++++-- .../DataFrameTests.cs | 50 +++++++++++++++++++ 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs index a94cdb7801..52d890b182 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs @@ -220,6 +220,23 @@ public PrimitiveDataFrameColumn GetPrimitiveColumn(string name) throw new ArgumentException(string.Format(Strings.BadColumnCast, column.DataType, typeof(T)), nameof(T)); } + /// + /// Gets the with the specified . + /// + /// The name of the column + /// . + /// A column named cannot be found, or if the column's type doesn't match. + public PrimitiveDataFrameColumn GetDateTimeColumn(string name) + { + DataFrameColumn column = this[name]; + if (column is PrimitiveDataFrameColumn ret) + { + return ret; + } + + throw new ArgumentException(string.Format(Strings.BadColumnCast, column.DataType, typeof(DateTime))); + } + /// /// Gets the with the specified . 
/// diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index 304d542c54..0fe7820fe2 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -220,6 +220,16 @@ protected override IReadOnlyList GetValues(long startIndex, int length) return ret; } + internal virtual PrimitiveDataFrameColumn CreateNewColumn(string name, PrimitiveColumnContainer container) + { + return new PrimitiveDataFrameColumn(name, container); + } + + protected virtual PrimitiveDataFrameColumn CreateNewColumn(string name, long length = 0) + { + return new PrimitiveDataFrameColumn(name, length); + } + internal T? GetTypedValue(long rowIndex) => _columnContainer[rowIndex]; protected override object GetValue(long rowIndex) => GetTypedValue(rowIndex); @@ -411,7 +421,7 @@ private PrimitiveDataFrameColumn Clone(PrimitiveDataFrameColumn boolCol { if (boolColumn.Length > Length) throw new ArgumentException(Strings.MapIndicesExceedsColumnLenth, nameof(boolColumn)); - PrimitiveDataFrameColumn ret = new PrimitiveDataFrameColumn(Name); + PrimitiveDataFrameColumn ret = CreateNewColumn(Name); for (long i = 0; i < boolColumn.Length; i++) { bool? 
value = boolColumn[i]; @@ -438,7 +448,8 @@ private PrimitiveDataFrameColumn CloneImplementation(PrimitiveDataFrameCol } else throw new NotImplementedException(); - PrimitiveDataFrameColumn ret = new PrimitiveDataFrameColumn(Name, retContainer); + + PrimitiveDataFrameColumn ret = CreateNewColumn(Name, retContainer); return ret; } @@ -447,7 +458,7 @@ public PrimitiveDataFrameColumn Clone(PrimitiveDataFrameColumn mapIndic if (mapIndices is null) { PrimitiveColumnContainer newColumnContainer = _columnContainer.Clone(); - return new PrimitiveDataFrameColumn(Name, newColumnContainer); + return CreateNewColumn(Name, newColumnContainer); } else { diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index 866ecfec4f..87cdf4846f 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -2766,6 +2766,56 @@ public void TestMerge_Issue5778() MatchRowsOnMergedDataFrame(merge, left, right, 1, 1, 0); } + [Fact] + //Issue 6127 + public void TestMerge_CorrectColumnTypes() + { + DataFrame left = MakeDataFrameWithAllMutableColumnTypes(2, false); + DataFrame right = MakeDataFrameWithAllMutableColumnTypes(1); + + DataFrame merge = left.Merge(right, "Int", "Int"); + + Assert.NotNull(merge.Columns.GetBooleanColumn("Bool_left")); + Assert.NotNull(merge.Columns.GetBooleanColumn("Bool_right")); + + Assert.NotNull(merge.Columns.GetDecimalColumn("Decimal_left")); + Assert.NotNull(merge.Columns.GetDecimalColumn("Decimal_right")); + + Assert.NotNull(merge.Columns.GetSingleColumn("Float_left")); + Assert.NotNull(merge.Columns.GetSingleColumn("Float_right")); + + Assert.NotNull(merge.Columns.GetDoubleColumn("Double_left")); + Assert.NotNull(merge.Columns.GetDoubleColumn("Double_right")); + + Assert.NotNull(merge.Columns.GetByteColumn("Byte_left")); + Assert.NotNull(merge.Columns.GetByteColumn("Byte_right")); + + 
Assert.NotNull(merge.Columns.GetCharColumn("Char_left")); + Assert.NotNull(merge.Columns.GetCharColumn("Char_right")); + + Assert.NotNull(merge.Columns.GetInt16Column("Short_left")); + Assert.NotNull(merge.Columns.GetInt16Column("Short_right")); + + Assert.NotNull(merge.Columns.GetUInt16Column("Ushort_left")); + Assert.NotNull(merge.Columns.GetUInt16Column("Ushort_right")); + + Assert.NotNull(merge.Columns.GetInt32Column("Int_left")); + Assert.NotNull(merge.Columns.GetInt32Column("Int_right")); + + Assert.NotNull(merge.Columns.GetUInt32Column("Uint_left")); + Assert.NotNull(merge.Columns.GetUInt32Column("Uint_right")); + + Assert.NotNull(merge.Columns.GetInt64Column("Long_left")); + Assert.NotNull(merge.Columns.GetInt64Column("Long_right")); + + Assert.NotNull(merge.Columns.GetUInt64Column("Ulong_left")); + Assert.NotNull(merge.Columns.GetUInt64Column("Ulong_right")); + + Assert.NotNull(merge.Columns.GetDateTimeColumn("DateTime_left")); + Assert.NotNull(merge.Columns.GetDateTimeColumn("DateTime_right")); + + } + [Fact] public void TestDescription() { From da2cb99751ee1a8396fb03003585eea0793c6907 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 25 May 2023 08:56:59 +0300 Subject: [PATCH 21/31] Clean switch by type in binary operations # Conflicts: # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.tt --- ...imitiveDataFrameColumn.BinaryOperations.cs | 392 +++++++++--------- 1 file changed, 196 insertions(+), 196 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs index d0df8f9c34..d05af4d699 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs @@ -20,33 +20,33 @@ public override DataFrameColumn Add(DataFrameColumn column, bool inPlace = false switch (column) { case PrimitiveDataFrameColumn boolColumn: - 
return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(boolColumn, inPlace); case PrimitiveDataFrameColumn byteColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(byteColumn, inPlace); case PrimitiveDataFrameColumn charColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(charColumn, inPlace); case PrimitiveDataFrameColumn decimalColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(decimalColumn, inPlace); case PrimitiveDataFrameColumn doubleColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(doubleColumn, inPlace); case PrimitiveDataFrameColumn floatColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(floatColumn, inPlace); case PrimitiveDataFrameColumn intColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(intColumn, inPlace); case PrimitiveDataFrameColumn longColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(longColumn, inPlace); case PrimitiveDataFrameColumn sbyteColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(sbyteColumn, inPlace); case PrimitiveDataFrameColumn shortColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(shortColumn, inPlace); case PrimitiveDataFrameColumn uintColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(uintColumn, inPlace); case PrimitiveDataFrameColumn ulongColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(ulongColumn, inPlace); case PrimitiveDataFrameColumn ushortColumn: - return 
AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: - return AddImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AddImplementation(DateTimeColumn, inPlace); default: throw new NotSupportedException(); } @@ -67,33 +67,33 @@ public override DataFrameColumn Subtract(DataFrameColumn column, bool inPlace = switch (column) { case PrimitiveDataFrameColumn boolColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(boolColumn, inPlace); case PrimitiveDataFrameColumn byteColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(byteColumn, inPlace); case PrimitiveDataFrameColumn charColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(charColumn, inPlace); case PrimitiveDataFrameColumn decimalColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(decimalColumn, inPlace); case PrimitiveDataFrameColumn doubleColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(doubleColumn, inPlace); case PrimitiveDataFrameColumn floatColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(floatColumn, inPlace); case PrimitiveDataFrameColumn intColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(intColumn, inPlace); case PrimitiveDataFrameColumn longColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(longColumn, inPlace); case PrimitiveDataFrameColumn sbyteColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return 
SubtractImplementation(sbyteColumn, inPlace); case PrimitiveDataFrameColumn shortColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(shortColumn, inPlace); case PrimitiveDataFrameColumn uintColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(uintColumn, inPlace); case PrimitiveDataFrameColumn ulongColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(ulongColumn, inPlace); case PrimitiveDataFrameColumn ushortColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: - return SubtractImplementation(column as PrimitiveDataFrameColumn, inPlace); + return SubtractImplementation(DateTimeColumn, inPlace); default: throw new NotSupportedException(); } @@ -114,33 +114,33 @@ public override DataFrameColumn Multiply(DataFrameColumn column, bool inPlace = switch (column) { case PrimitiveDataFrameColumn boolColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(boolColumn, inPlace); case PrimitiveDataFrameColumn byteColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(byteColumn, inPlace); case PrimitiveDataFrameColumn charColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(charColumn, inPlace); case PrimitiveDataFrameColumn decimalColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(decimalColumn, inPlace); case PrimitiveDataFrameColumn doubleColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(doubleColumn, inPlace); case 
PrimitiveDataFrameColumn floatColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(floatColumn, inPlace); case PrimitiveDataFrameColumn intColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(intColumn, inPlace); case PrimitiveDataFrameColumn longColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(longColumn, inPlace); case PrimitiveDataFrameColumn sbyteColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(sbyteColumn, inPlace); case PrimitiveDataFrameColumn shortColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(shortColumn, inPlace); case PrimitiveDataFrameColumn uintColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(uintColumn, inPlace); case PrimitiveDataFrameColumn ulongColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(ulongColumn, inPlace); case PrimitiveDataFrameColumn ushortColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: - return MultiplyImplementation(column as PrimitiveDataFrameColumn, inPlace); + return MultiplyImplementation(DateTimeColumn, inPlace); default: throw new NotSupportedException(); } @@ -161,33 +161,33 @@ public override DataFrameColumn Divide(DataFrameColumn column, bool inPlace = fa switch (column) { case PrimitiveDataFrameColumn boolColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(boolColumn, inPlace); case PrimitiveDataFrameColumn byteColumn: - return DivideImplementation(column as 
PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(byteColumn, inPlace); case PrimitiveDataFrameColumn charColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(charColumn, inPlace); case PrimitiveDataFrameColumn decimalColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(decimalColumn, inPlace); case PrimitiveDataFrameColumn doubleColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(doubleColumn, inPlace); case PrimitiveDataFrameColumn floatColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(floatColumn, inPlace); case PrimitiveDataFrameColumn intColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(intColumn, inPlace); case PrimitiveDataFrameColumn longColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(longColumn, inPlace); case PrimitiveDataFrameColumn sbyteColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(sbyteColumn, inPlace); case PrimitiveDataFrameColumn shortColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(shortColumn, inPlace); case PrimitiveDataFrameColumn uintColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(uintColumn, inPlace); case PrimitiveDataFrameColumn ulongColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(ulongColumn, inPlace); case PrimitiveDataFrameColumn ushortColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(ushortColumn, inPlace); case 
PrimitiveDataFrameColumn DateTimeColumn: - return DivideImplementation(column as PrimitiveDataFrameColumn, inPlace); + return DivideImplementation(DateTimeColumn, inPlace); default: throw new NotSupportedException(); } @@ -208,33 +208,33 @@ public override DataFrameColumn Modulo(DataFrameColumn column, bool inPlace = fa switch (column) { case PrimitiveDataFrameColumn boolColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(boolColumn, inPlace); case PrimitiveDataFrameColumn byteColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(byteColumn, inPlace); case PrimitiveDataFrameColumn charColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(charColumn, inPlace); case PrimitiveDataFrameColumn decimalColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(decimalColumn, inPlace); case PrimitiveDataFrameColumn doubleColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(doubleColumn, inPlace); case PrimitiveDataFrameColumn floatColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(floatColumn, inPlace); case PrimitiveDataFrameColumn intColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(intColumn, inPlace); case PrimitiveDataFrameColumn longColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(longColumn, inPlace); case PrimitiveDataFrameColumn sbyteColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(sbyteColumn, inPlace); case PrimitiveDataFrameColumn shortColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, 
inPlace); + return ModuloImplementation(shortColumn, inPlace); case PrimitiveDataFrameColumn uintColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(uintColumn, inPlace); case PrimitiveDataFrameColumn ulongColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(ulongColumn, inPlace); case PrimitiveDataFrameColumn ushortColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: - return ModuloImplementation(column as PrimitiveDataFrameColumn, inPlace); + return ModuloImplementation(DateTimeColumn, inPlace); default: throw new NotSupportedException(); } @@ -255,33 +255,33 @@ public override DataFrameColumn And(DataFrameColumn column, bool inPlace = false switch (column) { case PrimitiveDataFrameColumn boolColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(boolColumn, inPlace); case PrimitiveDataFrameColumn byteColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(byteColumn, inPlace); case PrimitiveDataFrameColumn charColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(charColumn, inPlace); case PrimitiveDataFrameColumn decimalColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(decimalColumn, inPlace); case PrimitiveDataFrameColumn doubleColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(doubleColumn, inPlace); case PrimitiveDataFrameColumn floatColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(floatColumn, inPlace); case PrimitiveDataFrameColumn intColumn: - return AndImplementation(column as 
PrimitiveDataFrameColumn, inPlace); + return AndImplementation(intColumn, inPlace); case PrimitiveDataFrameColumn longColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(longColumn, inPlace); case PrimitiveDataFrameColumn sbyteColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(sbyteColumn, inPlace); case PrimitiveDataFrameColumn shortColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(shortColumn, inPlace); case PrimitiveDataFrameColumn uintColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(uintColumn, inPlace); case PrimitiveDataFrameColumn ulongColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(ulongColumn, inPlace); case PrimitiveDataFrameColumn ushortColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: - return AndImplementation(column as PrimitiveDataFrameColumn, inPlace); + return AndImplementation(DateTimeColumn, inPlace); default: throw new NotSupportedException(); } @@ -297,33 +297,33 @@ public override DataFrameColumn Or(DataFrameColumn column, bool inPlace = false) switch (column) { case PrimitiveDataFrameColumn boolColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(boolColumn, inPlace); case PrimitiveDataFrameColumn byteColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(byteColumn, inPlace); case PrimitiveDataFrameColumn charColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(charColumn, inPlace); case PrimitiveDataFrameColumn decimalColumn: - return OrImplementation(column as 
PrimitiveDataFrameColumn, inPlace); + return OrImplementation(decimalColumn, inPlace); case PrimitiveDataFrameColumn doubleColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(doubleColumn, inPlace); case PrimitiveDataFrameColumn floatColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(floatColumn, inPlace); case PrimitiveDataFrameColumn intColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(intColumn, inPlace); case PrimitiveDataFrameColumn longColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(longColumn, inPlace); case PrimitiveDataFrameColumn sbyteColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(sbyteColumn, inPlace); case PrimitiveDataFrameColumn shortColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(shortColumn, inPlace); case PrimitiveDataFrameColumn uintColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(uintColumn, inPlace); case PrimitiveDataFrameColumn ulongColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(ulongColumn, inPlace); case PrimitiveDataFrameColumn ushortColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: - return OrImplementation(column as PrimitiveDataFrameColumn, inPlace); + return OrImplementation(DateTimeColumn, inPlace); default: throw new NotSupportedException(); } @@ -339,33 +339,33 @@ public override DataFrameColumn Xor(DataFrameColumn column, bool inPlace = false switch (column) { case PrimitiveDataFrameColumn boolColumn: - return XorImplementation(column as 
PrimitiveDataFrameColumn, inPlace); + return XorImplementation(boolColumn, inPlace); case PrimitiveDataFrameColumn byteColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(byteColumn, inPlace); case PrimitiveDataFrameColumn charColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(charColumn, inPlace); case PrimitiveDataFrameColumn decimalColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(decimalColumn, inPlace); case PrimitiveDataFrameColumn doubleColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(doubleColumn, inPlace); case PrimitiveDataFrameColumn floatColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(floatColumn, inPlace); case PrimitiveDataFrameColumn intColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(intColumn, inPlace); case PrimitiveDataFrameColumn longColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(longColumn, inPlace); case PrimitiveDataFrameColumn sbyteColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(sbyteColumn, inPlace); case PrimitiveDataFrameColumn shortColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(shortColumn, inPlace); case PrimitiveDataFrameColumn uintColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(uintColumn, inPlace); case PrimitiveDataFrameColumn ulongColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(ulongColumn, inPlace); case PrimitiveDataFrameColumn ushortColumn: - return XorImplementation(column as 
PrimitiveDataFrameColumn, inPlace); + return XorImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: - return XorImplementation(column as PrimitiveDataFrameColumn, inPlace); + return XorImplementation(DateTimeColumn, inPlace); default: throw new NotSupportedException(); } @@ -391,33 +391,33 @@ public override PrimitiveDataFrameColumn ElementwiseEquals(DataFrameColumn switch (column) { case PrimitiveDataFrameColumn boolColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(boolColumn); case PrimitiveDataFrameColumn byteColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(byteColumn); case PrimitiveDataFrameColumn charColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(charColumn); case PrimitiveDataFrameColumn decimalColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(decimalColumn); case PrimitiveDataFrameColumn doubleColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(doubleColumn); case PrimitiveDataFrameColumn floatColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(floatColumn); case PrimitiveDataFrameColumn intColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(intColumn); case PrimitiveDataFrameColumn longColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(longColumn); case PrimitiveDataFrameColumn sbyteColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(sbyteColumn); 
case PrimitiveDataFrameColumn shortColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(shortColumn); case PrimitiveDataFrameColumn uintColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(uintColumn); case PrimitiveDataFrameColumn ulongColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(ulongColumn); case PrimitiveDataFrameColumn ushortColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: - return ElementwiseEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseEqualsImplementation(DateTimeColumn); default: throw new NotSupportedException(); } @@ -438,33 +438,33 @@ public override PrimitiveDataFrameColumn ElementwiseNotEquals(DataFrameCol switch (column) { case PrimitiveDataFrameColumn boolColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(boolColumn); case PrimitiveDataFrameColumn byteColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(byteColumn); case PrimitiveDataFrameColumn charColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(charColumn); case PrimitiveDataFrameColumn decimalColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(decimalColumn); case PrimitiveDataFrameColumn doubleColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(doubleColumn); case PrimitiveDataFrameColumn floatColumn: 
- return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(floatColumn); case PrimitiveDataFrameColumn intColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(intColumn); case PrimitiveDataFrameColumn longColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(longColumn); case PrimitiveDataFrameColumn sbyteColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(sbyteColumn); case PrimitiveDataFrameColumn shortColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(shortColumn); case PrimitiveDataFrameColumn uintColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(uintColumn); case PrimitiveDataFrameColumn ulongColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(ulongColumn); case PrimitiveDataFrameColumn ushortColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: - return ElementwiseNotEqualsImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseNotEqualsImplementation(DateTimeColumn); default: throw new NotSupportedException(); } @@ -485,33 +485,33 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual(Dat switch (column) { case PrimitiveDataFrameColumn boolColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(boolColumn); case PrimitiveDataFrameColumn byteColumn: - 
return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(byteColumn); case PrimitiveDataFrameColumn charColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(charColumn); case PrimitiveDataFrameColumn decimalColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(decimalColumn); case PrimitiveDataFrameColumn doubleColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(doubleColumn); case PrimitiveDataFrameColumn floatColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(floatColumn); case PrimitiveDataFrameColumn intColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(intColumn); case PrimitiveDataFrameColumn longColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(longColumn); case PrimitiveDataFrameColumn sbyteColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(sbyteColumn); case PrimitiveDataFrameColumn shortColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(shortColumn); case PrimitiveDataFrameColumn uintColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(uintColumn); case PrimitiveDataFrameColumn ulongColumn: - 
return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(ulongColumn); case PrimitiveDataFrameColumn ushortColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: - return ElementwiseGreaterThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanOrEqualImplementation(DateTimeColumn); default: throw new NotSupportedException(); } @@ -532,33 +532,33 @@ public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(DataFr switch (column) { case PrimitiveDataFrameColumn boolColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(boolColumn); case PrimitiveDataFrameColumn byteColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(byteColumn); case PrimitiveDataFrameColumn charColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(charColumn); case PrimitiveDataFrameColumn decimalColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(decimalColumn); case PrimitiveDataFrameColumn doubleColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(doubleColumn); case PrimitiveDataFrameColumn floatColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(floatColumn); case PrimitiveDataFrameColumn intColumn: - return ElementwiseLessThanOrEqualImplementation(column as 
PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(intColumn); case PrimitiveDataFrameColumn longColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(longColumn); case PrimitiveDataFrameColumn sbyteColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(sbyteColumn); case PrimitiveDataFrameColumn shortColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(shortColumn); case PrimitiveDataFrameColumn uintColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(uintColumn); case PrimitiveDataFrameColumn ulongColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(ulongColumn); case PrimitiveDataFrameColumn ushortColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: - return ElementwiseLessThanOrEqualImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanOrEqualImplementation(DateTimeColumn); default: throw new NotSupportedException(); } @@ -579,33 +579,33 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThan(DataFrameC switch (column) { case PrimitiveDataFrameColumn boolColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(boolColumn); case PrimitiveDataFrameColumn byteColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(byteColumn); case 
PrimitiveDataFrameColumn charColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(charColumn); case PrimitiveDataFrameColumn decimalColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(decimalColumn); case PrimitiveDataFrameColumn doubleColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(doubleColumn); case PrimitiveDataFrameColumn floatColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(floatColumn); case PrimitiveDataFrameColumn intColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(intColumn); case PrimitiveDataFrameColumn longColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(longColumn); case PrimitiveDataFrameColumn sbyteColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(sbyteColumn); case PrimitiveDataFrameColumn shortColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(shortColumn); case PrimitiveDataFrameColumn uintColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(uintColumn); case PrimitiveDataFrameColumn ulongColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(ulongColumn); case PrimitiveDataFrameColumn ushortColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return 
ElementwiseGreaterThanImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: - return ElementwiseGreaterThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseGreaterThanImplementation(DateTimeColumn); default: throw new NotSupportedException(); } @@ -626,33 +626,33 @@ public override PrimitiveDataFrameColumn ElementwiseLessThan(DataFrameColu switch (column) { case PrimitiveDataFrameColumn boolColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(boolColumn); case PrimitiveDataFrameColumn byteColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(byteColumn); case PrimitiveDataFrameColumn charColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(charColumn); case PrimitiveDataFrameColumn decimalColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(decimalColumn); case PrimitiveDataFrameColumn doubleColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(doubleColumn); case PrimitiveDataFrameColumn floatColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(floatColumn); case PrimitiveDataFrameColumn intColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(intColumn); case PrimitiveDataFrameColumn longColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(longColumn); case PrimitiveDataFrameColumn sbyteColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return 
ElementwiseLessThanImplementation(sbyteColumn); case PrimitiveDataFrameColumn shortColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(shortColumn); case PrimitiveDataFrameColumn uintColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(uintColumn); case PrimitiveDataFrameColumn ulongColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(ulongColumn); case PrimitiveDataFrameColumn ushortColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: - return ElementwiseLessThanImplementation(column as PrimitiveDataFrameColumn); + return ElementwiseLessThanImplementation(DateTimeColumn); default: throw new NotSupportedException(); } From 1de06be0cf8fcb3057fe8a3cbcf20b16452e0752 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 25 May 2023 09:32:42 +0300 Subject: [PATCH 22/31] Simplify getting mutable buffers # Conflicts: # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.tt --- .../PrimitiveDataFrameColumnArithmetic.cs | 68 +++++-------------- 1 file changed, 17 insertions(+), 51 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs index a8916dd3be..3da50547d6 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs @@ -148,9 +148,7 @@ public void And(PrimitiveColumnContainer left, PrimitiveColumnContainer.GetMutableBuffer(buffer); - left.Buffers[b] = mutableBuffer; + var mutableBuffer = left.Buffers.GetOrCreateMutable(b); var 
span = mutableBuffer.Span; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) @@ -163,9 +161,7 @@ public void And(PrimitiveColumnContainer column, bool scalar) { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i < span.Length; i++) { @@ -177,9 +173,7 @@ public void And(bool scalar, PrimitiveColumnContainer column) { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i < span.Length; i++) { @@ -191,9 +185,7 @@ public void Or(PrimitiveColumnContainer left, PrimitiveColumnContainer.GetMutableBuffer(buffer); - left.Buffers[b] = mutableBuffer; + var mutableBuffer = left.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) @@ -206,9 +198,7 @@ public void Or(PrimitiveColumnContainer column, bool scalar) { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i < span.Length; i++) { @@ -220,9 +210,7 @@ public void Or(bool scalar, PrimitiveColumnContainer column) { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for 
(int i = 0; i < span.Length; i++) { @@ -234,9 +222,7 @@ public void Xor(PrimitiveColumnContainer left, PrimitiveColumnContainer.GetMutableBuffer(buffer); - left.Buffers[b] = mutableBuffer; + var mutableBuffer = left.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) @@ -249,9 +235,7 @@ public void Xor(PrimitiveColumnContainer column, bool scalar) { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i < span.Length; i++) { @@ -263,9 +247,7 @@ public void Xor(bool scalar, PrimitiveColumnContainer column) { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i < span.Length; i++) { @@ -285,9 +267,7 @@ public void ElementwiseEquals(PrimitiveColumnContainer left, PrimitiveColu { for (int b = 0; b < left.Buffers.Count; b++) { - var buffer = left.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - left.Buffers[b] = mutableBuffer; + var mutableBuffer = left.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) @@ -300,9 +280,7 @@ public void ElementwiseEquals(PrimitiveColumnContainer column, bool scalar { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i 
< span.Length; i++) { @@ -314,9 +292,7 @@ public void ElementwiseNotEquals(PrimitiveColumnContainer left, PrimitiveC { for (int b = 0; b < left.Buffers.Count; b++) { - var buffer = left.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - left.Buffers[b] = mutableBuffer; + var mutableBuffer = left.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) @@ -329,9 +305,7 @@ public void ElementwiseNotEquals(PrimitiveColumnContainer column, bool sca { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i < span.Length; i++) { @@ -483,9 +457,7 @@ public void ElementwiseEquals(PrimitiveColumnContainer left, Primitive { for (int b = 0; b < left.Buffers.Count; b++) { - var buffer = left.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - left.Buffers[b] = mutableBuffer; + var mutableBuffer = left.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) @@ -498,9 +470,7 @@ public void ElementwiseEquals(PrimitiveColumnContainer column, DateTim { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i < span.Length; i++) { @@ -512,9 +482,7 @@ public void ElementwiseNotEquals(PrimitiveColumnContainer left, Primit { for (int b = 0; b < left.Buffers.Count; b++) { - var buffer = left.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - left.Buffers[b] = 
mutableBuffer; + var mutableBuffer = left.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) @@ -527,9 +495,7 @@ public void ElementwiseNotEquals(PrimitiveColumnContainer column, Date { for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - column.Buffers[b] = mutableBuffer; + var mutableBuffer = column.Buffers.GetOrCreateMutable(b); var span = mutableBuffer.Span; for (int i = 0; i < span.Length; i++) { From 6be198d5032c41d934c5a60d6720e200a346d5b9 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 25 May 2023 09:37:44 +0300 Subject: [PATCH 23/31] Don't convert buffer to mutable if it not required # Conflicts: # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.tt --- .../PrimitiveDataFrameColumnArithmetic.cs | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs index 3da50547d6..9a11b44cdf 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs @@ -267,8 +267,7 @@ public void ElementwiseEquals(PrimitiveColumnContainer left, PrimitiveColu { for (int b = 0; b < left.Buffers.Count; b++) { - var mutableBuffer = left.Buffers.GetOrCreateMutable(b); - var span = mutableBuffer.Span; + var span = left.Buffers[b].ReadOnlySpan; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -280,8 +279,7 @@ public void ElementwiseEquals(PrimitiveColumnContainer column, bool scalar { for (int b = 0; b < column.Buffers.Count; b++) { - var mutableBuffer = column.Buffers.GetOrCreateMutable(b); - var span = 
mutableBuffer.Span; + var span = column.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { ret[i] = (span[i] == scalar); @@ -292,8 +290,7 @@ public void ElementwiseNotEquals(PrimitiveColumnContainer left, PrimitiveC { for (int b = 0; b < left.Buffers.Count; b++) { - var mutableBuffer = left.Buffers.GetOrCreateMutable(b); - var span = mutableBuffer.Span; + var span = left.Buffers[b].ReadOnlySpan; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -305,8 +302,7 @@ public void ElementwiseNotEquals(PrimitiveColumnContainer column, bool sca { for (int b = 0; b < column.Buffers.Count; b++) { - var mutableBuffer = column.Buffers.GetOrCreateMutable(b); - var span = mutableBuffer.Span; + var span = column.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { ret[i] = (span[i] != scalar); @@ -457,8 +453,7 @@ public void ElementwiseEquals(PrimitiveColumnContainer left, Primitive { for (int b = 0; b < left.Buffers.Count; b++) { - var mutableBuffer = left.Buffers.GetOrCreateMutable(b); - var span = mutableBuffer.Span; + var span = left.Buffers[b].ReadOnlySpan; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -470,8 +465,7 @@ public void ElementwiseEquals(PrimitiveColumnContainer column, DateTim { for (int b = 0; b < column.Buffers.Count; b++) { - var mutableBuffer = column.Buffers.GetOrCreateMutable(b); - var span = mutableBuffer.Span; + var span = column.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { ret[i] = (span[i] == scalar); @@ -482,8 +476,7 @@ public void ElementwiseNotEquals(PrimitiveColumnContainer left, Primit { for (int b = 0; b < left.Buffers.Count; b++) { - var mutableBuffer = left.Buffers.GetOrCreateMutable(b); - var span = mutableBuffer.Span; + var span = left.Buffers[b].ReadOnlySpan; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { @@ -495,8 +488,7 @@ public void 
ElementwiseNotEquals(PrimitiveColumnContainer column, Date { for (int b = 0; b < column.Buffers.Count; b++) { - var mutableBuffer = column.Buffers.GetOrCreateMutable(b); - var span = mutableBuffer.Span; + var span = column.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { ret[i] = (span[i] != scalar); From d3a0aae9034d5ae8821b8f23e1ae24a916141ec2 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 6 Jul 2023 12:49:56 +0300 Subject: [PATCH 24/31] Provide ability to filter by null value # Conflicts: # src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.tt # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.tt --- .../DataFrameColumn.BinaryOperations.cs | 9 +++ ...imitiveDataFrameColumn.BinaryOperations.cs | 74 +++++++++++++++++++ .../PrimitiveDataFrameColumn.cs | 24 ++++++ .../StringDataFrameColumn.BinaryOperations.cs | 26 +++++++ .../DataFrameTests.cs | 68 +++++++++++++++++ 5 files changed, 201 insertions(+) diff --git a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs index 8ecd052486..4a3bac6988 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs @@ -316,5 +316,14 @@ public virtual PrimitiveDataFrameColumn ElementwiseLessThan(T value) throw new NotImplementedException(); } + public virtual PrimitiveDataFrameColumn ElementwiseIsNull() + { + throw new NotImplementedException(); + } + + public virtual PrimitiveDataFrameColumn ElementwiseIsNotNull() + { + throw new NotImplementedException(); + } } } diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs index d05af4d699..fc75bda8fe 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs 
+++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.BinaryOperations.cs @@ -47,10 +47,12 @@ public override DataFrameColumn Add(DataFrameColumn column, bool inPlace = false return AddImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return AddImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Add(U value, bool inPlace = false) { @@ -61,6 +63,7 @@ public override DataFrameColumn Add(U value, bool inPlace = false) } return AddImplementation(value, inPlace); } + /// public override DataFrameColumn Subtract(DataFrameColumn column, bool inPlace = false) { @@ -94,10 +97,12 @@ public override DataFrameColumn Subtract(DataFrameColumn column, bool inPlace = return SubtractImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return SubtractImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Subtract(U value, bool inPlace = false) { @@ -108,6 +113,7 @@ public override DataFrameColumn Subtract(U value, bool inPlace = false) } return SubtractImplementation(value, inPlace); } + /// public override DataFrameColumn Multiply(DataFrameColumn column, bool inPlace = false) { @@ -141,10 +147,12 @@ public override DataFrameColumn Multiply(DataFrameColumn column, bool inPlace = return MultiplyImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return MultiplyImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Multiply(U value, bool inPlace = false) { @@ -155,6 +163,7 @@ public override DataFrameColumn Multiply(U value, bool inPlace = false) } return MultiplyImplementation(value, inPlace); } + /// public override DataFrameColumn Divide(DataFrameColumn column, bool inPlace = false) { @@ -188,10 +197,12 @@ public override DataFrameColumn 
Divide(DataFrameColumn column, bool inPlace = fa return DivideImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return DivideImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Divide(U value, bool inPlace = false) { @@ -202,6 +213,7 @@ public override DataFrameColumn Divide(U value, bool inPlace = false) } return DivideImplementation(value, inPlace); } + /// public override DataFrameColumn Modulo(DataFrameColumn column, bool inPlace = false) { @@ -235,10 +247,12 @@ public override DataFrameColumn Modulo(DataFrameColumn column, bool inPlace = fa return ModuloImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return ModuloImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override DataFrameColumn Modulo(U value, bool inPlace = false) { @@ -249,6 +263,7 @@ public override DataFrameColumn Modulo(U value, bool inPlace = false) } return ModuloImplementation(value, inPlace); } + /// public override DataFrameColumn And(DataFrameColumn column, bool inPlace = false) { @@ -282,15 +297,18 @@ public override DataFrameColumn And(DataFrameColumn column, bool inPlace = false return AndImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return AndImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn And(bool value, bool inPlace = false) { return AndImplementation(value, inPlace); } + /// public override DataFrameColumn Or(DataFrameColumn column, bool inPlace = false) { @@ -324,15 +342,18 @@ public override DataFrameColumn Or(DataFrameColumn column, bool inPlace = false) return OrImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return OrImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// 
public override PrimitiveDataFrameColumn Or(bool value, bool inPlace = false) { return OrImplementation(value, inPlace); } + /// public override DataFrameColumn Xor(DataFrameColumn column, bool inPlace = false) { @@ -366,15 +387,18 @@ public override DataFrameColumn Xor(DataFrameColumn column, bool inPlace = false return XorImplementation(ushortColumn, inPlace); case PrimitiveDataFrameColumn DateTimeColumn: return XorImplementation(DateTimeColumn, inPlace); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn Xor(bool value, bool inPlace = false) { return XorImplementation(value, inPlace); } + /// public override DataFrameColumn LeftShift(int value, bool inPlace = false) { @@ -418,10 +442,14 @@ public override PrimitiveDataFrameColumn ElementwiseEquals(DataFrameColumn return ElementwiseEqualsImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseEqualsImplementation(DateTimeColumn); + case null: + return ElementwiseIsNull(); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseEquals(U value) { @@ -432,6 +460,7 @@ public override PrimitiveDataFrameColumn ElementwiseEquals(U value) } return ElementwiseEqualsImplementation(value); } + /// public override PrimitiveDataFrameColumn ElementwiseNotEquals(DataFrameColumn column) { @@ -465,10 +494,14 @@ public override PrimitiveDataFrameColumn ElementwiseNotEquals(DataFrameCol return ElementwiseNotEqualsImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseNotEqualsImplementation(DateTimeColumn); + case null: + return ElementwiseIsNotNull(); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseNotEquals(U value) { @@ -479,6 +512,7 @@ public override PrimitiveDataFrameColumn ElementwiseNotEquals(U value) } return ElementwiseNotEqualsImplementation(value); } + /// public override 
PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual(DataFrameColumn column) { @@ -512,10 +546,12 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual(Dat return ElementwiseGreaterThanOrEqualImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseGreaterThanOrEqualImplementation(DateTimeColumn); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual(U value) { @@ -526,6 +562,7 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqual( } return ElementwiseGreaterThanOrEqualImplementation(value); } + /// public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(DataFrameColumn column) { @@ -559,10 +596,12 @@ public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(DataFr return ElementwiseLessThanOrEqualImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseLessThanOrEqualImplementation(DateTimeColumn); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(U value) { @@ -573,6 +612,7 @@ public override PrimitiveDataFrameColumn ElementwiseLessThanOrEqual(U v } return ElementwiseLessThanOrEqualImplementation(value); } + /// public override PrimitiveDataFrameColumn ElementwiseGreaterThan(DataFrameColumn column) { @@ -606,10 +646,12 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThan(DataFrameC return ElementwiseGreaterThanImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseGreaterThanImplementation(DateTimeColumn); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseGreaterThan(U value) { @@ -620,6 +662,7 @@ public override PrimitiveDataFrameColumn ElementwiseGreaterThan(U value } return ElementwiseGreaterThanImplementation(value); } + /// public override PrimitiveDataFrameColumn 
ElementwiseLessThan(DataFrameColumn column) { @@ -653,10 +696,12 @@ public override PrimitiveDataFrameColumn ElementwiseLessThan(DataFrameColu return ElementwiseLessThanImplementation(ushortColumn); case PrimitiveDataFrameColumn DateTimeColumn: return ElementwiseLessThanImplementation(DateTimeColumn); + default: throw new NotSupportedException(); } } + /// public override PrimitiveDataFrameColumn ElementwiseLessThan(U value) { @@ -668,6 +713,7 @@ public override PrimitiveDataFrameColumn ElementwiseLessThan(U value) return ElementwiseLessThanImplementation(value); } + internal DataFrameColumn AddImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -750,6 +796,7 @@ internal DataFrameColumn AddImplementation(PrimitiveDataFrameColumn column throw new NotSupportedException(); } } + internal DataFrameColumn AddImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1035,6 +1082,7 @@ internal DataFrameColumn SubtractImplementation(PrimitiveDataFrameColumn c throw new NotSupportedException(); } } + internal DataFrameColumn SubtractImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1139,6 +1187,7 @@ internal DataFrameColumn SubtractImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn MultiplyImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1221,6 +1270,7 @@ internal DataFrameColumn MultiplyImplementation(PrimitiveDataFrameColumn c throw new NotSupportedException(); } } + internal DataFrameColumn MultiplyImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1298,6 +1348,7 @@ internal DataFrameColumn MultiplyImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn DivideImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1380,6 +1431,7 @@ internal DataFrameColumn DivideImplementation(PrimitiveDataFrameColumn col throw new 
NotSupportedException(); } } + internal DataFrameColumn DivideImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1484,6 +1536,7 @@ internal DataFrameColumn DivideImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn ModuloImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1566,6 +1619,7 @@ internal DataFrameColumn ModuloImplementation(PrimitiveDataFrameColumn col throw new NotSupportedException(); } } + internal DataFrameColumn ModuloImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1643,6 +1697,7 @@ internal DataFrameColumn ModuloImplementation(U value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn AndImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1678,6 +1733,7 @@ internal DataFrameColumn AndImplementation(PrimitiveDataFrameColumn column throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn AndImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1708,6 +1764,7 @@ internal PrimitiveDataFrameColumn AndImplementation(U value, bool inPla throw new NotSupportedException(); } } + internal DataFrameColumn OrImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1743,6 +1800,7 @@ internal DataFrameColumn OrImplementation(PrimitiveDataFrameColumn column, throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn OrImplementation(U value, bool inPlace) { switch (typeof(T)) @@ -1773,6 +1831,7 @@ internal PrimitiveDataFrameColumn OrImplementation(U value, bool inPlac throw new NotSupportedException(); } } + internal DataFrameColumn XorImplementation(PrimitiveDataFrameColumn column, bool inPlace) where U : unmanaged { @@ -1808,6 +1867,7 @@ internal DataFrameColumn XorImplementation(PrimitiveDataFrameColumn column throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn XorImplementation(U value, 
bool inPlace) { switch (typeof(T)) @@ -1838,6 +1898,7 @@ internal PrimitiveDataFrameColumn XorImplementation(U value, bool inPla throw new NotSupportedException(); } } + internal DataFrameColumn LeftShiftImplementation(int value, bool inPlace) { switch (typeof(T)) @@ -1901,6 +1962,7 @@ internal DataFrameColumn LeftShiftImplementation(int value, bool inPlace) throw new NotSupportedException(); } } + internal DataFrameColumn RightShiftImplementation(int value, bool inPlace) { switch (typeof(T)) @@ -1964,6 +2026,7 @@ internal DataFrameColumn RightShiftImplementation(int value, bool inPlace) throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseEqualsImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2053,6 +2116,7 @@ internal PrimitiveDataFrameColumn ElementwiseEqualsImplementation(Primi throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseEqualsImplementation(U value) { switch (typeof(T)) @@ -2137,6 +2201,7 @@ internal PrimitiveDataFrameColumn ElementwiseEqualsImplementation(U val throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseNotEqualsImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2226,6 +2291,7 @@ internal PrimitiveDataFrameColumn ElementwiseNotEqualsImplementation(Pr throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseNotEqualsImplementation(U value) { switch (typeof(T)) @@ -2310,6 +2376,7 @@ internal PrimitiveDataFrameColumn ElementwiseNotEqualsImplementation(U throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqualImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2387,6 +2454,7 @@ internal PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqualImplementat throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqualImplementation(U value) { switch (typeof(T)) @@ 
-2459,6 +2527,7 @@ internal PrimitiveDataFrameColumn ElementwiseGreaterThanOrEqualImplementat throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseLessThanOrEqualImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2536,6 +2605,7 @@ internal PrimitiveDataFrameColumn ElementwiseLessThanOrEqualImplementation throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseLessThanOrEqualImplementation(U value) { switch (typeof(T)) @@ -2608,6 +2678,7 @@ internal PrimitiveDataFrameColumn ElementwiseLessThanOrEqualImplementation throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseGreaterThanImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2685,6 +2756,7 @@ internal PrimitiveDataFrameColumn ElementwiseGreaterThanImplementation( throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseGreaterThanImplementation(U value) { switch (typeof(T)) @@ -2757,6 +2829,7 @@ internal PrimitiveDataFrameColumn ElementwiseGreaterThanImplementation( throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseLessThanImplementation(PrimitiveDataFrameColumn column) where U : unmanaged { @@ -2834,6 +2907,7 @@ internal PrimitiveDataFrameColumn ElementwiseLessThanImplementation(Pri throw new NotSupportedException(); } } + internal PrimitiveDataFrameColumn ElementwiseLessThanImplementation(U value) { switch (typeof(T)) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index 0fe7820fe2..dbf73a5536 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -821,5 +821,29 @@ public override Dictionary> GetGroupedOccurrences(DataFr { return GetGroupedOccurrences(other, out otherColumnNullIndices); } + + public override PrimitiveDataFrameColumn ElementwiseIsNull() + 
{ + var ret = new BooleanDataFrameColumn(Name, Length); + + for (long i = 0; i < Length; i++) + { + ret[i] = !_columnContainer[i].HasValue; + } + + return ret; + } + + public override PrimitiveDataFrameColumn ElementwiseIsNotNull() + { + var ret = new BooleanDataFrameColumn(Name, Length); + + for (long i = 0; i < Length; i++) + { + ret[i] = _columnContainer[i].HasValue; + } + + return ret; + } } } diff --git a/src/Microsoft.Data.Analysis/StringDataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/StringDataFrameColumn.BinaryOperations.cs index 0bfbd3b6bc..c6ffe4c4cf 100644 --- a/src/Microsoft.Data.Analysis/StringDataFrameColumn.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/StringDataFrameColumn.BinaryOperations.cs @@ -91,6 +91,9 @@ internal static PrimitiveDataFrameColumn ElementwiseEqualsImplementation(D /// public override PrimitiveDataFrameColumn ElementwiseEquals(DataFrameColumn column) { + if (column == null) + return ElementwiseIsNull(); + return ElementwiseEqualsImplementation(this, column); } @@ -128,6 +131,26 @@ internal static PrimitiveDataFrameColumn ElementwiseNotEqualsImplementatio return ret; } + public override PrimitiveDataFrameColumn ElementwiseIsNotNull() + { + PrimitiveDataFrameColumn ret = new PrimitiveDataFrameColumn(Name, Length); + for (long i = 0; i < Length; i++) + { + ret[i] = this[i] != null; + } + return ret; + } + + public override PrimitiveDataFrameColumn ElementwiseIsNull() + { + PrimitiveDataFrameColumn ret = new PrimitiveDataFrameColumn(Name, Length); + for (long i = 0; i < Length; i++) + { + ret[i] = this[i] == null; + } + return ret; + } + public PrimitiveDataFrameColumn ElementwiseNotEquals(string value) { PrimitiveDataFrameColumn ret = new PrimitiveDataFrameColumn(Name, Length); @@ -141,6 +164,9 @@ public PrimitiveDataFrameColumn ElementwiseNotEquals(string value) /// public override PrimitiveDataFrameColumn ElementwiseNotEquals(DataFrameColumn column) { + if (column == null) + return ElementwiseIsNotNull(); 
+ return ElementwiseNotEqualsImplementation(this, column); } diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index ff7856e984..c42864a162 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -3490,5 +3490,73 @@ public void TestMeanMedian() Assert.Equal(4, df["Decimal"].Median()); } + + [Fact] + public void Test_PrimitiveColumnNotEqualsNull() + { + var col = new DoubleDataFrameColumn("col", new double?[] { 1.23, null, 2, 3 }); + var dfTest = new DataFrame(col); + + var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseNotEquals(null)); + + Assert.True(filteredNullDf.Columns.IndexOf("col") >= 0); + Assert.Equal(3, filteredNullDf.Columns["col"].Length); + + Assert.Equal(1.23, filteredNullDf.Columns["col"][0]); + Assert.Equal(2.0, filteredNullDf.Columns["col"][1]); + Assert.Equal(3.0, filteredNullDf.Columns["col"][2]); + } + + [Fact] + public void Test_PrimitiveColumnEqualsNull() + { + var index = new Int32DataFrameColumn("index", new int[] { 1, 2, 3, 4, 5 }); + var col = new DoubleDataFrameColumn("col", new double?[] { 1.23, null, 2, 3, null }); ; + var dfTest = new DataFrame(index, col); + + var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseEquals(null)); + + Assert.True(filteredNullDf.Columns.IndexOf("col") >= 0); + Assert.True(filteredNullDf.Columns.IndexOf("index") >= 0); + + Assert.Equal(2, filteredNullDf.Rows.Count); + + Assert.Equal(2, filteredNullDf.Columns["index"][0]); + Assert.Equal(5, filteredNullDf.Columns["index"][1]); + } + + [Fact] + public void Test_StringColumnNotEqualsNull() + { + var col = new StringDataFrameColumn("col", new[] { "One", null, "Two", "Three" }); + var dfTest = new DataFrame(col); + + var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseNotEquals(null)); + + Assert.True(filteredNullDf.Columns.IndexOf("col") >= 0); + Assert.Equal(3, 
filteredNullDf.Columns["col"].Length); + + Assert.Equal("One", filteredNullDf.Columns["col"][0]); + Assert.Equal("Two", filteredNullDf.Columns["col"][1]); + Assert.Equal("Three", filteredNullDf.Columns["col"][2]); + } + + [Fact] + public void Test_StringColumnEqualsNull() + { + var index = new Int32DataFrameColumn("index", new int[] { 1, 2, 3, 4, 5 }); + var col = new StringDataFrameColumn("col", new[] { "One", null, "Three", "Four", null }); ; + var dfTest = new DataFrame(index, col); + + var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseEquals(null)); + + Assert.True(filteredNullDf.Columns.IndexOf("col") >= 0); + Assert.True(filteredNullDf.Columns.IndexOf("index") >= 0); + + Assert.Equal(2, filteredNullDf.Rows.Count); + + Assert.Equal(2, filteredNullDf.Columns["index"][0]); + Assert.Equal(5, filteredNullDf.Columns["index"][1]); + } } } From ab7e69810e9a7cc96d7286f858cdcfec8c713904 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 6 Jul 2023 12:51:04 +0300 Subject: [PATCH 25/31] Add comments --- .../DataFrameColumn.BinaryOperations.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs index 4a3bac6988..1c340575db 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumn.BinaryOperations.cs @@ -316,11 +316,17 @@ public virtual PrimitiveDataFrameColumn ElementwiseLessThan(T value) throw new NotImplementedException(); } + /// + /// Performs an element-wise equal to Null on each value in the column + /// public virtual PrimitiveDataFrameColumn ElementwiseIsNull() { throw new NotImplementedException(); } + /// + /// Performs an element-wise not equal to Null on each value in the column + /// public virtual PrimitiveDataFrameColumn ElementwiseIsNotNull() { throw new NotImplementedException(); From b0daf74a7e5e1e5afd925deeccf48eb26d24a5ff Mon Sep 17 
00:00:00 2001 From: Aleksei Smirnov Date: Thu, 6 Jul 2023 13:03:23 +0300 Subject: [PATCH 26/31] Fix merge issues (broken build) --- .../PrimitiveDataFrameColumn.cs | 4 ++-- test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index dbf73a5536..5aed1c57f7 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -824,7 +824,7 @@ public override Dictionary> GetGroupedOccurrences(DataFr public override PrimitiveDataFrameColumn ElementwiseIsNull() { - var ret = new BooleanDataFrameColumn(Name, Length); + var ret = new PrimitiveDataFrameColumn(Name, Length); for (long i = 0; i < Length; i++) { @@ -836,7 +836,7 @@ public override PrimitiveDataFrameColumn ElementwiseIsNull() public override PrimitiveDataFrameColumn ElementwiseIsNotNull() { - var ret = new BooleanDataFrameColumn(Name, Length); + var ret = new PrimitiveDataFrameColumn(Name, Length); for (long i = 0; i < Length; i++) { diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index c42864a162..cbc6cc9e80 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
@@ -3494,7 +3494,7 @@ public void TestMeanMedian() [Fact] public void Test_PrimitiveColumnNotEqualsNull() { - var col = new DoubleDataFrameColumn("col", new double?[] { 1.23, null, 2, 3 }); + var col = new PrimitiveDataFrameColumn("col", new double?[] { 1.23, null, 2, 3 }); var dfTest = new DataFrame(col); var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseNotEquals(null)); @@ -3510,8 +3510,8 @@ public void Test_PrimitiveColumnNotEqualsNull() [Fact] public void Test_PrimitiveColumnEqualsNull() { - var index = new Int32DataFrameColumn("index", new int[] { 1, 2, 3, 4, 5 }); - var col = new DoubleDataFrameColumn("col", new double?[] { 1.23, null, 2, 3, null }); ; + var index = new PrimitiveDataFrameColumn("index", new int[] { 1, 2, 3, 4, 5 }); + var col = new PrimitiveDataFrameColumn("col", new double?[] { 1.23, null, 2, 3, null }); ; var dfTest = new DataFrame(index, col); var filteredNullDf = dfTest.Filter(dfTest["col"].ElementwiseEquals(null)); @@ -3544,7 +3544,7 @@ public void Test_StringColumnNotEqualsNull() [Fact] public void Test_StringColumnEqualsNull() { - var index = new Int32DataFrameColumn("index", new int[] { 1, 2, 3, 4, 5 }); + var index = new PrimitiveDataFrameColumn("index", new int[] { 1, 2, 3, 4, 5 }); var col = new StringDataFrameColumn("col", new[] { "One", null, "Three", "Four", null }); ; var dfTest = new DataFrame(index, col); From 663db1f1b0724c14fcbd5d552e3c68f966013c07 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 6 Jul 2023 14:20:15 +0300 Subject: [PATCH 27/31] Step 1 # Conflicts: # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Computations.tt # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.tt --- .../DateTimeComputation.cs | 61 +++++++++++++------ .../PrimitiveDataFrameColumn.Computations.cs | 16 ++--- .../PrimitiveDataFrameColumnComputations.cs | 32 +++++----- .../DataFrameTests.cs | 52 ++++++++++++++++ 4 files changed, 119 
insertions(+), 42 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DateTimeComputation.cs b/src/Microsoft.Data.Analysis/DateTimeComputation.cs index ba14e39292..3e50ec0c82 100644 --- a/src/Microsoft.Data.Analysis/DateTimeComputation.cs +++ b/src/Microsoft.Data.Analysis/DateTimeComputation.cs @@ -4,6 +4,8 @@ using System; using System.Collections.Generic; +using System.Diagnostics; +using System.Reflection; using System.Text; namespace Microsoft.Data.Analysis @@ -189,26 +191,37 @@ public void CumulativeSum(PrimitiveColumnContainer column, IEnumerable throw new NotSupportedException(); } - public void Max(PrimitiveColumnContainer column, out DateTime ret) + public void Max(PrimitiveColumnContainer column, out DateTime? ret) { - ret = column.Buffers[0].ReadOnlySpan[0]; + var maxDate = DateTime.MinValue; + bool hasMaxValue = false; + for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var readOnlySpan = buffer.ReadOnlySpan; + var readOnlySpan = column.Buffers[b].ReadOnlySpan; + var bitmapSpan = column.NullBitMapBuffers[b].ReadOnlySpan; for (int i = 0; i < readOnlySpan.Length; i++) { + int byteIndex = (int)((uint)i / 8); + + //Check if bit is not set (value is null) - skip + if (((bitmapSpan[byteIndex] >> (i & 7)) & 1) == 0) + continue; + var val = readOnlySpan[i]; - if (val > ret) + if (val > maxDate) { - ret = val; + maxDate = val; + hasMaxValue = true; } } } + + ret = hasMaxValue ? maxDate : null; } - public void Max(PrimitiveColumnContainer column, IEnumerable rows, out DateTime ret) + public void Max(PrimitiveColumnContainer column, IEnumerable rows, out DateTime? ret) { ret = default; var readOnlySpan = column.Buffers[0].ReadOnlySpan; @@ -237,26 +250,38 @@ public void Max(PrimitiveColumnContainer column, IEnumerable row } } - public void Min(PrimitiveColumnContainer column, out DateTime ret) + public void Min(PrimitiveColumnContainer column, out DateTime? 
ret) { - ret = column.Buffers[0].ReadOnlySpan[0]; + var minDate = DateTime.MaxValue; + bool hasMinValue = false; + for (int b = 0; b < column.Buffers.Count; b++) { - var buffer = column.Buffers[b]; - var readOnlySpan = buffer.ReadOnlySpan; + var readOnlySpan = column.Buffers[b].ReadOnlySpan; + var bitmapSpan = column.NullBitMapBuffers[b].ReadOnlySpan; + for (int i = 0; i < readOnlySpan.Length; i++) { + int byteIndex = (int)((uint)i / 8); + + //Check if bit is not set (value is null) - skip + if (((bitmapSpan[byteIndex] >> (i & 7)) & 1) == 0) + continue; + var val = readOnlySpan[i]; - if (val < ret) + if (val < minDate) { - ret = val; + minDate = val; + hasMinValue = true; } } } + + ret = hasMinValue ? minDate : null; } - public void Min(PrimitiveColumnContainer column, IEnumerable rows, out DateTime ret) + public void Min(PrimitiveColumnContainer column, IEnumerable rows, out DateTime? ret) { ret = default; var readOnlySpan = column.Buffers[0].ReadOnlySpan; @@ -285,22 +310,22 @@ public void Min(PrimitiveColumnContainer column, IEnumerable row } } - public void Product(PrimitiveColumnContainer column, out DateTime ret) + public void Product(PrimitiveColumnContainer column, out DateTime? ret) { throw new NotSupportedException(); } - public void Product(PrimitiveColumnContainer column, IEnumerable rows, out DateTime ret) + public void Product(PrimitiveColumnContainer column, IEnumerable rows, out DateTime? ret) { throw new NotSupportedException(); } - public void Sum(PrimitiveColumnContainer column, out DateTime ret) + public void Sum(PrimitiveColumnContainer column, out DateTime? ret) { throw new NotSupportedException(); } - public void Sum(PrimitiveColumnContainer column, IEnumerable rows, out DateTime ret) + public void Sum(PrimitiveColumnContainer column, IEnumerable rows, out DateTime? 
ret) { throw new NotSupportedException(); } diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Computations.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Computations.cs index 58e6a1e7c5..5501236c03 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Computations.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Computations.cs @@ -93,49 +93,49 @@ public override DataFrameColumn CumulativeSum(IEnumerable rowIndices, bool /// public override object Max() { - PrimitiveColumnComputation.Instance.Max(_columnContainer, out T ret); + PrimitiveColumnComputation.Instance.Max(_columnContainer, out T? ret); return ret; } /// public override object Max(IEnumerable rowIndices) { - PrimitiveColumnComputation.Instance.Max(_columnContainer, rowIndices, out T ret); + PrimitiveColumnComputation.Instance.Max(_columnContainer, rowIndices, out T? ret); return ret; } /// public override object Min() { - PrimitiveColumnComputation.Instance.Min(_columnContainer, out T ret); + PrimitiveColumnComputation.Instance.Min(_columnContainer, out T? ret); return ret; } /// public override object Min(IEnumerable rowIndices) { - PrimitiveColumnComputation.Instance.Min(_columnContainer, rowIndices, out T ret); + PrimitiveColumnComputation.Instance.Min(_columnContainer, rowIndices, out T? ret); return ret; } /// public override object Product() { - PrimitiveColumnComputation.Instance.Product(_columnContainer, out T ret); + PrimitiveColumnComputation.Instance.Product(_columnContainer, out T? ret); return ret; } /// public override object Product(IEnumerable rowIndices) { - PrimitiveColumnComputation.Instance.Product(_columnContainer, rowIndices, out T ret); + PrimitiveColumnComputation.Instance.Product(_columnContainer, rowIndices, out T? ret); return ret; } /// public override object Sum() { - PrimitiveColumnComputation.Instance.Sum(_columnContainer, out T ret); + PrimitiveColumnComputation.Instance.Sum(_columnContainer, out T? 
ret); return ret; } /// public override object Sum(IEnumerable rowIndices) { - PrimitiveColumnComputation.Instance.Sum(_columnContainer, rowIndices, out T ret); + PrimitiveColumnComputation.Instance.Sum(_columnContainer, rowIndices, out T? ret); return ret; } /// diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs index 5410afa7ad..4105cb0c9c 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs @@ -26,14 +26,14 @@ internal interface IPrimitiveColumnComputation void CumulativeProduct(PrimitiveColumnContainer column, IEnumerable rows); void CumulativeSum(PrimitiveColumnContainer column); void CumulativeSum(PrimitiveColumnContainer column, IEnumerable rows); - void Max(PrimitiveColumnContainer column, out T ret); - void Max(PrimitiveColumnContainer column, IEnumerable rows, out T ret); - void Min(PrimitiveColumnContainer column, out T ret); - void Min(PrimitiveColumnContainer column, IEnumerable rows, out T ret); - void Product(PrimitiveColumnContainer column, out T ret); - void Product(PrimitiveColumnContainer column, IEnumerable rows, out T ret); - void Sum(PrimitiveColumnContainer column, out T ret); - void Sum(PrimitiveColumnContainer column, IEnumerable rows, out T ret); + void Max(PrimitiveColumnContainer column, out T? ret); + void Max(PrimitiveColumnContainer column, IEnumerable rows, out T? ret); + void Min(PrimitiveColumnContainer column, out T? ret); + void Min(PrimitiveColumnContainer column, IEnumerable rows, out T? ret); + void Product(PrimitiveColumnContainer column, out T? ret); + void Product(PrimitiveColumnContainer column, IEnumerable rows, out T? ret); + void Sum(PrimitiveColumnContainer column, out T? ret); + void Sum(PrimitiveColumnContainer column, IEnumerable rows, out T? 
ret); void Round(PrimitiveColumnContainer column); PrimitiveColumnContainer CreateTruncating(PrimitiveColumnContainer column) where U : unmanaged, INumber; } @@ -194,42 +194,42 @@ public void CumulativeSum(PrimitiveColumnContainer column, IEnumerable column, out bool ret) + public void Max(PrimitiveColumnContainer column, out bool? ret) { throw new NotSupportedException(); } - public void Max(PrimitiveColumnContainer column, IEnumerable rows, out bool ret) + public void Max(PrimitiveColumnContainer column, IEnumerable rows, out bool? ret) { throw new NotSupportedException(); } - public void Min(PrimitiveColumnContainer column, out bool ret) + public void Min(PrimitiveColumnContainer column, out bool? ret) { throw new NotSupportedException(); } - public void Min(PrimitiveColumnContainer column, IEnumerable rows, out bool ret) + public void Min(PrimitiveColumnContainer column, IEnumerable rows, out bool? ret) { throw new NotSupportedException(); } - public void Product(PrimitiveColumnContainer column, out bool ret) + public void Product(PrimitiveColumnContainer column, out bool? ret) { throw new NotSupportedException(); } - public void Product(PrimitiveColumnContainer column, IEnumerable rows, out bool ret) + public void Product(PrimitiveColumnContainer column, IEnumerable rows, out bool? ret) { throw new NotSupportedException(); } - public void Sum(PrimitiveColumnContainer column, out bool ret) + public void Sum(PrimitiveColumnContainer column, out bool? ret) { throw new NotSupportedException(); } - public void Sum(PrimitiveColumnContainer column, IEnumerable rows, out bool ret) + public void Sum(PrimitiveColumnContainer column, IEnumerable rows, out bool? 
ret) { throw new NotSupportedException(); } diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index cbc6cc9e80..301c94680d 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -1173,6 +1173,58 @@ public void TestComputationsIncludingDateTime() } } + [Fact] + public void TestIntComputations_MaxMin_WithNulls() + { + var column = new Int32DataFrameColumn("Int", new int?[] + { + null, + 2, + 1, + 4, + 3, + null + }); + + Assert.Equal(1, column.Min()); + Assert.Equal(4, column.Max()); + } + + [Fact] + public void TestDateTimeComputations_MaxMin_OnEmptyColumn() + { + var column = new DateTimeDataFrameColumn("DateTime"); + + Assert.Null(column.Min()); + Assert.Null(column.Max()); + } + + [Fact] + public void TestIntComputations_MaxMin_OnEmptyColumn() + { + var column = new Int32DataFrameColumn("Int"); + + Assert.Null(column.Min()); + Assert.Null(column.Max()); + } + + [Fact] + public void TestDateTimeComputations_MaxMin_WithNulls() + { + var dateTimeColumn = new DateTimeDataFrameColumn("DateTime", new DateTime?[] + { + null, + new DateTime(2022, 1, 1), + new DateTime(2020, 1, 1), + new DateTime(2023, 1, 1), + new DateTime(2021, 1, 1), + null + }); + + Assert.Equal(new DateTime(2020, 1, 1), dateTimeColumn.Min()); + Assert.Equal(new DateTime(2023, 1, 1), dateTimeColumn.Max()); + } + [Theory] [InlineData(5, 10)] [InlineData(-15, 10)] From 33b6432ee61bc0d5051181b0da3b1156a4499cc5 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 6 Jul 2023 14:22:07 +0300 Subject: [PATCH 28/31] Cherry pick Step 2 commit from 6733 --- .../NumberMathComputation.cs | 17 ++++++++--------- .../DataFrameTests.cs | 8 ++++---- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/Microsoft.Data.Analysis/NumberMathComputation.cs b/src/Microsoft.Data.Analysis/NumberMathComputation.cs index 029acafb31..d0e7d4102e 100644 --- 
a/src/Microsoft.Data.Analysis/NumberMathComputation.cs +++ b/src/Microsoft.Data.Analysis/NumberMathComputation.cs @@ -9,7 +9,6 @@ using System; using System.Collections.Generic; using System.Runtime.Versioning; -using Microsoft.ML.Data; namespace Microsoft.Data.Analysis { @@ -75,43 +74,43 @@ public void CumulativeSum(PrimitiveColumnContainer column, IEnumerable CumulativeApply(column, Add, rows); } - public void Max(PrimitiveColumnContainer column, out T ret) + public void Max(PrimitiveColumnContainer column, out T? ret) { ret = CalculateReduction(column, T.Max, column[0].Value); } - public void Max(PrimitiveColumnContainer column, IEnumerable rows, out T ret) + public void Max(PrimitiveColumnContainer column, IEnumerable rows, out T? ret) { ret = CalculateReduction(column, T.Max, rows); } - public void Min(PrimitiveColumnContainer column, out T ret) + public void Min(PrimitiveColumnContainer column, out T? ret) { ret = CalculateReduction(column, T.Min, column[0].Value); } - public void Min(PrimitiveColumnContainer column, IEnumerable rows, out T ret) + public void Min(PrimitiveColumnContainer column, IEnumerable rows, out T? ret) { ret = CalculateReduction(column, T.Min, rows); } - public void Product(PrimitiveColumnContainer column, out T ret) + public void Product(PrimitiveColumnContainer column, out T? ret) { ret = CalculateReduction(column, Multiply, T.One); } - public void Product(PrimitiveColumnContainer column, IEnumerable rows, out T ret) + public void Product(PrimitiveColumnContainer column, IEnumerable rows, out T? ret) { ret = CalculateReduction(column, Multiply, rows); } - public void Sum(PrimitiveColumnContainer column, out T ret) + public void Sum(PrimitiveColumnContainer column, out T? ret) { ret = CalculateReduction(column, Add, T.Zero); } - public void Sum(PrimitiveColumnContainer column, IEnumerable rows, out T ret) + public void Sum(PrimitiveColumnContainer column, IEnumerable rows, out T? 
ret) { ret = CalculateReduction(column, Add, rows); } diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index 301c94680d..db26d1cf82 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -1176,7 +1176,7 @@ public void TestComputationsIncludingDateTime() [Fact] public void TestIntComputations_MaxMin_WithNulls() { - var column = new Int32DataFrameColumn("Int", new int?[] + var column = new PrimitiveDataFrameColumn("Int", new int?[] { null, 2, @@ -1193,7 +1193,7 @@ public void TestIntComputations_MaxMin_WithNulls() [Fact] public void TestDateTimeComputations_MaxMin_OnEmptyColumn() { - var column = new DateTimeDataFrameColumn("DateTime"); + var column = new PrimitiveDataFrameColumn("DateTime"); Assert.Null(column.Min()); Assert.Null(column.Max()); @@ -1202,7 +1202,7 @@ public void TestDateTimeComputations_MaxMin_OnEmptyColumn() [Fact] public void TestIntComputations_MaxMin_OnEmptyColumn() { - var column = new Int32DataFrameColumn("Int"); + var column = new PrimitiveDataFrameColumn("Int"); Assert.Null(column.Min()); Assert.Null(column.Max()); @@ -1211,7 +1211,7 @@ public void TestIntComputations_MaxMin_OnEmptyColumn() [Fact] public void TestDateTimeComputations_MaxMin_WithNulls() { - var dateTimeColumn = new DateTimeDataFrameColumn("DateTime", new DateTime?[] + var dateTimeColumn = new PrimitiveDataFrameColumn("DateTime", new DateTime?[] { null, new DateTime(2022, 1, 1), From 11b5d6f040a564d570bd9ad73f5731e012ed5893 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 6 Jul 2023 14:28:18 +0300 Subject: [PATCH 29/31] Fixed code review findings # Conflicts: # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.tt --- .../DateTimeComputation.cs | 8 +--- .../NumberMathComputation.cs | 10 ++--- .../PrimitiveColumnContainer.cs | 41 
++++++++++--------- .../PrimitiveDataFrameColumn.Sort.cs | 2 +- .../PrimitiveDataFrameColumn.cs | 2 +- 5 files changed, 31 insertions(+), 32 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DateTimeComputation.cs b/src/Microsoft.Data.Analysis/DateTimeComputation.cs index 3e50ec0c82..4dae3cfd9d 100644 --- a/src/Microsoft.Data.Analysis/DateTimeComputation.cs +++ b/src/Microsoft.Data.Analysis/DateTimeComputation.cs @@ -202,10 +202,8 @@ public void Max(PrimitiveColumnContainer column, out DateTime? ret) var bitmapSpan = column.NullBitMapBuffers[b].ReadOnlySpan; for (int i = 0; i < readOnlySpan.Length; i++) { - int byteIndex = (int)((uint)i / 8); - //Check if bit is not set (value is null) - skip - if (((bitmapSpan[byteIndex] >> (i & 7)) & 1) == 0) + if (!BitmapHelper.IsValid(bitmapSpan, i)) continue; var val = readOnlySpan[i]; @@ -262,10 +260,8 @@ public void Min(PrimitiveColumnContainer column, out DateTime? ret) for (int i = 0; i < readOnlySpan.Length; i++) { - int byteIndex = (int)((uint)i / 8); - //Check if bit is not set (value is null) - skip - if (((bitmapSpan[byteIndex] >> (i & 7)) & 1) == 0) + if (!BitmapHelper.IsValid(bitmapSpan, i)) continue; var val = readOnlySpan[i]; diff --git a/src/Microsoft.Data.Analysis/NumberMathComputation.cs b/src/Microsoft.Data.Analysis/NumberMathComputation.cs index d0e7d4102e..6e7318df74 100644 --- a/src/Microsoft.Data.Analysis/NumberMathComputation.cs +++ b/src/Microsoft.Data.Analysis/NumberMathComputation.cs @@ -143,7 +143,7 @@ protected void Apply(PrimitiveColumnContainer column, Func func) var bitmap = column.NullBitMapBuffers[b].ReadOnlySpan; for (int i = 0; i < buffer.Length; i++) { - if (column.IsValid(bitmap, i)) + if (BitmapHelper.IsValid(bitmap, i)) { buffer[i] = func(buffer[i]); } @@ -160,7 +160,7 @@ protected void CumulativeApply(PrimitiveColumnContainer column, Func var bitmap = column.NullBitMapBuffers[b].ReadOnlySpan; for (int i = 0; i < buffer.Length; i++) { - if (column.IsValid(bitmap, i)) + if 
(BitmapHelper.IsValid(bitmap, i)) { ret = func(buffer[i], ret); buffer[i] = ret; @@ -179,7 +179,7 @@ protected T CalculateReduction(PrimitiveColumnContainer column, Func var bitMap = column.NullBitMapBuffers[b].ReadOnlySpan; for (int i = 0; i < buffer.Length; i++) { - if (column.IsValid(bitMap, i)) + if (BitmapHelper.IsValid(bitMap, i)) { ret = checked(func(ret, buffer[i])); } @@ -212,7 +212,7 @@ protected void CumulativeApply(PrimitiveColumnContainer column, Func } row -= minRange; - if (column.IsValid(bitmap, (int)row)) + if (BitmapHelper.IsValid(bitmap, (int)row)) { if (!isInitialized) { @@ -252,7 +252,7 @@ protected T CalculateReduction(PrimitiveColumnContainer column, Func } row -= minRange; - if (column.IsValid(bitMap, (int)row)) + if (BitmapHelper.IsValid(bitMap, (int)row)) { if (!isInitialized) { diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index d65255d5be..1a3ae978a0 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -12,8 +12,24 @@ namespace Microsoft.Data.Analysis { + internal static class BitmapHelper + { + // Faster to use when we already have a span since it avoids indexing + public static bool IsValid(ReadOnlySpan bitMapBufferSpan, int index) + { + int nullBitMapSpanIndex = index / 8; + byte thisBitMap = bitMapBufferSpan[nullBitMapSpanIndex]; + return IsBitSet(thisBitMap, index); + } + + public static bool IsBitSet(byte curBitMap, int index) + { + return ((curBitMap >> (index & 7)) & 1) != 0; + } + } + /// - /// PrimitiveDataFrameColumnContainer is just a store for the column data. APIs that want to change the data must be defined in PrimitiveDataFrameColumn + /// PrimitiveColumnContainer is just a store for the column data. 
APIs that want to change the data must be defined in PrimitiveDataFrameColumn /// /// internal partial class PrimitiveColumnContainer : IEnumerable @@ -223,7 +239,7 @@ public void ApplyElementwise(Func func) for (int i = 0; i < mutableBuffer.Length; i++) { long curIndex = i + prevLength; - bool isValid = IsValid(mutableNullBitMapBuffer, i); + bool isValid = BitmapHelper.IsValid(mutableNullBitMapBuffer, i); T? value = func(isValid ? mutableBuffer[i] : null, curIndex); mutableBuffer[i] = value.GetValueOrDefault(); SetValidityBit(mutableNullBitMapBuffer, i, value != null); @@ -246,7 +262,7 @@ public void Apply(Func func, PrimitiveColumnContainer(Func func, PrimitiveColumnContainer bitMapBufferSpan, int index) - { - int nullBitMapSpanIndex = index / 8; - byte thisBitMap = bitMapBufferSpan[nullBitMapSpanIndex]; - return IsBitSet(thisBitMap, index); - } - public bool IsValid(long index) => NullCount == 0 || GetValidityBit(index); private byte SetBit(byte curBitMap, int index, bool value) @@ -329,11 +337,6 @@ internal void SetValidityBit(long index, bool value) SetValidityBit(bitMapBuffer.Span, (int)index, value); } - private bool IsBitSet(byte curBitMap, int index) - { - return ((curBitMap >> (index & 7)) & 1) != 0; - } - private bool GetValidityBit(long index) { if ((uint)index >= Length) @@ -350,7 +353,7 @@ private bool GetValidityBit(long index) int bitMapBufferIndex = (int)((uint)index / 8); Debug.Assert(bitMapBuffer.Length > bitMapBufferIndex); byte curBitMap = bitMapBuffer[bitMapBufferIndex]; - return IsBitSet(curBitMap, (int)index); + return BitmapHelper.IsBitSet(curBitMap, (int)index); } public long Length; @@ -512,7 +515,7 @@ public PrimitiveColumnContainer Clone(PrimitiveColumnContainer mapIndic spanIndex = buffer.Length - 1 - i; long mapRowIndex = mapIndicesIntSpan.IsEmpty ? 
mapIndicesLongSpan[spanIndex] : mapIndicesIntSpan[spanIndex]; - bool mapRowIndexIsValid = mapIndices.IsValid(mapIndicesNullBitMapSpan, spanIndex); + bool mapRowIndexIsValid = BitmapHelper.IsValid(mapIndicesNullBitMapSpan, spanIndex); if (mapRowIndexIsValid && (mapRowIndex < minRange || mapRowIndex >= maxRange)) { int bufferIndex = (int)(mapRowIndex / maxCapacity); @@ -527,7 +530,7 @@ public PrimitiveColumnContainer Clone(PrimitiveColumnContainer mapIndic { mapRowIndex -= minRange; value = thisSpan[(int)mapRowIndex]; - isValid = IsValid(thisNullBitMapSpan, (int)mapRowIndex); + isValid = BitmapHelper.IsValid(thisNullBitMapSpan, (int)mapRowIndex); } retSpan[i] = isValid ? value : default; diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Sort.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Sort.cs index 699779a921..0b5ebd2120 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Sort.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.Sort.cs @@ -45,7 +45,7 @@ private PrimitiveDataFrameColumn GetSortIndices(IComparer comparer, out for (int i = 0; i < sortIndices.Length; i++) { int localSortIndex = sortIndices[i]; - if (_columnContainer.IsValid(nullBitMapSpan, localSortIndex)) + if (BitmapHelper.IsValid(nullBitMapSpan, localSortIndex)) { nonNullSortIndices.Add(sortIndices[i]); } diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index 5aed1c57f7..91421ddb06 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -541,7 +541,7 @@ public override Dictionary> GroupColumnValues(out for (int i = 0; i < readOnlySpan.Length; i++) { long currentLength = i + previousLength; - if (_columnContainer.IsValid(nullBitMapSpan, i)) + if (BitmapHelper.IsValid(nullBitMapSpan, i)) { bool containsKey = multimap.TryGetValue(readOnlySpan[i], out ICollection values); if (containsKey) From 
cb39ed834492b1e6ec232a17d26e6883044e9f8a Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Thu, 6 Jul 2023 17:40:48 +0300 Subject: [PATCH 30/31] Cherry pick PR 6724 (fix dataframe arithmetics for columns having several value buffers) # Conflicts: # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs # src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.tt --- .../PrimitiveDataFrameColumnArithmetic.cs | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs index ef3c9b1a8a..7fc3fa521e 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs @@ -1,4 +1,4 @@ - + // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. @@ -265,47 +265,51 @@ public void RightShift(PrimitiveColumnContainer column, int value) } public void ElementwiseEquals(PrimitiveColumnContainer left, PrimitiveColumnContainer right, PrimitiveColumnContainer ret) { + long index = 0; for (int b = 0; b < left.Buffers.Count; b++) { var span = left.Buffers[b].ReadOnlySpan; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { - ret[i] = (span[i] == otherSpan[i]); + ret[index++] = (span[i] == otherSpan[i]); } } } public void ElementwiseEquals(PrimitiveColumnContainer column, bool scalar, PrimitiveColumnContainer ret) { + long index = 0; for (int b = 0; b < column.Buffers.Count; b++) { var span = column.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { - ret[i] = (span[i] == scalar); + ret[index++] = (span[i] == scalar); } } } public void ElementwiseNotEquals(PrimitiveColumnContainer left, PrimitiveColumnContainer right, PrimitiveColumnContainer ret) { + long index = 0; for (int b = 0; b < 
left.Buffers.Count; b++) { var span = left.Buffers[b].ReadOnlySpan; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { - ret[i] = (span[i] != otherSpan[i]); + ret[index++] = (span[i] != otherSpan[i]); } } } public void ElementwiseNotEquals(PrimitiveColumnContainer column, bool scalar, PrimitiveColumnContainer ret) { + long index = 0; for (int b = 0; b < column.Buffers.Count; b++) { var span = column.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { - ret[i] = (span[i] != scalar); + ret[index++] = (span[i] != scalar); } } } @@ -451,47 +455,51 @@ public void RightShift(PrimitiveColumnContainer column, int value) } public void ElementwiseEquals(PrimitiveColumnContainer left, PrimitiveColumnContainer right, PrimitiveColumnContainer ret) { + long index = 0; for (int b = 0; b < left.Buffers.Count; b++) { var span = left.Buffers[b].ReadOnlySpan; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { - ret[i] = (span[i] == otherSpan[i]); + ret[index++] = (span[i] == otherSpan[i]); } } } public void ElementwiseEquals(PrimitiveColumnContainer column, DateTime scalar, PrimitiveColumnContainer ret) { + long index = 0; for (int b = 0; b < column.Buffers.Count; b++) { var span = column.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { - ret[i] = (span[i] == scalar); + ret[index++] = (span[i] == scalar); } } } public void ElementwiseNotEquals(PrimitiveColumnContainer left, PrimitiveColumnContainer right, PrimitiveColumnContainer ret) { + long index = 0; for (int b = 0; b < left.Buffers.Count; b++) { var span = left.Buffers[b].ReadOnlySpan; var otherSpan = right.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { - ret[i] = (span[i] != otherSpan[i]); + ret[index++] = (span[i] != otherSpan[i]); } } } public void ElementwiseNotEquals(PrimitiveColumnContainer column, DateTime scalar, PrimitiveColumnContainer ret) { + long index = 0; for (int b = 0; b < column.Buffers.Count; 
b++) { var span = column.Buffers[b].ReadOnlySpan; for (int i = 0; i < span.Length; i++) { - ret[i] = (span[i] != scalar); + ret[index++] = (span[i] != scalar); } } } From 2856d3a499d4c3acf74f2c8610a8af8bfc66987b Mon Sep 17 00:00:00 2001 From: Jake Radzikowski Date: Thu, 6 Jul 2023 12:10:11 -0700 Subject: [PATCH 31/31] Fix tests --- .../NumberMathComputation.cs | 25 +++++++++++++------ .../Microsoft.ML.Fairlearn.csproj | 3 ++- .../Microsoft.ML.AutoML.Tests.csproj | 3 ++- .../Microsoft.ML.Core.Tests.csproj | 2 ++ .../Microsoft.ML.Fairlearn.Tests.csproj | 2 ++ .../Microsoft.ML.Tests.csproj | 2 ++ 6 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.Data.Analysis/NumberMathComputation.cs b/src/Microsoft.Data.Analysis/NumberMathComputation.cs index 6e7318df74..70613017e3 100644 --- a/src/Microsoft.Data.Analysis/NumberMathComputation.cs +++ b/src/Microsoft.Data.Analysis/NumberMathComputation.cs @@ -8,6 +8,8 @@ using System; using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; using System.Runtime.Versioning; namespace Microsoft.Data.Analysis @@ -76,7 +78,7 @@ public void CumulativeSum(PrimitiveColumnContainer column, IEnumerable public void Max(PrimitiveColumnContainer column, out T? ret) { - ret = CalculateReduction(column, T.Max, column[0].Value); + ret = CalculateReduction(column, T.Max); } public void Max(PrimitiveColumnContainer column, IEnumerable rows, out T? ret) @@ -86,7 +88,7 @@ public void Max(PrimitiveColumnContainer column, IEnumerable rows, out public void Min(PrimitiveColumnContainer column, out T? ret) { - ret = CalculateReduction(column, T.Min, column[0].Value); + ret = CalculateReduction(column, T.Min); } public void Min(PrimitiveColumnContainer column, IEnumerable rows, out T? ret) @@ -97,7 +99,7 @@ public void Min(PrimitiveColumnContainer column, IEnumerable rows, out public void Product(PrimitiveColumnContainer column, out T? 
ret) { - ret = CalculateReduction(column, Multiply, T.One); + ret = CalculateReduction(column, Multiply); } public void Product(PrimitiveColumnContainer column, IEnumerable rows, out T? ret) @@ -107,7 +109,7 @@ public void Product(PrimitiveColumnContainer column, IEnumerable rows, public void Sum(PrimitiveColumnContainer column, out T? ret) { - ret = CalculateReduction(column, Add, T.Zero); + ret = CalculateReduction(column, Add); } public void Sum(PrimitiveColumnContainer column, IEnumerable rows, out T? ret) @@ -169,9 +171,10 @@ protected void CumulativeApply(PrimitiveColumnContainer column, Func } } - protected T CalculateReduction(PrimitiveColumnContainer column, Func func, T startValue) + protected T? CalculateReduction(PrimitiveColumnContainer column, Func func) { - var ret = startValue; + T? ret = null; + bool isInitialized = false; for (int b = 0; b < column.Buffers.Count; b++) { @@ -181,7 +184,15 @@ protected T CalculateReduction(PrimitiveColumnContainer column, Func { if (BitmapHelper.IsValid(bitMap, i)) { - ret = checked(func(ret, buffer[i])); + if (!isInitialized) + { + isInitialized = true; + ret = buffer[i]; + } + else + { + ret = checked(func(ret.Value, buffer[i])); + } } } } diff --git a/src/Microsoft.ML.Fairlearn/Microsoft.ML.Fairlearn.csproj b/src/Microsoft.ML.Fairlearn/Microsoft.ML.Fairlearn.csproj index 72b2ad0edb..7ad6c422ef 100644 --- a/src/Microsoft.ML.Fairlearn/Microsoft.ML.Fairlearn.csproj +++ b/src/Microsoft.ML.Fairlearn/Microsoft.ML.Fairlearn.csproj @@ -2,7 +2,8 @@ - netstandard2.0 + net6.0 + net6.0 Microsoft.ML.Fairlearn None diff --git a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj index f6b9a021d9..bdf8c5311e 100644 --- a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj +++ b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj @@ -1,7 +1,8 @@  $(NoWarn) - + net6.0 + net6.0 None diff --git 
a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj index 475eb5dbb1..49fddcd74f 100644 --- a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj +++ b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj @@ -8,6 +8,8 @@ None + net6.0 + net6.0 diff --git a/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj b/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj index b950086278..90850d97af 100644 --- a/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj +++ b/test/Microsoft.ML.Fairlearn.Tests/Microsoft.ML.Fairlearn.Tests.csproj @@ -2,6 +2,8 @@ None $(NoWarn);MSML_ParameterLocalVarName;MSML_PrivateFieldName;MSML_ExtendBaseTestClass;MSML_GeneralName + net6.0 + net6.0 diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj index c50abd3350..e933dc2f86 100644 --- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj +++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj @@ -1,6 +1,8 @@  + net6.0 + net6.0 Microsoft.ML.Tests true Test