From 0e07fab83f7e14d034f9c004dd4138839527783e Mon Sep 17 00:00:00 2001 From: Jonathan Matthews <44363860+jonathanmatthews@users.noreply.github.com> Date: Tue, 23 Nov 2021 14:24:12 +0000 Subject: [PATCH] Trim column headers by default, and bump major version (#32) Add the option to trim column headers when reading from the input text, and make it the default. This is a potentially breaking change, so increment the major version as well. --- README.md | 1 + .../DelimitedDataParser.csproj | 2 +- .../DelimitedDataReader.cs | 13 +++++++- src/DelimitedDataParser/Parser.cs | 20 +++++++++++++ .../ExporterTest.Reader.cs | 2 +- test/DelimitedDataParser.Test/ParserTest.cs | 30 +++++++++++++++++++ 6 files changed, 65 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c48caf0..02b5aea 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ while (reader.Read()) * `FieldSeparator` - the character used as field delimiter in the text file. Default: `,` (i.e., CSV). * `UseFirstRowAsColumnHeaders` - specifies whether the first row of the text file should be treated as a header row. Default: `true`. +* `TrimColumnHeaders` - specifies whether the column headers, if present, should have whitespace trimmed before being used as a key. Default: `true`. ## Exporter diff --git a/src/DelimitedDataParser/DelimitedDataParser.csproj b/src/DelimitedDataParser/DelimitedDataParser.csproj index 748e592..b64ea02 100644 --- a/src/DelimitedDataParser/DelimitedDataParser.csproj +++ b/src/DelimitedDataParser/DelimitedDataParser.csproj @@ -6,7 +6,7 @@ ..\..\CustomExtendedCorrectnessRules.ruleset Enable · enable.com Enable - 4.2.1 + 5.0.0 C# library for parsing and exporting tabular data in delimited format (e.g. CSV). 
Copyright © 2018 https://github.com/EnableSoftware.png diff --git a/src/DelimitedDataParser/DelimitedDataReader.cs b/src/DelimitedDataParser/DelimitedDataReader.cs index c06582a..6062b8d 100644 --- a/src/DelimitedDataParser/DelimitedDataReader.cs +++ b/src/DelimitedDataParser/DelimitedDataReader.cs @@ -5,6 +5,7 @@ using System.Data.Common; using System.Globalization; using System.IO; +using System.Linq; using System.Text; using System.Threading; @@ -20,6 +21,7 @@ internal class DelimitedDataReader : DbDataReader private readonly Encoding _encoding; private readonly char _fieldSeparator; private readonly bool _useFirstRowAsColumnHeaders; + private readonly bool _trimColumnHeaders; private readonly CancellationToken _cancellationToken; private readonly char[] _buffer = new char[4096]; @@ -37,12 +39,14 @@ internal class DelimitedDataReader : DbDataReader Encoding encoding, char fieldSeparator, bool useFirstRowAsColumnHeaders, + bool trimColumnHeaders, CancellationToken cancellationToken = default(CancellationToken)) { _textReader = textReader ?? throw new ArgumentNullException(nameof(textReader)); _encoding = encoding ?? throw new ArgumentException(nameof(encoding)); _fieldSeparator = fieldSeparator; _useFirstRowAsColumnHeaders = useFirstRowAsColumnHeaders; + _trimColumnHeaders = trimColumnHeaders; _cancellationToken = cancellationToken; } @@ -601,7 +605,14 @@ private void EnsureInitialised() private void GenerateFieldLookup() { // Here we assume that the current row is the header row. 
- _fieldNameLookup = new List(_currentRow).AsReadOnly(); + if (_trimColumnHeaders) + { + _fieldNameLookup = _currentRow.Select(o => o?.Trim()).ToList().AsReadOnly(); + } + else + { + _fieldNameLookup = new List(_currentRow).AsReadOnly(); + } } private void GenerateDefaultFieldNameLookup() diff --git a/src/DelimitedDataParser/Parser.cs b/src/DelimitedDataParser/Parser.cs index d365824..8e4c514 100644 --- a/src/DelimitedDataParser/Parser.cs +++ b/src/DelimitedDataParser/Parser.cs @@ -22,6 +22,7 @@ public class Parser private ISet _columnNamesAsText; private char _fieldSeparator = ','; private bool _useFirstRowAsColumnHeaders = true; + private bool _trimColumnHeaders = true; /// /// Initializes a new instance of the class. @@ -64,6 +65,23 @@ public virtual bool UseFirstRowAsColumnHeaders } } + /// + /// Gets or sets a value indicating whether the column headers, if present, should have whitespace trimmed before being used as a key. + /// The default value is true. + /// + public virtual bool TrimColumnHeaders + { + get + { + return _trimColumnHeaders; + } + + set + { + _trimColumnHeaders = value; + } + } + /// /// Clear all "columns as text" settings. 
/// @@ -183,6 +201,7 @@ public virtual DbDataReader ParseReader(TextReader textReader, Encoding encoding encoding, _fieldSeparator, _useFirstRowAsColumnHeaders, + _trimColumnHeaders, cancellationToken); } @@ -207,6 +226,7 @@ public virtual DbDataReader ParseReader(StreamReader streamReader, CancellationT streamReader.CurrentEncoding, _fieldSeparator, _useFirstRowAsColumnHeaders, + _trimColumnHeaders, cancellationToken); } diff --git a/test/DelimitedDataParser.Test/ExporterTest.Reader.cs b/test/DelimitedDataParser.Test/ExporterTest.Reader.cs index 7a08236..ea23a3e 100644 --- a/test/DelimitedDataParser.Test/ExporterTest.Reader.cs +++ b/test/DelimitedDataParser.Test/ExporterTest.Reader.cs @@ -603,7 +603,7 @@ public void ExportReader_Supports_Large_Dataset() { stringReader = new StringReader(expected.ToString()); - using (var dataReader = new DelimitedDataReader(stringReader, Encoding.UTF8, ',', true)) + using (var dataReader = new DelimitedDataReader(stringReader, Encoding.UTF8, ',', true, false)) { stringReader = null; diff --git a/test/DelimitedDataParser.Test/ParserTest.cs b/test/DelimitedDataParser.Test/ParserTest.cs index d72b4c9..5c0bf0a 100644 --- a/test/DelimitedDataParser.Test/ParserTest.cs +++ b/test/DelimitedDataParser.Test/ParserTest.cs @@ -56,6 +56,36 @@ public void Can_Parse_Column_Names_From_First_Row() Assert.Equal("Field 3", output.Columns[2].ColumnName); } + [Fact] + public void Will_Trim_Column_Names_By_Default() + { + string input = @"Field 1 , Field 2, Field 3 "; + + var parser = new Parser(); + var output = parser.Parse(GetTextReader(input)); + + Assert.Equal(3, output.Columns.Count); + Assert.Equal("Field 1", output.Columns[0].ColumnName); + Assert.Equal("Field 2", output.Columns[1].ColumnName); + Assert.Equal("Field 3", output.Columns[2].ColumnName); + } + + [Fact] + public void Will_Not_Trim_Column_Names_If_Prevented() + { + string input = @"Field 1 , Field 2, Field 3 "; + + var parser = new Parser(); + parser.TrimColumnHeaders = false; + + 
var output = parser.Parse(GetTextReader(input)); + + Assert.Equal(3, output.Columns.Count); + Assert.Equal("Field 1 ", output.Columns[0].ColumnName); + Assert.Equal(" Field 2", output.Columns[1].ColumnName); + Assert.Equal(" Field 3 ", output.Columns[2].ColumnName); + } + [Fact] public void Can_Parse_Empty_Column_Names() {