Skip to content
This repository has been archived by the owner on Feb 12, 2024. It is now read-only.

Commit

Permalink
Trim column headers by default, and bump major version (#32)
Browse files Browse the repository at this point in the history
Add the option to trim column headers when reading from the input text, and make it the default. This is a potentially breaking change, so increment the major version as well.
  • Loading branch information
jonathanmatthews committed Nov 23, 2021
1 parent 310dd82 commit 0e07fab
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 3 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ while (reader.Read())

* `FieldSeparator` - the character used as field delimiter in the text file. Default: `,` (i.e., CSV).
* `UseFirstRowAsColumnHeaders` - specifies whether the first row of the text file should be treated as a header row. Default: `true`.
* `TrimColumnHeaders` - specifies whether the column headers, if present, should have leading and trailing whitespace trimmed before being used as a key. Default: `true`.

## Exporter

Expand Down
2 changes: 1 addition & 1 deletion src/DelimitedDataParser/DelimitedDataParser.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<CodeAnalysisRuleSet>..\..\CustomExtendedCorrectnessRules.ruleset</CodeAnalysisRuleSet>
<Authors>Enable · enable.com</Authors>
<Company>Enable</Company>
<Version>4.2.1</Version>
<Version>5.0.0</Version>
<Description>C# library for parsing and exporting tabular data in delimited format (e.g. CSV).</Description>
<Copyright>Copyright © 2018</Copyright>
<PackageIconUrl>https://github.com/EnableSoftware.png</PackageIconUrl>
Expand Down
13 changes: 12 additions & 1 deletion src/DelimitedDataParser/DelimitedDataReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Data.Common;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;

Expand All @@ -20,6 +21,7 @@ internal class DelimitedDataReader : DbDataReader
private readonly Encoding _encoding;
private readonly char _fieldSeparator;
private readonly bool _useFirstRowAsColumnHeaders;
private readonly bool _trimColumnHeaders;
private readonly CancellationToken _cancellationToken;
private readonly char[] _buffer = new char[4096];

Expand All @@ -37,12 +39,14 @@ internal class DelimitedDataReader : DbDataReader
Encoding encoding,
char fieldSeparator,
bool useFirstRowAsColumnHeaders,
bool trimColumnHeaders,
CancellationToken cancellationToken = default(CancellationToken))
{
_textReader = textReader ?? throw new ArgumentNullException(nameof(textReader));
_encoding = encoding ?? throw new ArgumentException(nameof(encoding));
_fieldSeparator = fieldSeparator;
_useFirstRowAsColumnHeaders = useFirstRowAsColumnHeaders;
_trimColumnHeaders = trimColumnHeaders;
_cancellationToken = cancellationToken;
}

Expand Down Expand Up @@ -601,7 +605,14 @@ private void EnsureInitialised()
/// <summary>
/// Builds the read-only list of field names from the current row.
/// </summary>
/// <remarks>
/// When header trimming is enabled, each non-null header has its leading and
/// trailing whitespace removed; null entries are passed through unchanged.
/// </remarks>
private void GenerateFieldLookup()
{
    // Here we assume that the current row is the header row.
    // The lookup is assigned exactly once, in whichever branch applies, so no
    // throwaway list is allocated and then immediately overwritten.
    if (_trimColumnHeaders)
    {
        _fieldNameLookup = _currentRow.Select(o => o?.Trim()).ToList().AsReadOnly();
    }
    else
    {
        _fieldNameLookup = new List<string>(_currentRow).AsReadOnly();
    }
}

private void GenerateDefaultFieldNameLookup()
Expand Down
20 changes: 20 additions & 0 deletions src/DelimitedDataParser/Parser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public class Parser
private ISet<string> _columnNamesAsText;
private char _fieldSeparator = ',';
private bool _useFirstRowAsColumnHeaders = true;
private bool _trimColumnHeaders = true;

/// <summary>
/// Initializes a new instance of the <see cref="Parser"/> class.
Expand Down Expand Up @@ -64,6 +65,23 @@ public virtual bool UseFirstRowAsColumnHeaders
}
}

/// <summary>
/// Gets or sets a value indicating whether the column headers, if present,
/// should have whitespace trimmed before being used as a key.
/// The default value is <c>true</c>.
/// </summary>
public virtual bool TrimColumnHeaders
{
    get => _trimColumnHeaders;
    set => _trimColumnHeaders = value;
}

/// <summary>
/// Clear all "columns as text" settings.
/// </summary>
Expand Down Expand Up @@ -183,6 +201,7 @@ public virtual DbDataReader ParseReader(TextReader textReader, Encoding encoding
encoding,
_fieldSeparator,
_useFirstRowAsColumnHeaders,
_trimColumnHeaders,
cancellationToken);
}

Expand All @@ -207,6 +226,7 @@ public virtual DbDataReader ParseReader(StreamReader streamReader, CancellationT
streamReader.CurrentEncoding,
_fieldSeparator,
_useFirstRowAsColumnHeaders,
_trimColumnHeaders,
cancellationToken);
}

Expand Down
2 changes: 1 addition & 1 deletion test/DelimitedDataParser.Test/ExporterTest.Reader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ public void ExportReader_Supports_Large_Dataset()
{
stringReader = new StringReader(expected.ToString());

using (var dataReader = new DelimitedDataReader(stringReader, Encoding.UTF8, ',', true))
using (var dataReader = new DelimitedDataReader(stringReader, Encoding.UTF8, ',', true, false))
{
stringReader = null;

Expand Down
30 changes: 30 additions & 0 deletions test/DelimitedDataParser.Test/ParserTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,36 @@ public void Can_Parse_Column_Names_From_First_Row()
Assert.Equal("Field 3", output.Columns[2].ColumnName);
}

[Fact]
public void Will_Trim_Column_Names_By_Default()
{
    // Header cells carry leading and/or trailing spaces; a parser left on its
    // default settings is expected to strip them from the column names.
    const string paddedHeaderRow = @"Field 1 , Field 2, Field 3 ";
    var expectedNames = new[] { "Field 1", "Field 2", "Field 3" };

    var columns = new Parser().Parse(GetTextReader(paddedHeaderRow)).Columns;

    Assert.Equal(3, columns.Count);

    for (var index = 0; index < expectedNames.Length; index++)
    {
        Assert.Equal(expectedNames[index], columns[index].ColumnName);
    }
}

[Fact]
public void Will_Not_Trim_Column_Names_If_Prevented()
{
    // With trimming switched off, the column names must keep the exact
    // whitespace that surrounds each header cell in the input.
    const string paddedHeaderRow = @"Field 1 , Field 2, Field 3 ";
    var expectedNames = new[] { "Field 1 ", " Field 2", " Field 3 " };

    var untrimmedParser = new Parser { TrimColumnHeaders = false };

    var columns = untrimmedParser.Parse(GetTextReader(paddedHeaderRow)).Columns;

    Assert.Equal(3, columns.Count);

    for (var index = 0; index < expectedNames.Length; index++)
    {
        Assert.Equal(expectedNames[index], columns[index].ColumnName);
    }
}

[Fact]
public void Can_Parse_Empty_Column_Names()
{
Expand Down

0 comments on commit 0e07fab

Please sign in to comment.