Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix buffer read for windows style newlines #67

Merged
merged 8 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions .github/workflows/dotnet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,22 @@ jobs:
- name: Build (Framework 2.0 Tests)
run: msbuild ./tests/net20/tests.net20.csproj
- name: Test (net20)
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net20/bin/Debug/tests.net20.dll
working-directory: ./tests/net20/bin/Debug/
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net20.dll
- name: Build (Framework 4.0)
run: msbuild ./src/net40/src.net40.csproj
- name: Build (Framework 4.0 Tests)
run: msbuild ./tests/net40/tests.net40.csproj
- name: Test (net40)
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net40/bin/Debug/tests.net40.dll
working-directory: ./tests/net40/bin/Debug
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net40.dll
- name: Build (Framework 4.5)
run: msbuild ./src/net45/src.net45.csproj
- name: Build (Framework 4.5 Tests)
run: msbuild ./tests/net45/tests.net45.csproj
- name: Test (net45)
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net45/bin/Debug/tests.net45.dll
working-directory: ./tests/net45/bin/Debug/
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net45.dll
- name: Build (DotNet Core 5.0 and NetStandard 2.0)
run: dotnet build ./csharp-csv-reader.sln
- name: Test (net50)
Expand Down
6 changes: 3 additions & 3 deletions CSVFile.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<package >
<metadata>
<id>CSVFile</id>
<version>3.1.4</version>
<version>3.2.0</version>
<title>CSVFile</title>
<authors>Ted Spence</authors>
<owners>Ted Spence</owners>
Expand All @@ -15,8 +15,8 @@
<releaseNotes>
August 5, 2024

* Add serialization options for arrays and objects
* Fix bad deploy of 3.1.3
* Fix issue with Windows-style newlines crossing chunks found by @joelverhagen
* Fix issue with endless loops reported by @wvvegt
</releaseNotes>
<readme>docs/README.md</readme>
<copyright>Copyright 2006 - 2024</copyright>
Expand Down
8 changes: 8 additions & 0 deletions src/CSV.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
/// <summary>
/// Root class that contains static functions for straightforward CSV parsing
/// </summary>
public static class CSV

Check warning on line 29 in src/CSV.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Rename class 'CSV' to match pascal case naming rules, consider using 'Csv'.
{
/// <summary>
/// Use this to determine what version of DotNet was used to build this library
Expand Down Expand Up @@ -73,6 +73,10 @@
{
yield return row;
}
else if (inStream.EndOfStream)
{
break;
}
}
}

Expand Down Expand Up @@ -101,6 +105,10 @@
{
yield return row;
}
else if (inStream.EndOfStream)
{
break;
}
}
}
#endif
Expand Down Expand Up @@ -206,7 +214,7 @@
/// <param name="list">The array of objects to serialize</param>
/// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
/// <returns>The completed CSV string representing one line per element in list</returns>
public static string Serialize<T>(IEnumerable<T> list, CSVSettings settings = null) where T : class, new()

Check warning on line 217 in src/CSV.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

All 'Serialize' method overloads should be adjacent.
{
if (settings == null)
{
Expand Down
6 changes: 4 additions & 2 deletions src/CSVStateMachine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
/// <summary>
/// The current state of CSV processing, given the text that has been seen so far
/// </summary>
public enum CSVState

Check warning on line 19 in src/CSVStateMachine.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Rename this enumeration to match the regular expression: '^([A-Z]{1,3}[a-z0-9]+)*([A-Z]{2})?$'.
{
/// <summary>
/// We have reached the end of the CSV and everything is done
Expand All @@ -43,7 +43,7 @@
/// Since some CSV files have a single row of data that comprises multiple lines, this state machine may or may
/// not produce one row of data for each chunk of text received.
/// </summary>
public class CSVStateMachine

Check warning on line 46 in src/CSVStateMachine.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Rename class 'CSVStateMachine' to match pascal case naming rules, consider using 'CsvStateMachine'.
{
private readonly CSVSettings _settings;
private string _line;
Expand All @@ -65,7 +65,7 @@
/// <returns></returns>
public bool NeedsMoreText()
{
return String.IsNullOrEmpty(_line) || _position >= _line.Length;
return String.IsNullOrEmpty(_line) || _position + _settings.LineSeparator.Length >= _line.Length;
}

/// <summary>
Expand Down Expand Up @@ -202,11 +202,13 @@
var notEnoughChars = _position + _settings.LineSeparator.Length > _line.Length;
if (notEnoughChars && !reachedEnd)
{
// Backtrack one character so we can pick up the line separator completely next time
_position--;
return null;
}

// If we have reached the end, but this isn't a complete line separator, it's just text
if (notEnoughChars && reachedEnd)
if (notEnoughChars)
{
_work.Append(c);
}
Expand Down
1,695 changes: 1,695 additions & 0 deletions tests/PackageAssets.csv

Large diffs are not rendered by default.

39 changes: 39 additions & 0 deletions tests/ReaderTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
* Home page: https://github.com/tspence/csharp-csv-reader
*/
using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using NUnit.Framework;
using CSVFile;
#if HAS_ASYNC
Expand Down Expand Up @@ -336,6 +340,41 @@ public void TestMultipleNewlines()
}
}

/// <summary>
/// Regression test for issue 62: reading a large stream whose rows are separated by
/// Windows-style newlines must yield exactly one row per input line.
/// </summary>
/// <remarks>
/// The line separator is pinned to "\r\n" for both the generated text and the reader
/// settings rather than taken from <see cref="Environment.NewLine"/>, so the two-character
/// separator is exercised on every platform instead of only on Windows build agents.
/// </remarks>
[Test]
public void TestIssue62()
{
    // Two-character separator under test; must not vary with the host operating system
    const string windowsNewline = "\r\n";
    const int desiredLines = 53_543;

    // Cycle through the fixture file as many times as needed to reach the desired row count
    var fixtureLines = File.ReadAllLines("PackageAssets.csv");
    var repetitions = desiredLines / fixtureLines.Length + 1;
    var linesToRead = Enumerable
        .Repeat(fixtureLines, repetitions)
        .SelectMany(lines => lines)
        .Take(desiredLines)
        .ToArray();

    var config = new CSVSettings
    {
        HeaderRowIncluded = false,
        LineSeparator = windowsNewline,
    };

    var rawText = string.Join(windowsNewline, linesToRead);
    var rawBytes = Encoding.UTF8.GetBytes(rawText);

    var outputLines = 0;
    using (var memoryStream = new MemoryStream(rawBytes))
    using (var streamReader = new StreamReader(memoryStream))
    using (var csvReader = new CSVReader(streamReader, config))
    {
        // Only the number of parsed rows matters here; the row contents are not inspected
        foreach (var parsedRow in csvReader)
        {
            outputLines++;
        }
    }

    Assert.AreEqual(desiredLines, outputLines);
}

#if HAS_ASYNC_IENUM
[Test]
public async Task TestAsyncReader()
Expand Down
6 changes: 6 additions & 0 deletions tests/net20/tests.net20.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@
<Name>src.net20</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
Expand Down
6 changes: 6 additions & 0 deletions tests/net40/tests.net40.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@
<Name>src.net40</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
Expand Down
6 changes: 6 additions & 0 deletions tests/net45/tests.net45.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@
<Name>src.net45</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
Expand Down
7 changes: 7 additions & 0 deletions tests/net50/tests.net50.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,11 @@
<ProjectReference Include="..\..\src\net50\src.net50.csproj" />
</ItemGroup>

<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>

</Project>
7 changes: 7 additions & 0 deletions tests/net60/tests.net60.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,12 @@
<ProjectReference Include="..\..\src\netstandard20\src.netstandard20.csproj" />
</ItemGroup>

<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>

</Project>

Loading