Skip to content

Commit 2e03e72

Browse files
committed
Do not write UTF8 BOM. Fix #53.
1 parent e3c8826 commit 2e03e72

File tree

9 files changed

+130
-18
lines changed

9 files changed

+130
-18
lines changed
Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
1-
namespace NetTopologySuite.IO.Esri.Dbf
1+
using System.Text;
2+
3+
namespace NetTopologySuite.IO.Esri.Dbf
24
{
5+
/// <summary>
6+
/// Manages configurations and constants specific to the structure and operation of DBF files in the dBASE III format.
7+
/// </summary>
38
internal static class Dbf
49
{
5-
public readonly static int TableDescriptorSize = 32; // Number of bytes in the table header
10+
internal readonly static int TableDescriptorSize = 32; // Number of bytes in the table header
611

712
internal readonly static int FieldDescriptorSize = 32; // Number of bytes in the field descriptor
813
internal readonly static int MaxFieldCount = 255;
9-
public readonly static byte Dbase3Version = 0x03; // dBASE III
10-
public readonly static byte HeaderTerminatorMark = 0x0D;
14+
internal readonly static byte Dbase3Version = 0x03; // dBASE III
15+
internal readonly static byte HeaderTerminatorMark = 0x0D;
16+
17+
internal readonly static byte DeletedRecordMark = 0x2A; // '*'
18+
internal readonly static byte ValidRecordMark = 0x20; // ' '
19+
internal readonly static byte EndOfFileMark = 0x1A;
1120

12-
public readonly static byte DeletedRecordMark = 0x2A; // '*'
13-
public readonly static byte ValidRecordMark = 0x20; // ' '
14-
public readonly static byte EndOfFileMark = 0x1A;
21+
internal static readonly int MaxFieldNameLength = 10;
1522

16-
public static readonly int MaxFieldNameLength = 10;
23+
internal readonly static Encoding DefaultEncoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
1724
}
1825
}

src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfEncoding.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static DbfEncoding()
2929

3030
// https://support.esri.com/en/technical-article/000013192
3131

32-
AddLanguageDriverId(0, Encoding.UTF8); // For unknown LDID
32+
AddLanguageDriverId(0, Dbf.DefaultEncoding); // For unknown LDID
3333
AddLanguageDriverId(0x03, Encoding.Default); // OS Default
3434
AddLanguageDriverId(0x57, Encoding.Default); // OS Default
3535

src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfReader.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ private void Initialize(Stream stream, Encoding encoding = null)
106106
RecordSize = Buffer.ReadDbfRecordSize();
107107
Buffer.Advance(17);
108108

109-
Encoding = encoding ?? Buffer.ReadDbfEncoding() ?? Encoding.UTF8; // null => Try to read encoding from DBF's reserved bytes
109+
Encoding = encoding ?? Buffer.ReadDbfEncoding() ?? Dbf.DefaultEncoding; // null => Try to read encoding from DBF's reserved bytes
110110
Buffer.Advance(2);
111111

112112
// --- File header is done, read field descriptor header now ---

src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfStreamExtensions.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ public static Encoding ReadDbfEncoding(this Stream stream)
8383

8484
public static void WriteDbaseFieldDescriptor(this Stream stream, DbfField field, Encoding encoding)
8585
{
86-
encoding = encoding ?? Encoding.UTF8;
86+
encoding = encoding ?? Dbf.DefaultEncoding;
8787
var name = field.Name.PadRight(Dbf.MaxFieldNameLength, char.MinValue); // Field name must have empty space zero-filled
8888

8989

@@ -97,7 +97,7 @@ public static void WriteDbaseFieldDescriptor(this Stream stream, DbfField field,
9797
}
9898
public static DbfField ReadDbaseFieldDescriptor(this Stream stream, Encoding encoding)
9999
{
100-
encoding = encoding ?? Encoding.UTF8;
100+
encoding = encoding ?? Dbf.DefaultEncoding;
101101

102102
var name = stream.ReadString(Dbf.MaxFieldNameLength, encoding)?.Trim();
103103
stream.Advance(1); // Reserved (field name terminator)

src/NetTopologySuite.IO.Esri.Shapefile/Dbf/DbfWriter.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public class DbfWriter : ManagedDisposable
5151
/// <param name="encoding">DBF file encoding. Defaults to UTF-8 without a byte order mark (BOM).</param>
5252
public DbfWriter(Stream stream, IReadOnlyList<DbfField> fields, Encoding encoding = null)
5353
{
54-
Encoding = encoding ?? Encoding.UTF8;
54+
Encoding = encoding ?? Dbf.DefaultEncoding;
5555
IntializeFields(fields);
5656
DbfStream = stream ?? throw new ArgumentNullException("Uninitialized dBASE stream.", nameof(stream));
5757
WriteHeader();
@@ -66,7 +66,7 @@ public DbfWriter(Stream stream, IReadOnlyList<DbfField> fields, Encoding encodin
6666
/// <param name="encoding">DBF file encoding. Defaults to UTF-8 without a byte order mark (BOM).</param>
6767
public DbfWriter(string dbfPath, IReadOnlyList<DbfField> fields, Encoding encoding = null)
6868
{
69-
Encoding = encoding ?? Encoding.UTF8;
69+
Encoding = encoding ?? Dbf.DefaultEncoding;
7070
IntializeFields(fields);
7171
WriteCpgEncoding(dbfPath, encoding);
7272
try

src/NetTopologySuite.IO.Esri.Shapefile/Dbf/Fields/DbfCharacterField.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ internal override void WriteValue(Stream stream)
7171
private Encoding _encoding = null;
7272
internal Encoding Encoding
7373
{
74-
get { return _encoding ?? Encoding.UTF8; }
74+
get { return _encoding ?? Dbf.DefaultEncoding; }
7575
set
7676
{
7777
if (value == null)

src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriter.T.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,10 @@ internal ShapefileWriter(string shpPath, ShapefileWriterOptions options)
8888
ShpWriter = CreateShpWriter(shpStream, shxStream); // It calls this.ShapeType
8989

9090
if (!string.IsNullOrWhiteSpace(options.Projection))
91-
File.WriteAllText(Path.ChangeExtension(shpPath, ".prj"), options.Projection);
91+
{
92+
var prjPath = Path.ChangeExtension(shpPath, ".prj");
93+
File.WriteAllText(prjPath, options.Projection, options.Encoding);
94+
}
9295
}
9396
catch
9497
{

src/NetTopologySuite.IO.Esri.Shapefile/Shapefiles/Writers/ShapefileWriterOptions.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@ public class ShapefileWriterOptions
2121
/// </summary>
2222
public List<DbfField> Fields { get; } = new List<DbfField>();
2323

24-
private Encoding _encoding = Encoding.UTF8;
24+
private Encoding _encoding = Dbf.Dbf.DefaultEncoding;
2525
/// <summary>
2626
/// DBF file encoding.
2727
/// </summary>
2828
public Encoding Encoding
2929
{
3030
get => _encoding;
31-
set => _encoding = value ?? Encoding.UTF8;
31+
set => _encoding = value ?? Dbf.Dbf.DefaultEncoding;
3232
}
3333

3434
/// <summary>
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
using NetTopologySuite.IO.Esri.Dbf.Fields;
2+
using NetTopologySuite.IO.Esri.Shapefiles.Writers;
3+
using NUnit.Framework;
4+
using System;
5+
using System.Collections.Generic;
6+
using System.IO;
7+
using System.Text;
8+
9+
namespace NetTopologySuite.IO.Esri.Test.Issues;
10+
11+
/// <summary>
12+
/// https://github.com/NetTopologySuite/NetTopologySuite.IO.Esri/issues/53
13+
/// </summary>
14+
internal class Issue053
15+
{
16+
[Test]
17+
public void Projection_Utf8_BOM()
18+
{
19+
var fields = new List<DbfField>();
20+
var fidField = fields.AddNumericInt32Field("fid");
21+
var options = new ShapefileWriterOptions(ShapeType.Polygon, fields.ToArray())
22+
{
23+
Projection = "GEOGCS[\"GCS_WGS_1984\",DATUM[\"D_WGS_1984\",SPHEROID[\"WGS_1984\",6378137.0,298.257223563]],PRIMEM[\"Greenwich\",0.0],UNIT[\"Degree\",0.0174532925199433]]"
24+
};
25+
26+
var shpPath = TestShapefiles.GetTempShpPath();
27+
using (var shpWriter = Shapefile.OpenWrite(shpPath, options))
28+
{
29+
shpWriter.Geometry = SampleGeometry.SampleMultiPolygon;
30+
fidField.NumericValue = 1;
31+
shpWriter.Write();
32+
}
33+
34+
var expectedProjectionString = options.Projection;
35+
var expectedProjectionBytes = options.Encoding.GetBytes(options.Projection);
36+
37+
var prjPath = Path.ChangeExtension(shpPath, ".prj");
38+
var storedProjectionString = File.ReadAllText(prjPath);
39+
var storedProjectionBytes = File.ReadAllBytes(prjPath);
40+
41+
TestShapefiles.DeleteShp(shpPath);
42+
43+
Assert.AreEqual(expectedProjectionString, storedProjectionString);
44+
Assert.AreEqual(expectedProjectionBytes, storedProjectionBytes);
45+
}
46+
47+
[Test]
48+
public static void Utf8_BOM_Default()
49+
{
50+
var encoding = Encoding.UTF8;
51+
var filePath = Path.GetTempFileName();
52+
var expectedString = "abc";
53+
var expectedBytes = encoding.GetBytes(expectedString);
54+
WriteFile(filePath, expectedString, encoding);
55+
56+
var storedString = File.ReadAllText(filePath, encoding);
57+
var storedBytes = File.ReadAllBytes(filePath);
58+
59+
Assert.AreEqual(expectedString, storedString); // C# is clever enough to ignore BOM when reading
60+
Assert.AreNotEqual(expectedBytes, storedBytes); // Not equal because of BOM stored by default
61+
}
62+
63+
[Test]
64+
public static void Utf8_BOM_Included()
65+
{
66+
var encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true);
67+
var filePath = Path.GetTempFileName();
68+
var expectedString = "abc";
69+
var expectedBytes = encoding.GetBytes(expectedString);
70+
WriteFile(filePath, expectedString, encoding);
71+
72+
var storedString = File.ReadAllText(filePath, encoding);
73+
var storedBytes = File.ReadAllBytes(filePath);
74+
75+
Assert.AreEqual(expectedString, storedString); // C# is clever enough to ignore BOM when reading
76+
Assert.AreNotEqual(expectedBytes, storedBytes); // Not equal because of BOM stored explicitly
77+
}
78+
79+
[Test]
80+
public static void Utf8_BOM_Excluded()
81+
{
82+
var encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
83+
var filePath = Path.GetTempFileName();
84+
var expectedString = "abc";
85+
var expectedBytes = encoding.GetBytes(expectedString);
86+
WriteFile(filePath, expectedString, encoding);
87+
88+
var storedString = File.ReadAllText(filePath, encoding);
89+
var storedBytes = File.ReadAllBytes(filePath);
90+
91+
Assert.AreEqual(expectedString, storedString);
92+
Assert.AreEqual(expectedBytes, storedBytes);
93+
}
94+
95+
private static void WriteFile(string filePath, string content, Encoding encoding)
96+
{
97+
using (StreamWriter writer = new StreamWriter(filePath, false, encoding))
98+
{
99+
writer.Write(content);
100+
}
101+
}
102+
}

0 commit comments

Comments
 (0)