Skip to content

Commit

Permalink
Update the serialised object formatter to use a tokeniser instead of …
Browse files Browse the repository at this point in the history
…the string parsing method

Fixes #6 - comments inside key/value strings
  • Loading branch information
LogicAndTrick committed Nov 20, 2021
1 parent 72abe60 commit 319f6ec
Show file tree
Hide file tree
Showing 6 changed files with 250 additions and 86 deletions.
18 changes: 18 additions & 0 deletions Sledge.Formats.Tests/Valve/TestSerialisedObject.cs
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,23 @@ public void TestEscapedQuotes()
Assert.AreEqual("Key\"With\"Quotes", output[0].Properties[0].Key);
Assert.AreEqual("Quoted\"Value", output[0].Properties[0].Value);
}

// Regression test for issue #6 (comments inside key/value strings): a "//" sequence that
// appears inside a quoted value must be kept as part of the value rather than treated as
// the start of a comment.
[TestMethod]
public void TestCommentsInQuotes()
{
var fmt = new SerialisedObjectFormatter();
// One structure named Test holding a single key/value pair whose value contains "//".
// NOTE(review): Q and Streamify are defined elsewhere in this test class - presumably Q is a
// double-quote character and Streamify wraps the text in a stream; confirm there.
using var input = Streamify($@"Test
{{
{Q}Key{Q} {Q}http://example.com{Q}
}}
");
var output = fmt.Deserialize(input).ToList();
// Exactly one top-level structure, with no children and a single property
Assert.AreEqual(1, output.Count);
Assert.AreEqual("Test", output[0].Name);
Assert.AreEqual(0, output[0].Children.Count);
Assert.AreEqual(1, output[0].Properties.Count);
Assert.AreEqual("Key", output[0].Properties[0].Key);
// The full URL, including the part after "//", must survive parsing
Assert.AreEqual("http://example.com", output[0].Properties[0].Value);
}
}
}
4 changes: 2 additions & 2 deletions Sledge.Formats/Sledge.Formats.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
<RepositoryUrl>https://github.com/LogicAndTrick/sledge-formats</RepositoryUrl>
<RepositoryType>Git</RepositoryType>
<PackageTags>half-life quake valve liblist vdf</PackageTags>
<PackageReleaseNotes>Included XML documentation</PackageReleaseNotes>
<PackageReleaseNotes>Update the serialised object formatter to use a tokeniser instead of the string parsing method</PackageReleaseNotes>
<PackageLicenseFile></PackageLicenseFile>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<Version>1.0.2</Version>
<Version>1.0.3</Version>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
Expand Down
137 changes: 53 additions & 84 deletions Sledge.Formats/Valve/SerialisedObjectFormatter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -108,95 +108,64 @@ private static void Print(SerialisedObject obj, TextWriter tw, int tabs = 0)
/// <returns>The parsed structure</returns>
public static IEnumerable<SerialisedObject> Parse(TextReader reader)
{
// NOTE(review): the line-based statements in this hunk (the CleanLine read loop, the
// ValidStructStartString check and the ParseStructure call) appear to be the removed side of
// this diff, shown interleaved with the added token-based implementation - confirm against the file.
string line;
while ((line = CleanLine(reader.ReadLine())) != null)
// Structure currently being populated; null while between top-level structures
SerialisedObject current = null;
// Structures that enclose `current`, innermost on top
var stack = new Stack<SerialisedObject>();

var tokens = ValveTokeniser.Tokenise(reader);
using (var it = tokens.GetEnumerator())
{
if (ValidStructStartString(line))
while (it.MoveNext())
{
yield return ParseStructure(reader, line);
var t = it.Current;
// A null token has a null Type, which matches no case and lands in the default branch
switch (t?.Type)
{
case ValveTokenType.Invalid:
// The tokeniser puts its error description in the token's Value
throw new Exception($"Parsing error (line {t.Line}, column {t.Column}): {t.Value}");
case ValveTokenType.Open:
// An open brace is only legal directly after a name, where the Name case consumes it
throw new Exception($"Parsing error (line {t.Line}, column {t.Column}): Structure must have a name");
case ValveTokenType.Close:
if (current == null) throw new Exception($"Parsing error (line {t.Line}, column {t.Column}): No structure to close");
if (stack.Count == 0)
{
// Nothing encloses `current`, so it is a top-level structure: emit it
yield return current;
current = null;
}
else
{
// Attach the finished structure to its parent and resume filling the parent
var prev = stack.Pop();
prev.Children.Add(current);
current = prev;
}
break;
case ValveTokenType.Name:
// A name must be followed immediately by an open brace; the brace token is consumed here
if (!it.MoveNext() || it.Current == null || it.Current.Type != ValveTokenType.Open) throw new Exception($"Parsing error (line {t.Line}, column {t.Column}): Expected structure open brace");
var next = new SerialisedObject(t.Value);
if (current == null)
{
current = next;
}
else
{
// Nested structure: remember the parent until the matching close brace
stack.Push(current);
current = next;
}
break;
case ValveTokenType.String:
// A string at this position is a key; the next token must be a string holding its value
if (current == null) throw new Exception($"Parsing error (line {t.Line}, column {t.Column}): No structure to add key/values to");
var key = t.Value;
if (!it.MoveNext() || it.Current == null || it.Current.Type != ValveTokenType.String) throw new Exception($"Parsing error (line {t.Line}, column {t.Column}): Expected string value to follow key");
var value = it.Current.Value;
current.Properties.Add(new KeyValuePair<string, string>(key, value));
break;
case ValveTokenType.End:
// Input exhausted: a structure that is still open was never closed
if (current != null) throw new Exception($"Parsing error (line {t.Line}, column {t.Column}): Unterminated structure at end of file");
yield break;
default:
throw new ArgumentOutOfRangeException();
}
}
}
}

/// <summary>
/// Cut a line off at the first "//" sequence and trim the surrounding whitespace
/// </summary>
/// <param name="line">The raw line, may be null</param>
/// <returns>The trimmed text before the first "//", or null if <paramref name="line"/> is null</returns>
private static string CleanLine(string line)
{
    if (line == null) return null;

    // Everything from the first "//" onwards is discarded, wherever it appears in the line
    var commentStart = line.IndexOf("//", StringComparison.Ordinal);
    var content = commentStart < 0 ? line : line.Substring(0, commentStart);
    return content.Trim();
}

/// <summary>
/// Read one structure (its properties and nested structures) from the reader
/// </summary>
/// <param name="reader">The TextReader positioned just after the structure's header line</param>
/// <param name="name">The cleaned header line: the structure name, optionally followed by an open brace</param>
/// <returns>The parsed structure; it has no content if no open brace follows the name</returns>
private static SerialisedObject ParseStructure(TextReader reader, string name)
{
    var header = name.SplitWithQuotes();
    var result = new SerialisedObject(header[0]);

    var braceOnHeaderLine = header.Length == 2 && header[1] == "{";
    if (!braceOnHeaderLine)
    {
        // The open brace is expected on the next non-blank line.
        // NOTE(review): CleanLine returns null for a null line, which also counts as blank here,
        // so this loop does not terminate if the reader runs out before a non-blank line.
        var opener = CleanLine(reader.ReadLine());
        while (String.IsNullOrWhiteSpace(opener))
        {
            opener = CleanLine(reader.ReadLine());
        }

        if (opener != "{") return result;
    }

    // Consume lines until the closing brace or the end of the input
    for (var entry = CleanLine(reader.ReadLine()); entry != null && entry != "}"; entry = CleanLine(reader.ReadLine()))
    {
        if (ValidStructPropertyString(entry))
        {
            ParseProperty(result, entry);
        }
        else if (ValidStructStartString(entry))
        {
            result.Children.Add(ParseStructure(reader, entry));
        }
        // Lines that are neither a property nor a structure header are ignored
    }

    return result;
}

/// <summary>
/// Test whether a line can start a structure: a single part, or a part followed by an open brace
/// </summary>
/// <param name="s">The line to test</param>
/// <returns>True if the line splits into one part, or into two parts with "{" as the second</returns>
private static bool ValidStructStartString(string s)
{
    if (string.IsNullOrEmpty(s)) return false;

    var parts = s.SplitWithQuotes();
    switch (parts.Length)
    {
        case 1:
            return true;
        case 2:
            return parts[1] == "{";
        default:
            return false;
    }
}

/// <summary>
/// Test whether a line looks like a property in the format: "key" "value"
/// </summary>
/// <param name="s">The line to test</param>
/// <returns>True if the line is non-empty and splits into exactly two parts</returns>
private static bool ValidStructPropertyString(string s)
    => !string.IsNullOrEmpty(s) && s.SplitWithQuotes().Length == 2;

/// <summary>
/// Split a property line in the format: "key" "value", and append the pair to a structure
/// </summary>
/// <param name="gs">The structure that receives the property</param>
/// <param name="prop">The property line to split</param>
private static void ParseProperty(SerialisedObject gs, string prop)
{
    var parts = prop.SplitWithQuotes();
    var key = parts[0];

    // A missing value becomes an empty string; backticks in the value are turned into double quotes
    var rawValue = parts[1] ?? "";
    var value = rawValue.Replace('`', '"');

    gs.Properties.Add(new KeyValuePair<string, string>(key, value));
}
#endregion
}
}
16 changes: 16 additions & 0 deletions Sledge.Formats/Valve/ValveToken.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
namespace Sledge.Formats.Valve
{
/// <summary>
/// A single token produced by the tokeniser: a type, an optional text value,
/// and the position that the tokeniser assigned to it.
/// </summary>
internal class ValveToken
{
    /// <summary>
    /// Create a token. The position is not part of construction; it is assigned afterwards
    /// through <see cref="Line"/> and <see cref="Column"/>.
    /// </summary>
    /// <param name="type">The kind of token</param>
    /// <param name="value">The text carried by the token, or null if it carries none</param>
    public ValveToken(ValveTokenType type, string value = null)
    {
        Value = value;
        Type = type;
    }

    /// <summary>The kind of token</summary>
    public ValveTokenType Type { get; }

    /// <summary>The text carried by the token (may be null)</summary>
    public string Value { get; }

    /// <summary>The line number assigned to this token</summary>
    public int Line { get; set; }

    /// <summary>The column number assigned to this token</summary>
    public int Column { get; set; }
}
}
12 changes: 12 additions & 0 deletions Sledge.Formats/Valve/ValveTokenType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Sledge.Formats.Valve
{
/// <summary>
/// The kinds of token that the tokeniser can produce
/// </summary>
internal enum ValveTokenType
{
    /// <summary>Input that could not be tokenised; the token's value holds the error description</summary>
    Invalid = 0,

    /// <summary>An open brace: {</summary>
    Open = 1,

    /// <summary>A close brace: }</summary>
    Close = 2,

    /// <summary>An unquoted name made of letters, digits and underscores</summary>
    Name = 3,

    /// <summary>A double-quoted string</summary>
    String = 4,

    /// <summary>The end of the input</summary>
    End = 5
}
}
149 changes: 149 additions & 0 deletions Sledge.Formats/Valve/ValveTokeniser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace Sledge.Formats.Valve
{
/// <summary>
/// Splits text into a sequence of <see cref="ValveToken"/>s: open/close braces, unquoted names
/// and double-quoted strings. Whitespace and <c>//</c> line comments between tokens are skipped.
/// </summary>
internal static class ValveTokeniser
{
    /// <summary>
    /// Tokenise a string
    /// </summary>
    /// <param name="text">The text to tokenise</param>
    /// <returns>The tokens, lazily produced; see <see cref="Tokenise(TextReader)"/></returns>
    public static IEnumerable<ValveToken> Tokenise(string text)
    {
        using (var reader = new StringReader(text))
        {
            foreach (var t in Tokenise(reader)) yield return t;
        }
    }

    /// <summary>
    /// Tokenise the remaining content of a reader
    /// </summary>
    /// <param name="input">The reader to consume characters from</param>
    /// <returns>
    /// The tokens, lazily produced. The sequence ends with either a single
    /// <see cref="ValveTokenType.End"/> token or a single <see cref="ValveTokenType.Invalid"/> token
    /// whose value describes the problem. Every token carries the 1-based line and column of its first character.
    /// </returns>
    internal static IEnumerable<ValveToken> Tokenise(TextReader input)
    {
        var line = 1;
        var col = 0;
        int b;
        while ((b = input.Read()) >= 0)
        {
            col++;

            // Whitespace (a NUL character is skipped as well)
            if (b == ' ' || b == '\t' || b == '\r' || b == 0)
            {
                continue;
            }

            // Newline
            if (b == '\n')
            {
                line++;
                col = 0;
                continue;
            }

            // Comment
            if (b == '/')
            {
                // A lone slash is not valid anywhere outside a string. Report it once and stop;
                // the character consumed by this check is never needed again.
                if (input.Read() != '/')
                {
                    yield return new ValveToken(ValveTokenType.Invalid, $"Unexpected token: {(char) b}") {Line = line, Column = col};
                    yield break;
                }

                // It's a comment, skip everything until we hit a newline
                var sawNewline = false;
                int c;
                while ((c = input.Read()) >= 0)
                {
                    if (c == '\n')
                    {
                        sawNewline = true;
                        break;
                    }
                }

                if (!sawNewline) break; // EOF inside the comment

                line++;
                col = 0;
                continue;
            }

            // Number of characters the token consumed beyond its first one
            var consumed = 0;

            ValveToken t;
            if (b == '"') t = TokenString(input, out consumed);
            else if (b == '{') t = new ValveToken(ValveTokenType.Open);
            else if (b == '}') t = new ValveToken(ValveTokenType.Close);
            else if (IsNameStart(b)) t = TokenName(b, input, out consumed);
            else t = new ValveToken(ValveTokenType.Invalid, $"Unexpected token: {(char) b}");

            // The token is positioned at its first character...
            t.Line = line;
            t.Column = col;
            // ...and the column then moves past the rest of it, so later tokens on this line are positioned correctly
            col += consumed;

            yield return t;

            if (t.Type == ValveTokenType.Invalid)
            {
                yield break;
            }
        }

        // Position the end marker too, so errors reported against it point at the end of the input
        yield return new ValveToken(ValveTokenType.End) {Line = line, Column = col};
    }

    /// <summary>
    /// Read the remainder of a double-quoted string; the opening quote has already been consumed
    /// </summary>
    /// <param name="input">The reader, positioned just after the opening quote</param>
    /// <param name="consumed">Receives the number of characters read from <paramref name="input"/></param>
    /// <returns>
    /// A String token holding the unescaped content, or an Invalid token if the string contains
    /// an unescaped newline or the input ends before the closing quote
    /// </returns>
    private static ValveToken TokenString(TextReader input, out int consumed)
    {
        consumed = 0;
        var sb = new StringBuilder();
        int b;
        while ((b = input.Read()) >= 0)
        {
            consumed++;

            // Newline in string (not allowed)
            if (b == '\n')
            {
                return new ValveToken(ValveTokenType.Invalid, "String cannot contain a newline");
            }

            // End of string
            if (b == '"')
            {
                return new ValveToken(ValveTokenType.String, sb.ToString());
            }

            // Any character other than a backslash is taken literally
            if (b != '\\')
            {
                sb.Append((char) b);
                continue;
            }

            // Escaped character: read the character after the backslash
            b = input.Read();
            if (b < 0) return new ValveToken(ValveTokenType.Invalid, "Unexpected end of file while reading string value");
            consumed++;

            switch (b)
            {
                // Some common escaped characters
                case 'n':
                    sb.Append('\n');
                    break;
                case 'r':
                    sb.Append('\r');
                    break;
                case 't':
                    sb.Append('\t');
                    break;
                // Otherwise, just use whatever it is (this covers \" and \\).
                // This must not also run for n/r/t, or the escape letter would be emitted as well.
                default:
                    sb.Append((char) b);
                    break;
            }
        }

        return new ValveToken(ValveTokenType.Invalid, "Unexpected end of file while reading string value");
    }

    /// <summary>
    /// Read the remainder of an unquoted name; its first character has already been consumed
    /// </summary>
    /// <param name="first">The first character of the name</param>
    /// <param name="input">The reader, positioned just after the first character</param>
    /// <param name="consumed">Receives the number of characters read from <paramref name="input"/></param>
    /// <returns>A Name token holding the full name</returns>
    private static ValveToken TokenName(int first, TextReader input, out int consumed)
    {
        consumed = 0;
        var name = new StringBuilder();
        name.Append((char) first);

        // Peek first so that the character which ends the name stays in the stream for the caller
        int b;
        while ((b = input.Peek()) >= 0 && IsNamePart(b))
        {
            name.Append((char) b);
            input.Read(); // advance the stream
            consumed++;
        }

        return new ValveToken(ValveTokenType.Name, name.ToString());
    }

    /// <summary>True for characters that may start a name: ASCII letters and underscore</summary>
    private static bool IsNameStart(int c)
    {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
    }

    /// <summary>True for characters that may continue a name: ASCII letters, digits and underscore</summary>
    private static bool IsNamePart(int c)
    {
        return IsNameStart(c) || (c >= '0' && c <= '9');
    }
}
}

0 comments on commit 319f6ec

Please sign in to comment.