Skip to content

Add a CreateSpanCollation method on SqliteConnection #36225

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions src/Microsoft.Data.Sqlite.Core/SqliteConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.IO;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Text;
using Microsoft.Data.Sqlite.Properties;
using SQLitePCL;
using static SQLitePCL.raw;
Expand All @@ -31,6 +32,7 @@ public partial class SqliteConnection : DbConnection
private readonly List<WeakReference<SqliteCommand>> _commands = [];

private Dictionary<string, (object? state, strdelegate_collation? collation)>? _collations;
private Dictionary<string, (object? state, delegate_collation? collation)>? _collationsSpan;

private Dictionary<(string name, int arity), (int flags, object? state, delegate_function_scalar? func)>? _functions;

Expand Down Expand Up @@ -278,6 +280,15 @@ public override void Open()
}
}

if (_collationsSpan != null)
{
foreach (var item in _collationsSpan)
{
rc = sqlite3__create_collation_utf8(Handle, item.Key, item.Value.state, item.Value.collation);
SqliteException.ThrowExceptionForRC(rc, Handle);
}
}

if (_functions != null)
{
foreach (var item in _functions)
Expand Down Expand Up @@ -372,6 +383,15 @@ internal void Deactivate()
}
}

if (_collationsSpan != null)
{
foreach (var item in _collationsSpan.Keys)
{
rc = sqlite3__create_collation_utf8(Handle, item, null, null);
SqliteException.ThrowExceptionForRC(rc, Handle);
}
}

if (_functions != null)
{
foreach (var (name, arity) in _functions.Keys)
Expand Down Expand Up @@ -493,6 +513,63 @@ public virtual void CreateCollation<T>(string name, T state, Func<T, string, str
_collations[name] = (state, collation);
}


/// <summary>
/// Create custom collation.
/// </summary>
/// <typeparam name="T">The type of the state object.</typeparam>
/// <param name="name">Name of the collation.</param>
/// <param name="state">State object passed to each invocation of the collation.</param>
/// <param name="comparison">Method that compares two char spans, using additional state.</param>
/// <seealso href="https://docs.microsoft.com/dotnet/standard/data/sqlite/collation">Collation</seealso>
public virtual void CreateSpanCollation<T>(string name, T state, SpanDelegateCollation<T>? comparison)
{
if (string.IsNullOrEmpty(name))
{
throw new ArgumentNullException(nameof(name));
}
#if NET5_0_OR_GREATER
delegate_collation? collation = comparison != null ? (v, s1, s2) =>
{
Span<char> s1Span = stackalloc char[Encoding.UTF8.GetCharCount(s1)];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks unsafe: stackalloc should never be used with an unbounded size. Unless I'm missing something, a huge string in the database could cause an arbitrary stack allocation and potentially cause a stack overflow, no?

Regardless, this copies/decodes the SQLite-provided ReadOnlySpan<byte> to ReadOnlySpan<char>; the point of this PR is to provide better performance, so that's not great. You should be able to simply reinterpret-cast using MemoryMarshal.Cast<byte, char>(), but need to check that the memory is aligned first. I'm assuming that SQLite-provided memory will generally be aligned (this needs to be checked); if that's the case, we can fall back to inefficient copying for unaligned memory (if that's a rare case).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right about the size issue. I'm not sure if there's an existing pattern but it seems like we should avoid a stack allocation if the size is over some limit? Not sure what that would be though.

As for copying, that's required as a char is utf16 and a byte is only 8 bits, so we can't just reinterpret a byte as a char because they have different sizes.

That said there's an argument for this whole method to use ReadOnlySpan<byte> and not ReadOnlySpan<char>, or we could make both? I'm not sure, it's not very easy to handle utf8 strings in dotnet and if someone is going down to that level then they can just call sqlite3_create_collation themselves. Though it would be pretty easy for CreateCollation char to be implemented on top of the utf8 version.

Span<char> s2Span = stackalloc char[Encoding.UTF8.GetCharCount(s2)];
Encoding.UTF8.GetChars(s1, s1Span);
Encoding.UTF8.GetChars(s2, s2Span);
return comparison((T)v, s1Span, s2Span);
}
: null;
#else
delegate_collation? collation = comparison != null ? (v, s1, s2) =>
{
return comparison((T)v, Encoding.UTF8.GetChars(s1.ToArray()), Encoding.UTF8.GetChars(s2.ToArray()));
}
: null;
#endif
if (State == ConnectionState.Open)
{
var rc = sqlite3__create_collation_utf8(Handle, name, state, collation);
SqliteException.ThrowExceptionForRC(rc, Handle);
}

_collationsSpan ??= new Dictionary<string, (object?, delegate_collation?)>(StringComparer.OrdinalIgnoreCase);
_collationsSpan[name] = (state, collation);
}

/// <summary>
/// Represents a method signature for a custom collation delegate that compares two read-only spans of characters.
/// </summary>
/// <typeparam name="T">The type of the state object.</typeparam>
/// <param name="state">An optional user-defined state object to be passed to the comparison function.</param>
/// <param name="s1">The first read-only span of characters to compare.</param>
/// <param name="s2">The second read-only span of characters to compare.</param>
/// <returns>
/// A signed integer indicating the relative order of the strings being compared:
/// Less than zero if <paramref name="s1"/> precedes <paramref name="s2"/>;
/// Zero if <paramref name="s1"/> is equal to <paramref name="s2"/>;
/// Greater than zero if <paramref name="s1"/> follows <paramref name="s2"/>.
/// </returns>
public delegate int SpanDelegateCollation<in T>(T state, ReadOnlySpan<char> s1, ReadOnlySpan<char> s2);

/// <summary>
/// Begins a transaction on the connection.
/// </summary>
Expand Down
66 changes: 65 additions & 1 deletion test/Microsoft.Data.Sqlite.Tests/SqliteConnectionTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,70 @@ public void CreateCollation_works_with_state()
Assert.Equal("Invoked", item);
}

[Fact]
public void CreateCollationSpan_works()
{
using var connection = new SqliteConnection("Data Source=:memory:");
connection.Open();
connection.CreateSpanCollation<object?>("MY_NOCASE", null, (_, s1, s2) => s1.CompareTo(s2, StringComparison.OrdinalIgnoreCase));

Assert.Equal(1L, connection.ExecuteScalar<long>("SELECT 'Νικοσ' = 'ΝΙΚΟΣ' COLLATE MY_NOCASE;"));
}

[Fact]
public void CreateCollationSpan_with_null_comparer_works()
{
using var connection = new SqliteConnection("Data Source=:memory:");
connection.Open();
connection.CreateSpanCollation<object?>("MY_NOCASE", null, (_, s1, s2) => s1.CompareTo(s2, StringComparison.OrdinalIgnoreCase));
connection.CreateSpanCollation<object?>("MY_NOCASE", null, null);

var ex = Assert.Throws<SqliteException>(
() => connection.ExecuteScalar<long>("SELECT 'Νικοσ' = 'ΝΙΚΟΣ' COLLATE MY_NOCASE;"));

Assert.Equal(Resources.SqliteNativeError(SQLITE_ERROR, "no such collation sequence: MY_NOCASE"), ex.Message);
}

[Fact]
public void CreateCollationSpan_works_when_closed()
{
using var connection = new SqliteConnection("Data Source=:memory:");
connection.CreateSpanCollation<object?>("MY_NOCASE", null, (_, s1, s2) => s1.CompareTo(s2, StringComparison.OrdinalIgnoreCase));
connection.Open();

Assert.Equal(1L, connection.ExecuteScalar<long>("SELECT 'Νικοσ' = 'ΝΙΚΟΣ' COLLATE MY_NOCASE;"));
}

[Fact]
public void CreateCollationSpan_throws_with_empty_name()
{
using var connection = new SqliteConnection("Data Source=:memory:");
connection.Open();
var ex = Assert.Throws<ArgumentNullException>(() => connection.CreateSpanCollation<object?>(null!, null, null));

Assert.Equal("name", ex.ParamName);
}

[Fact]
public void CreateCollationSpan_works_with_state()
{
using var connection = new SqliteConnection("Data Source=:memory:");
connection.Open();
var list = new List<string>();
connection.CreateSpanCollation(
"MY_NOCASE",
list,
(l, s1, s2) =>
{
l.Add("Invoked");
return s1.CompareTo(s2, StringComparison.OrdinalIgnoreCase);
});

Assert.Equal(1L, connection.ExecuteScalar<long>("SELECT 'Νικοσ' = 'ΝΙΚΟΣ' COLLATE MY_NOCASE;"));
var item = Assert.Single(list);
Assert.Equal("Invoked", item);
}

[Fact]
public void CreateFunction_works_when_closed()
{
Expand Down Expand Up @@ -1321,7 +1385,7 @@ public void Open_releases_handle_when_constructor_fails()
}
else
{
// On Unix-like systems, we can still delete the file but cannot
// On Unix-like systems, we can still delete the file but cannot
// reliably detect handle leaks this way.
File.Delete(dbPath);
}
Expand Down