Skip to content

Commit

Permalink
Use merge intersection, multimap
Browse files Browse the repository at this point in the history
  • Loading branch information
SuperAuguste committed May 28, 2024
1 parent 2fe497a commit 63ac820
Show file tree
Hide file tree
Showing 6 changed files with 294 additions and 112 deletions.
4 changes: 2 additions & 2 deletions build.zig.zon
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
.hash = "1220102cb2c669d82184fb1dc5380193d37d68b54e8d75b76b2d155b9af7d7e2e76d",
},
.fastfilter = .{
.url = "https://github.com/hexops/fastfilter/archive/b504ba1e636fb4847a7978c10ea32c64d457aaeb.tar.gz",
.hash = "1220e672d4980c183c6b29cf9b1200c3db23a7987968b0de7e6357f49b6b174c3276",
.url = "https://github.com/hexops/fastfilter/archive/b6e46f6d4811da0d8a0f8675ab85407172a207ba.tar.gz",
.hash = "1220c2275142456c3eb8152f626092237a7795e93518896581a9358ce857b3ca550e",
},
},
.paths = .{""},
Expand Down
2 changes: 2 additions & 0 deletions src/DocumentScope.zig
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const DocumentScope = @This();

scopes: std.MultiArrayList(Scope) = .{},
declarations: std.MultiArrayList(Declaration) = .{},
trigram_decls_mapping_capacity: u32 = 0,
declarations_that_should_be_trigram_indexed: std.ArrayListUnmanaged(Declaration.Index) = .{},
/// Used for looking up a child declaration in a given scope
declaration_lookup_map: DeclarationLookupMap = .{},
Expand Down Expand Up @@ -209,6 +210,7 @@ const ScopeContext = struct {
const declaration_index: Declaration.Index = @enumFromInt(doc_scope.declarations.len - 1);

if (pushed.should_trigrams_be_indexed and name.len >= 3) {
doc_scope.trigram_decls_mapping_capacity += @intCast(name.len - 2);
try doc_scope.declarations_that_should_be_trigram_indexed.append(allocator, declaration_index);
}

Expand Down
53 changes: 25 additions & 28 deletions src/Server.zig
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const inlay_hints = @import("features/inlay_hints.zig");
const code_actions = @import("features/code_actions.zig");
const folding_range = @import("features/folding_range.zig");
const document_symbol = @import("features/document_symbol.zig");
const workspace_symbols = @import("features/workspace_symbols.zig");
const completions = @import("features/completions.zig");
const goto = @import("features/goto.zig");
const hover_handler = @import("features/hover.zig");
Expand Down Expand Up @@ -1636,9 +1637,10 @@ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: ty
}
} else |_| return null;

var workspace_symbols = std.ArrayListUnmanaged(types.WorkspaceSymbol){};
var candidates_decl_map = std.ArrayListUnmanaged(Analyser.Declaration.Index){};
var narrowing_decl_map = std.AutoArrayHashMapUnmanaged(Analyser.Declaration.Index, void){};
if (trigrams.items.len == 0) return null;

var symbols = std.ArrayListUnmanaged(types.WorkspaceSymbol){};
var candidate_decls_buffer = std.ArrayListUnmanaged(Analyser.Declaration.Index){};

doc_loop: for (server.document_store.trigram_stores.keys(), server.document_store.trigram_stores.values()) |uri, trigram_store| {
const handle = server.document_store.getOrLoadHandle(uri) orelse continue;
Expand All @@ -1651,40 +1653,35 @@ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: ty
if (!trigram_store.filter.?.contain(@bitCast(trigram))) continue :doc_loop;
}

candidates_decl_map.clearRetainingCapacity();
narrowing_decl_map.clearRetainingCapacity();
candidate_decls_buffer.clearRetainingCapacity();

var first_pass = true;
for (trigrams.items) |trigram| {
if (!first_pass and candidates_decl_map.items.len == 0) break;
const first = trigram_store.getDeclarationsForTrigram(trigrams.items[0]) orelse continue;

var it = trigram_store.iterate(trigram);
while (it.next().unwrap()) |decl| {
if (first_pass) {
try candidates_decl_map.append(arena, decl);
} else {
try narrowing_decl_map.put(arena, decl, {});
}
}
try candidate_decls_buffer.resize(arena, first.len * 2);

if (!first_pass) {
var index: usize = 0;
while (index < candidates_decl_map.items.len) {
if (!narrowing_decl_map.contains(candidates_decl_map.items[index])) {
_ = candidates_decl_map.swapRemove(index);
} else index += 1;
}
}
var len = first.len;

@memcpy(candidate_decls_buffer.items[0..len], first);
@memcpy(candidate_decls_buffer.items[len..], first);

first_pass = false;
for (trigrams.items[1..]) |trigram| {
len = workspace_symbols.mergeIntersection(
trigram_store.getDeclarationsForTrigram(trigram) orelse continue :doc_loop,
candidate_decls_buffer.items[len..],
candidate_decls_buffer.items[0..len],
);
candidate_decls_buffer.items.len = len * 2;
@memcpy(candidate_decls_buffer.items[len..], candidate_decls_buffer.items[0..len]);
}

for (candidates_decl_map.items) |decl_idx| {
candidate_decls_buffer.items.len = len;

for (candidate_decls_buffer.items) |decl_idx| {
const decl = doc_scope.declarations.get(@intFromEnum(decl_idx));
const name_token = decl.nameToken(tree);

// TODO: integrate with document_symbol.zig for right kind info
try workspace_symbols.append(arena, .{
try symbols.append(arena, .{
.name = tree.tokenSlice(name_token),
.kind = .Variable,
.location = .{
Expand All @@ -1697,7 +1694,7 @@ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: ty
}
}

return .{ .array_of_WorkspaceSymbol = workspace_symbols.items };
return .{ .array_of_WorkspaceSymbol = symbols.items };
}

const HandledRequestMethods = enum {
Expand Down
164 changes: 82 additions & 82 deletions src/TrigramStore.zig
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,86 @@ const analysis = @import("analysis.zig");
const fastfilter = @import("fastfilter");
const Declaration = analysis.Declaration;
const DocumentStore = @import("DocumentStore.zig");
const CompactingMultiList = @import("compacting_multi_list.zig").CompactingMultiList;

const TrigramStore = @This();

/// Fast lookup with false positives.
filter: ?fastfilter.BinaryFuse8 = null,
/// Index into `extra`.
/// Body:
/// prev: u32 or none = maxInt(u32)
/// decl: Declaration.Index
lookup: std.AutoArrayHashMapUnmanaged(Trigram, u32) = .{},
extra: std.ArrayListUnmanaged(u32) = .{},
filter: ?fastfilter.BinaryFuse8,
/// Map index is a slice in decls.
lookup: std.AutoArrayHashMapUnmanaged(Trigram, void),
decls: CompactingMultiList(Declaration.Index).Compacted,

/// Collects trigram -> declaration mappings for a document and converts them
/// into a queryable `TrigramStore` through `finalize`.
pub const Builder = struct {
    /// Trigrams seen so far. The index of an entry in this map selects the
    /// matching declaration list in `decls`.
    lookup: std.AutoArrayHashMapUnmanaged(Trigram, void),
    /// Per-trigram declaration lists, addressed by the trigram's index in `lookup`.
    decls: CompactingMultiList(Declaration.Index),

    /// Creates an empty builder and reserves `decls_capacity` entries in `decls`
    /// so that the `*AssumeCapacity` calls in `append` have room to work with.
    pub fn init(allocator: std.mem.Allocator, decls_capacity: u32) error{OutOfMemory}!Builder {
        var builder = Builder{ .lookup = .{}, .decls = .{} };
        try builder.decls.ensureTotalCapacity(allocator, decls_capacity);
        return builder;
    }

    /// Appends declaration with `name`'s trigrams to store.
    ///
    /// `name` must be at least 3 bytes long (asserted). Returns
    /// `error.InvalidUtf8` when `name` is not valid UTF-8.
    pub fn append(
        builder: *Builder,
        allocator: std.mem.Allocator,
        name: []const u8,
        declaration: Declaration.Index,
    ) error{ OutOfMemory, InvalidUtf8 }!void {
        std.debug.assert(name.len >= 3);

        // These will either be exact, or in the case of non-ASCII text
        // be a slight overshoot.
        try builder.lookup.ensureUnusedCapacity(allocator, name.len - 2);

        const view = try std.unicode.Utf8View.init(name);

        var iterator = view.iterator();
        while (iterator.nextCodepoint()) |codepoint_0| {
            // Remember where the second codepoint starts so the window can
            // slide forward by exactly one codepoint per iteration.
            const next_idx = iterator.i;
            const codepoint_1 = iterator.nextCodepoint() orelse break;
            const codepoint_2 = iterator.nextCodepoint() orelse break;

            const gop = builder.lookup.getOrPutAssumeCapacity(.{
                .codepoint_0 = codepoint_0,
                .codepoint_1 = codepoint_1,
                .codepoint_2 = codepoint_2,
            });

            if (!gop.found_existing) {
                // First occurrence of this trigram: start a new list for it.
                _ = try builder.decls.appendToNewListAssumeCapacity(allocator, declaration);
            } else {
                // Known trigram: extend the list at the trigram's map index.
                builder.decls.appendAssumeCapacity(@intCast(gop.index), declaration);
            }

            iterator.i = next_idx;
        }
    }

    /// Must be called before any queries are executed.
    ///
    /// Builds the filter from the collected trigram keys and compacts the
    /// declaration lists. On success the returned store holds `builder.lookup`
    /// and `builder.decls` has been freed. On failure the filter allocated
    /// here is released again before the error is returned.
    pub fn finalize(builder: *Builder, allocator: std.mem.Allocator) error{OutOfMemory}!TrigramStore {
        var filter = try fastfilter.BinaryFuse8.init(allocator, builder.lookup.count());
        // Both `populate` and `compact` below can fail; without this the
        // filter's allocation would leak on those error paths.
        errdefer filter.deinit(allocator);

        // NOTE(review): the cast presumably reinterprets the `packed struct(u64)`
        // trigram keys as the filter's integer key type - confirm against fastfilter.
        filter.populate(allocator, @ptrCast(builder.lookup.keys())) catch |err| switch (err) {
            error.KeysLikelyNotUnique => {
                // NOTE(SuperAuguste): Ignore this? It shouldn't happen ever
                // and should, at worst, break lookups for one document, unless
                // the filter state is all messed up (might crash at lookup time?).
                // TODO: Look into this more.
            },
            else => |e| return e,
        };

        const store = TrigramStore{
            .filter = filter,
            .lookup = builder.lookup,
            .decls = try builder.decls.compact(allocator),
        };
        builder.decls.deinit(allocator);

        return store;
    }
};

pub const Trigram = packed struct(u64) {
codepoint_0: u21,
Expand All @@ -27,27 +96,12 @@ pub const Trigram = packed struct(u64) {

pub fn init(allocator: std.mem.Allocator, handle: *DocumentStore.Handle) error{ InvalidUtf8, OutOfMemory }!TrigramStore {
const doc_scope = try handle.getDocumentScope();
var store = TrigramStore{};
var builder = try Builder.init(allocator, doc_scope.trigram_decls_mapping_capacity);
for (doc_scope.declarations_that_should_be_trigram_indexed.items) |decl_idx| {
const decl = doc_scope.declarations.get(@intFromEnum(decl_idx));
try store.append(allocator, handle.tree.tokenSlice(decl.nameToken(handle.tree)), decl_idx);
try builder.append(allocator, handle.tree.tokenSlice(decl.nameToken(handle.tree)), decl_idx);
}
try store.finalize(allocator);
return store;
}

/// Must be called before any queries are executed.
pub fn finalize(store: *TrigramStore, allocator: std.mem.Allocator) error{OutOfMemory}!void {
store.filter = try fastfilter.BinaryFuse8.init(allocator, store.lookup.count());
store.filter.?.populate(allocator, @ptrCast(store.lookup.keys())) catch |err| switch (err) {
error.KeysLikelyNotUnique => {
// NOTE(SuperAuguste): Ignore this? It shouldn't happen ever
// and should, at worst, break lookups for one document, unless
// the filter state is all messed up (might crash at lookup time?).
// TODO: Look into this more.
},
else => |e| return e,
};
return try builder.finalize(allocator);
}

pub fn reset(store: *TrigramStore, allocator: std.mem.Allocator) void {
Expand All @@ -62,64 +116,10 @@ pub fn reset(store: *TrigramStore, allocator: std.mem.Allocator) void {
/// Releases the filter (when one is present), the trigram lookup map and the
/// declaration storage using `allocator`, then overwrites `store.*` with
/// `undefined`.
pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void {
if (store.filter) |filter| filter.deinit(allocator);
store.lookup.deinit(allocator);
store.extra.deinit(allocator);
store.decls.deinit(allocator);
store.* = undefined;
}

/// Appends declaration with `name`'s trigrams to store.
pub fn append(
store: *TrigramStore,
allocator: std.mem.Allocator,
name: []const u8,
declaration: Declaration.Index,
) error{ OutOfMemory, InvalidUtf8 }!void {
std.debug.assert(name.len >= 3);

// These will either be exact, or in the case of non-ASCII text
// be a slight overshoot.
try store.lookup.ensureUnusedCapacity(allocator, name.len - 2);
try store.extra.ensureUnusedCapacity(allocator, (name.len - 2) * 2);

const view = try std.unicode.Utf8View.init(name);

var iterator = view.iterator();
while (iterator.nextCodepoint()) |codepoint_0| {
const next_idx = iterator.i;
const codepoint_1 = iterator.nextCodepoint() orelse break;
const codepoint_2 = iterator.nextCodepoint() orelse break;

const gop = store.lookup.getOrPutAssumeCapacity(.{
.codepoint_0 = codepoint_0,
.codepoint_1 = codepoint_1,
.codepoint_2 = codepoint_2,
});

const prev_or_none = if (gop.found_existing) gop.value_ptr.* else std.math.maxInt(u32);
const new_last = store.extra.items.len;
store.extra.appendSliceAssumeCapacity(&.{ prev_or_none, @intFromEnum(declaration) });

gop.value_ptr.* = @intCast(new_last);

iterator.i = next_idx;
}
}

pub const TrigramIterator = struct {
extra: []const u32,
extra_index: ?u32,

pub fn next(iterator: *TrigramIterator) Declaration.OptionalIndex {
if (iterator.extra_index == null) return .none;
const prev, const decl = iterator.extra[iterator.extra_index.?..][0..2].*;
iterator.extra_index = if (prev == std.math.maxInt(u32)) null else prev;
return @as(Declaration.Index, @enumFromInt(decl)).toOptional();
}
};

/// Iterates all declarations containing `trigram`.
pub fn iterate(store: TrigramStore, trigram: Trigram) TrigramIterator {
return .{
.extra = store.extra.items,
.extra_index = store.lookup.get(trigram),
};
/// Returns the declarations recorded for `trigram`, or `null` when the
/// trigram has no entry in `lookup`.
pub fn getDeclarationsForTrigram(store: TrigramStore, trigram: Trigram) ?[]const Declaration.Index {
    // The trigram's position in the lookup map is the index of its list in `decls`.
    const list_index = store.lookup.getIndex(trigram) orelse return null;
    return store.decls.slice(@intCast(list_index));
}
Loading

0 comments on commit 63ac820

Please sign in to comment.