diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 01ddaba2..906eccd7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,6 +7,7 @@ on: - 'master' - 'dev' - 'release/*' + - 'hotfix/*' tags: - 'v*' pull_request: @@ -14,6 +15,7 @@ on: - 'master' - 'dev' - 'release/*' + - 'hotfix/*' jobs: build: @@ -40,10 +42,9 @@ jobs: - name: Setup dotnet uses: actions/setup-dotnet@v3 with: - dotnet-version: | - 5.0.x + dotnet-version: | 6.0.x - 7.0.x + 8.0.x - name: Install GitVersion uses: gittools/actions/gitversion/setup@v0.9.9 @@ -60,10 +61,10 @@ jobs: run: dotnet build ${{ env.Solution_File }} --configuration ${{ env.Configuration }} --no-restore - name: Test - run: dotnet test "${{ env.Test_Proj }}" --no-build --verbosity normal --results-directory ${{ github.workspace }}/_TestResults --logger "trx;logfilename=tests.trx" + run: dotnet test "${{ env.Test_Proj }}" --no-build --verbosity normal --results-directory ${{ github.workspace }}/_TestResults --logger "trx;logfilename=tests.trx" --filter "TestCategory!~Benchmarks" - name: Upload test results - uses: actions/upload-artifact@v2 # upload test results + uses: actions/upload-artifact@v4 # upload test results if: success() || failure() # run this step even if previous step failed with: name: examine-test-results @@ -85,7 +86,7 @@ jobs: --output=${{ github.workspace }}/_NugetOutput - name: Upload artifacts - uses: actions/upload-artifact@v2 # upload nuget + uses: actions/upload-artifact@v4 # upload nuget if: success() with: name: examine-nuget-${{ env.GitVersion_SemVer }} diff --git a/.github/workflows/docfx-gh-pages.yml b/.github/workflows/docfx-gh-pages.yml index 9ec09b6d..e6adb7e4 100644 --- a/.github/workflows/docfx-gh-pages.yml +++ b/.github/workflows/docfx-gh-pages.yml @@ -4,6 +4,9 @@ on: # Runs on pushes targeting the default branch push: branches: ["feature/docfx"] + pull_request: + branches: + - 'feature/docfx' # Allows you to run this workflow manually from the Actions tab workflow_dispatch: diff --git a/.github/workflows/test-report.yml b/.github/workflows/test-report.yml index a6efea56..708b7260 100644 --- a/.github/workflows/test-report.yml +++ b/.github/workflows/test-report.yml @@ -8,7 +8,7 @@ jobs: report: runs-on: ubuntu-latest steps: - - uses: dorny/test-reporter@v1.4.2 + - uses: dorny/test-reporter@v1 with: artifact: examine-test-results # artifact name name: Publish Tests # Name of the check run which will be created diff --git a/docs/articles/indexing.md b/docs/articles/indexing.md index 7ad7ca84..bf7b539d 100644 --- a/docs/articles/indexing.md +++ b/docs/articles/indexing.md @@ -115,9 +115,10 @@ myIndex.IndexItem(new ValueSet( Be default all indexing is done asynchronously. If you need to run indexing synchronously you should create a synchronous scope. This is for instance a necessary step for unit tests. 
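For reference, the assembled form of the new pattern in the hunk below reads as follows (a sketch only: the dictionary values are illustrative placeholders, and `myIndex` is assumed to be backed by a `LuceneIndex`):

```cs
// Switch the index into synchronous mode for the lifetime of the scope,
// then index as usual; disposing the scope restores the previous threading mode.
using (var luceneIndex = (LuceneIndex)myIndex)
using (var syncIndexContext = luceneIndex.WithThreadingMode(IndexThreadingMode.Synchronous))
{
    luceneIndex.IndexItem(new ValueSet(
        "SKU987",      // unique item id
        "Product",     // item category
        new Dictionary<string, object>
        {
            // illustrative field values only
            ["Name"] = "USB-C Cable",
            ["Price"] = 19.99
        }));
}
```
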
```cs -using (myIndex.ProcessNonAsync()) +using (var luceneIndex = (LuceneIndex)myIndex) +using (var syncIndexContext = luceneIndex.WithThreadingMode(IndexThreadingMode.Synchronous)) { - myIndex.IndexItem(new ValueSet( + luceneIndex.IndexItem(new ValueSet( "SKU987", "Product", new Dictionary() @@ -251,4 +252,4 @@ private void IndexCommited(object sender, EventArgs e) { // Triggered when the index is commited } -``` \ No newline at end of file +``` diff --git a/docs/articles/searching.md b/docs/articles/searching.md index 6b2fd025..4a454ee9 100644 --- a/docs/articles/searching.md +++ b/docs/articles/searching.md @@ -132,7 +132,7 @@ query.Field("nodeTypeAlias", "CWS_Home".Boost(20)); var results = query.Execute(); ``` -This will boost the term `CWS_Home` and make enteries with `nodeTypeAlias:CWS_Home` score higher in the results. +This will boost the term `CWS_Home` and make entries with `nodeTypeAlias:CWS_Home` score higher in the results. ## Proximity diff --git a/docs/docs-v1-v2/searching.md b/docs/docs-v1-v2/searching.md index 2905d780..6a294158 100644 --- a/docs/docs-v1-v2/searching.md +++ b/docs/docs-v1-v2/searching.md @@ -124,7 +124,7 @@ query.Field("nodeTypeAlias", "CWS_Home".Boost(20)); var results = query.Execute(); ``` -This will boost the term `CWS_Home` and make enteries with `nodeTypeAlias:CWS_Home` score higher in the results. +This will boost the term `CWS_Home` and make entries with `nodeTypeAlias:CWS_Home` score higher in the results. ## Proximity @@ -248,4 +248,4 @@ var query = searcher.CreateQuery(); var query = (LuceneSearchQuery)query.NativeQuery("hello:world").And(); // Make query ready for extending query.LuceneQuery(NumericRangeQuery.NewInt64Range("numTest", 4, 5, true, true)); // Add the raw lucene query var results = query.Execute(); -``` \ No newline at end of file +``` diff --git a/src/Directory.Build.props b/src/Directory.Build.props index ade8d226..13fefb76 100644 --- a/src/Directory.Build.props +++ b/src/Directory.Build.props @@ -8,9 +8,12 @@ snupkg - + + + true + https://github.com/Shazwazza/Examine https://github.com/Shazwazza/Examine git @@ -22,7 +25,8 @@ 2.0.0 - net6.0;netstandard2.1;netstandard2.0 + net6.0;net8.0; + latest diff --git a/src/Examine.Benchmarks/ConcurrentAcquireBenchmarks.cs b/src/Examine.Benchmarks/ConcurrentAcquireBenchmarks.cs new file mode 100644 index 00000000..8d95df1d --- /dev/null +++ b/src/Examine.Benchmarks/ConcurrentAcquireBenchmarks.cs @@ -0,0 +1,147 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Jobs; +using Lucene.Net.Analysis; +using Lucene.Net.Analysis.Standard; +using Lucene.Net.Index; +using Lucene.Net.Search; +using Lucene.Net.Store; +using Lucene.Net.Util; +using Microsoft.Extensions.Logging; + +namespace Examine.Benchmarks +{ + [MediumRunJob(RuntimeMoniker.Net80)] + [ThreadingDiagnoser] + [MemoryDiagnoser] + public class ConcurrentAcquireBenchmarks : ExamineBaseTest + { + private readonly StandardAnalyzer _analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + private string? _tempBasePath; + private FSDirectory? _indexDir; + private IndexWriter? _writer; + private SearcherManager? 
_searcherManager; + + [GlobalSetup] + public override void Setup() + { + base.Setup(); + + _tempBasePath = Path.Combine(Path.GetTempPath(), "ExamineTests"); + + // indexer for lucene + var tempIndexer = InitializeAndIndexItems(_tempBasePath, _analyzer, out var indexDir); + tempIndexer.Dispose(); + _indexDir = FSDirectory.Open(indexDir); + var writerConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, _analyzer); + //writerConfig.SetMaxBufferedDocs(1000); + //writerConfig.SetReaderTermsIndexDivisor(4); + //writerConfig.SetOpenMode(OpenMode.APPEND); + //writerConfig.SetReaderPooling(true); + //writerConfig.SetCodec(new Lucene46Codec()); + _writer = new IndexWriter(_indexDir, writerConfig); + var trackingWriter = new TrackingIndexWriter(_writer); + _searcherManager = new SearcherManager(trackingWriter.IndexWriter, applyAllDeletes: true, new SearcherFactory()); + } + + [GlobalCleanup] + public override void TearDown() + { + _searcherManager.Dispose(); + _writer.Dispose(); + _indexDir.Dispose(); + + base.TearDown(); + + System.IO.Directory.Delete(_tempBasePath, true); + } + + [Params(1, 15, 30, 100)] + public int ThreadCount { get; set; } + + [Benchmark(Baseline = true)] + public async Task SimpleMultiThreadLoop() + { + var tasks = new List(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + var i = 0; + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + + [Benchmark] + public async Task TestAcquireThreadContention() + { + var tasks = new List(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + var searcher = _searcherManager.Acquire(); + try + { + if (searcher.IndexReader.RefCount > (ThreadCount + 1)) + { + Console.WriteLine(searcher.IndexReader.RefCount); + } + } + finally + { + _searcherManager.Release(searcher); + } + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + +#if RELEASE + protected override ILoggerFactory CreateLoggerFactory() + => Microsoft.Extensions.Logging.LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Information)); +#endif + private TestIndex InitializeAndIndexItems( + string tempBasePath, + Analyzer analyzer, + out DirectoryInfo indexDir) + { + var tempPath = Path.Combine(tempBasePath, Guid.NewGuid().ToString()); + System.IO.Directory.CreateDirectory(tempPath); + indexDir = new DirectoryInfo(tempPath); + var luceneDirectory = FSDirectory.Open(indexDir); + var indexer = GetTestIndex(luceneDirectory, analyzer); + + var random = new Random(); + var valueSets = new List(); + + for (var i = 0; i < 1000; i++) + { + valueSets.Add(ValueSet.FromObject(Guid.NewGuid().ToString(), "content", + new + { + nodeName = "location " + i, + bodyText = Enumerable.Range(0, random.Next(10, 100)).Select(x => Guid.NewGuid().ToString()) + })); + } + + indexer.IndexItems(valueSets); + + return indexer; + } + } +} diff --git a/src/Examine.Benchmarks/ConcurrentSearchBenchmarks.cs b/src/Examine.Benchmarks/ConcurrentSearchBenchmarks.cs new file mode 100644 index 00000000..ad2d82c5 --- /dev/null +++ b/src/Examine.Benchmarks/ConcurrentSearchBenchmarks.cs @@ -0,0 +1,554 @@ +#if LocalBuild +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Jobs; +using Examine.Lucene.Search; +using Examine.Search; +using Lucene.Net.Analysis; +using Lucene.Net.Analysis.Standard; +using Lucene.Net.Index; +using Lucene.Net.QueryParsers.Classic; +using Lucene.Net.Search; +using Lucene.Net.Store; +using Lucene.Net.Util; +using 
Microsoft.Extensions.Logging; + +//[assembly: Config(typeof(MyDefaultConfig))] + +//internal class MyDefaultConfig : ManualConfig +//{ +// public MyDefaultConfig() +// { +// WithOptions(ConfigOptions.DisableOptimizationsValidator); +// } +//} + +namespace Examine.Benchmarks +{ + /* + + Original + + | Method | ThreadCount | Mean | Error | StdDev | Completed Work Items | Lock Contentions | Gen0 | Gen1 | Gen2 | Allocated | + |---------------- |------------ |-----------:|------------:|-----------:|---------------------:|-----------------:|----------:|----------:|----------:|----------:| + |---------------- |------------ |-----------:|------------:|-----------:|---------------------:|-----------------:|----------:|----------:|----------:|----------:| + | ExamineStandard | 1 | 8.712 ms | 0.6798 ms | 0.0373 ms | 1.0000 | - | 234.3750 | 140.6250 | - | 2.86 MB | + | LuceneSimple | 1 | 9.723 ms | 0.4864 ms | 0.0267 ms | 1.0000 | 0.0469 | 250.0000 | 234.3750 | - | 3.01 MB | + | ExamineStandard | 5 | 154.451 ms | 39.5553 ms | 2.1682 ms | 5.0000 | - | 1000.0000 | 750.0000 | - | 14.3 MB | + | LuceneSimple | 5 | 16.953 ms | 6.1768 ms | 0.3386 ms | 5.0000 | - | 1250.0000 | 1000.0000 | 93.7500 | 15.06 MB | + | ExamineStandard | 15 | 657.503 ms | 195.5415 ms | 10.7183 ms | 15.0000 | - | 3000.0000 | 1000.0000 | - | 42.92 MB | + | LuceneSimple | 15 | 60.278 ms | 100.6474 ms | 5.5168 ms | 15.0000 | - | 4333.3333 | 2666.6667 | 1000.0000 | 45.2 MB | + + Without NRT (no diff really) + + | Method | ThreadCount | Mean | Error | StdDev | Gen0 | Completed Work Items | Lock Contentions | Gen1 | Allocated | + |---------------- |------------ |----------:|-----------:|----------:|----------:|---------------------:|-----------------:|----------:|----------:| + | ExamineStandard | 1 | 12.48 ms | 3.218 ms | 0.176 ms | 250.0000 | 1.0000 | 0.0938 | 156.2500 | 3.13 MB | + | ExamineStandard | 5 | 149.31 ms | 88.914 ms | 4.874 ms | 1000.0000 | 5.0000 | 4.0000 | 750.0000 | 14.7 MB | + | ExamineStandard | 15 | 613.14 ms | 897.936 ms | 49.219 ms | 3000.0000 | 15.0000 | 14.0000 | 1000.0000 | 43.67 MB | + + Without querying MaxDoc (Shows we were double/triple querying) + + | Method | ThreadCount | Mean | Error | StdDev | Gen0 | Completed Work Items | Lock Contentions | Gen1 | Allocated | + |---------------- |------------ |-----------:|----------:|----------:|---------:|---------------------:|-----------------:|---------:|------------:| + | ExamineStandard | 1 | 5.223 ms | 1.452 ms | 0.0796 ms | 78.1250 | 1.0000 | 0.0313 | 7.8125 | 962 KB | + | ExamineStandard | 5 | 26.772 ms | 9.982 ms | 0.5471 ms | 312.5000 | 5.0000 | 4.0000 | 187.5000 | 3825.35 KB | + | ExamineStandard | 15 | 101.483 ms | 65.690 ms | 3.6007 ms | 800.0000 | 15.0000 | 14.0000 | 400.0000 | 10989.05 KB | + + Without apply deletes (should be faster, we'll keep it) + UPDATE: We cannot, that is specialized and we cannot support it. 
+ + | Method | ThreadCount | Mean | Error | StdDev | Gen0 | Completed Work Items | Lock Contentions | Gen1 | Allocated | + |---------------- |------------ |-----------:|----------:|----------:|---------:|---------------------:|-----------------:|---------:|------------:| + | ExamineStandard | 1 | 5.554 ms | 1.745 ms | 0.0957 ms | 78.1250 | 1.0000 | - | 31.2500 | 961.73 KB | + | ExamineStandard | 5 | 26.960 ms | 4.797 ms | 0.2629 ms | 312.5000 | 5.0000 | 4.0313 | 187.5000 | 3826.6 KB | + | ExamineStandard | 15 | 103.939 ms | 49.361 ms | 2.7057 ms | 800.0000 | 15.0000 | 14.0000 | 400.0000 | 10991.87 KB | + + Using struct (doesn't change anything) + + | Method | ThreadCount | Mean | Error | StdDev | Gen0 | Completed Work Items | Lock Contentions | Gen1 | Allocated | + |---------------- |------------ |-----------:|----------:|----------:|---------:|---------------------:|-----------------:|---------:|------------:| + | ExamineStandard | 1 | 5.661 ms | 2.477 ms | 0.1357 ms | 78.1250 | 1.0000 | 0.0625 | 31.2500 | 961.56 KB | + | ExamineStandard | 5 | 28.364 ms | 3.615 ms | 0.1981 ms | 312.5000 | 5.0000 | 4.0000 | 187.5000 | 3825.91 KB | + | ExamineStandard | 15 | 100.561 ms | 26.820 ms | 1.4701 ms | 800.0000 | 15.0000 | 14.0000 | 400.0000 | 10986.15 KB | + + With Latest changes (don't re-create SearchContext, cache fields if nothing changes, etc...): + + | Method | ThreadCount | Mean | Error | StdDev | Completed Work Items | Lock Contentions | Gen0 | Gen1 | Allocated | + |---------------- |------------ |-----------:|------------:|-----------:|---------------------:|-----------------:|----------:|----------:|------------:| + | ExamineStandard | 1 | 5.157 ms | 1.0374 ms | 0.0569 ms | 1.0000 | 0.0156 | 78.1250 | 39.0625 | 963.3 KB | + | LuceneSimple | 1 | 11.338 ms | 0.8416 ms | 0.0461 ms | 1.0000 | 0.0156 | 265.6250 | 187.5000 | 3269.09 KB | + | ExamineStandard | 5 | 27.038 ms | 7.2847 ms | 0.3993 ms | 5.0000 | 4.0000 | 312.5000 | 187.5000 | 3812.7 KB | + | LuceneSimple | 5 | 144.196 ms | 185.2203 ms | 10.1526 ms | 5.0000 | - | 1000.0000 | 750.0000 | 15047.06 KB | + | ExamineStandard | 15 | 95.799 ms | 64.1371 ms | 3.5156 ms | 15.0000 | 14.0000 | 833.3333 | 500.0000 | 10940.31 KB | + | LuceneSimple | 15 | 566.652 ms | 275.2278 ms | 15.0862 ms | 15.0000 | - | 3000.0000 | 1000.0000 | 44485.6 KB | + + Determining the best NRT values + + | Method | ThreadCount | NrtTargetMaxStaleSec | NrtTargetMinStaleSec | Mean | Error | StdDev | Gen0 | Completed Work Items | Lock Contentions | Gen1 | Allocated | + |---------------- |------------ |--------------------- |--------------------- |-----------:|------------:|----------:|---------:|---------------------:|-----------------:|---------:|------------:| + | ExamineStandard | 1 | 5 | 1 | 5.507 ms | 1.7993 ms | 0.0986 ms | 78.1250 | 1.0000 | - | 31.2500 | 963.59 KB | + | ExamineStandard | 1 | 5 | 5 | 5.190 ms | 0.4792 ms | 0.0263 ms | 78.1250 | 1.0000 | 0.0078 | 39.0625 | 963.65 KB | + | ExamineStandard | 1 | 60 | 1 | 5.406 ms | 2.2636 ms | 0.1241 ms | 78.1250 | 1.0000 | 0.0313 | 31.2500 | 963.71 KB | + | ExamineStandard | 1 | 60 | 5 | 5.316 ms | 3.4301 ms | 0.1880 ms | 78.1250 | 1.0000 | - | 39.0625 | 963.42 KB | + | ExamineStandard | 5 | 5 | 1 | 26.439 ms | 1.2601 ms | 0.0691 ms | 312.5000 | 5.0000 | 4.0000 | 187.5000 | 3813.45 KB | + | ExamineStandard | 5 | 5 | 5 | 27.341 ms | 13.3950 ms | 0.7342 ms | 312.5000 | 5.0000 | 4.0313 | 187.5000 | 3813.83 KB | + | ExamineStandard | 5 | 60 | 1 | 26.768 ms | 9.4732 ms | 0.5193 ms | 312.5000 | 5.0000 | 4.0000 | 156.2500 
| 3814.06 KB | + | ExamineStandard | 5 | 60 | 5 | 27.216 ms | 3.3213 ms | 0.1821 ms | 312.5000 | 5.0000 | 4.0000 | 187.5000 | 3813.83 KB | + | ExamineStandard | 15 | 5 | 1 | 101.040 ms | 44.3254 ms | 2.4296 ms | 800.0000 | 15.0000 | 14.0000 | 600.0000 | 10940.73 KB | + | ExamineStandard | 15 | 5 | 5 | 104.027 ms | 44.7547 ms | 2.4532 ms | 800.0000 | 15.0000 | 14.0000 | 400.0000 | 10939.87 KB | + | ExamineStandard | 15 | 60 | 1 | 96.622 ms | 162.1682 ms | 8.8890 ms | 800.0000 | 15.0000 | 14.0000 | 400.0000 | 10941.64 KB | + | ExamineStandard | 15 | 60 | 5 | 102.469 ms | 78.0316 ms | 4.2772 ms | 800.0000 | 15.0000 | 14.0000 | 400.0000 | 10936.86 KB | + + Putting MaxDoc back in makes it go crazy + + | Method | ThreadCount | Mean | Error | StdDev | Gen0 | Completed Work Items | Lock Contentions | Gen1 | Allocated | + |---------------- |------------ |----------:|-----------:|----------:|----------:|---------------------:|-----------------:|----------:|----------:| + | ExamineStandard | 1 | 12.90 ms | 4.049 ms | 0.222 ms | 250.0000 | 1.0000 | - | 156.2500 | 3.13 MB | + | ExamineStandard | 5 | 149.16 ms | 74.884 ms | 4.105 ms | 1000.0000 | 5.0000 | 4.0000 | 750.0000 | 14.69 MB | + | ExamineStandard | 15 | 635.77 ms | 899.620 ms | 49.311 ms | 3000.0000 | 15.0000 | 14.0000 | 1000.0000 | 43.57 MB | + + Using different MaxResults leads to crazy results + + | Method | ThreadCount | MaxResults | Mean | Error | StdDev | Completed Work Items | Lock Contentions | Gen0 | Gen1 | Gen2 | Allocated | + |---------------- |------------ |----------- |-------------:|--------------:|------------:|---------------------:|-----------------:|----------:|----------:|----------:|----------:| + | ExamineStandard | 15 | 10 | 4.979 ms | 1.6928 ms | 0.0928 ms | 15.0000 | 14.0000 | 257.8125 | 109.3750 | - | 3 MB | + | LuceneSimple | 15 | 10 | 4.168 ms | 0.6606 ms | 0.0362 ms | 15.0000 | 0.0234 | 218.7500 | 93.7500 | - | 2.57 MB | + | ExamineStandard | 15 | 100 | 92.838 ms | 88.3517 ms | 4.8429 ms | 15.0000 | 14.0000 | 833.3333 | 666.6667 | - | 10.68 MB | + | LuceneSimple | 15 | 100 | 103.927 ms | 64.1171 ms | 3.5145 ms | 15.0000 | - | 800.0000 | 600.0000 | - | 10.33 MB | + | ExamineStandard | 15 | 1000 | 1,278.769 ms | 826.1505 ms | 45.2841 ms | 15.0000 | 14.0000 | 7000.0000 | 4000.0000 | 1000.0000 | 84.55 MB | + | LuceneSimple | 15 | 1000 | 1,248.199 ms | 1,921.5844 ms | 105.3285 ms | 15.0000 | - | 7000.0000 | 4000.0000 | 1000.0000 | 84.08 MB | + + After changing to use singleton indexers/managers + + | Method | ThreadCount | MaxResults | Mean | Error | StdDev | Completed Work Items | Lock Contentions | Gen0 | Gen1 | Gen2 | Allocated | + |---------------- |------------ |----------- |---------------:|--------------:|-------------:|---------------------:|-----------------:|-----------:|-----------:|----------:|-------------:| + | ExamineStandard | 1 | 10 | 101.9 μs | 9.70 μs | 0.53 μs | 1.0000 | 0.0029 | 12.6953 | 0.9766 | - | 157.77 KB | + | LuceneSimple | 1 | 10 | 120.7 us | 9.33 us | 0.51 us | 1.0000 | 0.0022 | 11.4746 | 1.2207 | - | 141.66 KB | + | ExamineStandard | 1 | 100 | 1,555.0 us | 407.07 us | 22.31 us | 1.0000 | 0.0078 | 54.6875 | 15.6250 | - | 681.92 KB | + | LuceneSimple | 1 | 100 | 1,598.8 μs | 233.79 μs | 12.81 μs | 1.0000 | 0.0078 | 52.7344 | 17.5781 | - | 664.64 KB | + | ExamineStandard | 1 | 1000 | 17,449.3 μs | 1,472.32 μs | 80.70 μs | 1.0000 | - | 437.5000 | 312.5000 | 31.2500 | 5723.12 KB | + | LuceneSimple | 1 | 1000 | 17,739.7 μs | 3,797.03 μs | 208.13 μs | 1.0000 | 0.0313 | 437.5000 | 312.5000 | 
31.2500 | 5698.42 KB | + | ExamineStandard | 15 | 10 | 1,630.6 μs | 2,436.46 μs | 133.55 μs | 15.0000 | 0.0430 | 195.3125 | 15.6250 | - | 2362.51 KB | + | LuceneSimple | 15 | 10 | 1,742.6 μs | 214.81 μs | 11.77 μs | 15.0000 | 0.0820 | 179.6875 | 27.3438 | - | 2118.47 KB | + | ExamineStandard | 15 | 100 | 105,817.2 μs | 28,398.55 μs | 1,556.62 μs | 15.0000 | - | 833.3333 | 666.6667 | - | 10225.39 KB | + | LuceneSimple | 15 | 100 | 95,732.1 μs | 57,903.39 μs | 3,173.88 μs | 15.0000 | - | 666.6667 | 500.0000 | - | 9967.2 KB | + | ExamineStandard | 15 | 1000 | 1,125,955.0 μs | 822,782.38 μs | 45,099.48 μs | 15.0000 | - | 7000.0000 | 4000.0000 | 1000.0000 | 85877.8 KB | + | LuceneSimple | 15 | 1000 | 1,446,507.5 μs | 855,107.53 μs | 46,871.33 μs | 15.0000 | - | 7000.0000 | 4000.0000 | 1000.0000 | 85509.77 KB | + | ExamineStandard | 30 | 10 | 4,261.3 μs | 1,676.61 μs | 91.90 μs | 30.0000 | 0.3047 | 390.6250 | 70.3125 | - | 4724.59 KB | + | LuceneSimple | 30 | 10 | 3,895.8 μs | 1,768.88 μs | 96.96 μs | 30.0000 | 0.1250 | 359.3750 | 46.8750 | - | 4237.24 KB | + | ExamineStandard | 30 | 100 | 232,909.0 μs | 30,215.14 μs | 1,656.19 μs | 30.0000 | - | 1500.0000 | 1000.0000 | - | 20455.26 KB | + | LuceneSimple | 30 | 100 | 259,557.3 μs | 40,643.51 μs | 2,227.81 μs | 30.0000 | - | 1500.0000 | 1000.0000 | - | 19940.39 KB | + | ExamineStandard | 30 | 1000 | 2,886,589.2 μs | 328,362.02 μs | 17,998.63 μs | 30.0000 | 1.0000 | 16000.0000 | 11000.0000 | 3000.0000 | 171858.03 KB | + | LuceneSimple | 30 | 1000 | 2,662,715.9 μs | 898,686.63 μs | 49,260.05 μs | 30.0000 | - | 16000.0000 | 11000.0000 | 3000.0000 | 171094.02 KB | + + + */ + [LongRunJob(RuntimeMoniker.Net80)] + [ThreadingDiagnoser] + [MemoryDiagnoser] + //[DotNetCountersDiagnoser] + //[CPUUsageDiagnoser] + public class ConcurrentSearchBenchmarks : ExamineBaseTest + { + private readonly StandardAnalyzer _analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + private ILogger? _logger; + private string? _tempBasePath; + private TestIndex? _indexer; + private FSDirectory? _indexDir; + private IndexWriter? _writer; + private SearcherManager? 
_searcherManager; + + [GlobalSetup] + public override void Setup() + { + base.Setup(); + + _logger = LoggerFactory.CreateLogger(); + _tempBasePath = Path.Combine(Path.GetTempPath(), "ExamineTests"); + + // indexer for examine + _indexer = InitializeAndIndexItems(_tempBasePath, _analyzer, out _); + + // indexer for lucene + var tempIndexer = InitializeAndIndexItems(_tempBasePath, _analyzer, out var indexDir); + tempIndexer.Dispose(); + _indexDir = FSDirectory.Open(indexDir); + var writerConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, _analyzer); + //writerConfig.SetMaxBufferedDocs(1000); + //writerConfig.SetReaderTermsIndexDivisor(4); + //writerConfig.SetOpenMode(OpenMode.APPEND); + //writerConfig.SetReaderPooling(true); + //writerConfig.SetCodec(new Lucene46Codec()); + _writer = new IndexWriter(_indexDir, writerConfig); + var trackingWriter = new TrackingIndexWriter(_writer); + _searcherManager = new SearcherManager(trackingWriter.IndexWriter, applyAllDeletes: true, new SearcherFactory()); + } + + [GlobalCleanup] + public override void TearDown() + { + _indexer.Dispose(); + _searcherManager.Dispose(); + _writer.Dispose(); + _indexDir.Dispose(); + + base.TearDown(); + + System.IO.Directory.Delete(_tempBasePath, true); + } + + [Params(1, 50, 100)] + public int ThreadCount { get; set; } + + [Params(10/*, 100, 1000*/)] + public int MaxResults { get; set; } + + [Benchmark(Baseline = true)] + public async Task ExamineStandard() + { + var tasks = new List(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + // always resolve the searcher from the indexer + var searcher = _indexer.Searcher; + + var query = searcher.CreateQuery("content").Field("nodeName", "location".MultipleCharacterWildcard()); + var results = query.Execute(QueryOptions.SkipTake(0, MaxResults)); + + // enumerate (forces the result to execute) + var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray()); + _logger.LogDebug(logOutput); + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + + [Benchmark] + public async Task SimpleMultiThreadLoop() + { + var tasks = new List(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + + public async Task TestAcquireThreadContention() + { + var tasks = new List(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + var parser = new QueryParser(LuceneVersion.LUCENE_48, ExamineFieldNames.ItemIdFieldName, new StandardAnalyzer(LuceneVersion.LUCENE_48)); + var query = parser.Parse($"{ExamineFieldNames.CategoryFieldName}:content AND nodeName:location*"); + + // this is like doing Acquire, does it perform the same (it will allocate more) + using var context = _searcherManager.GetContext(); + + var searcher = context.Reference; + + // Don't use this, increasing the max docs substantially decreases performance + //var maxDoc = searcher.IndexReader.MaxDoc; + var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true); + + searcher.Search(query, topDocsCollector); + + var topDocs = topDocsCollector.GetTopDocs(0, MaxResults); + + var totalItemCount = topDocs.TotalHits; + var maxScore = topDocs.MaxScore; + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + + [Benchmark] + public async Task 
LuceneAcquireAlways() + { + var tasks = new List(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + var parser = new QueryParser(LuceneVersion.LUCENE_48, ExamineFieldNames.ItemIdFieldName, new StandardAnalyzer(LuceneVersion.LUCENE_48)); + var query = parser.Parse($"{ExamineFieldNames.CategoryFieldName}:content AND nodeName:location*"); + + // this is like doing Acquire, does it perform the same (it will allocate more) + using var context = _searcherManager.GetContext(); + + var searcher = context.Reference; + + // Don't use this, increasing the max docs substantially decreases performance + //var maxDoc = searcher.IndexReader.MaxDoc; + var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true); + + searcher.Search(query, topDocsCollector); + + var topDocs = topDocsCollector.GetTopDocs(0, MaxResults); + + var totalItemCount = topDocs.TotalHits; + + var results = new List(topDocs.ScoreDocs.Length); + + foreach (var scoreDoc in topDocs.ScoreDocs) + { + var docId = scoreDoc.Doc; + var score = scoreDoc.Score; + var shardIndex = scoreDoc.ShardIndex; + var doc = searcher.Doc(docId); + var result = LuceneSearchExecutor.CreateSearchResult(doc, score, shardIndex); + results.Add(result); + } + + var maxScore = topDocs.MaxScore; + + // enumerate (forces the result to execute) + var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray()); + _logger.LogDebug(logOutput); + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + + [Benchmark] + public async Task LuceneAcquireAlwaysWithLock() + { + var tasks = new List(); + var myLock = new object(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + lock (myLock) + { + var parser = new QueryParser(LuceneVersion.LUCENE_48, ExamineFieldNames.ItemIdFieldName, new StandardAnalyzer(LuceneVersion.LUCENE_48)); + var query = parser.Parse($"{ExamineFieldNames.CategoryFieldName}:content AND nodeName:location*"); + + // this is like doing Acquire, does it perform the same (it will allocate more) + using var context = _searcherManager.GetContext(); + + var searcher = context.Reference; + + // Don't use this, increasing the max docs substantially decreases performance + //var maxDoc = searcher.IndexReader.MaxDoc; + var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true); + + searcher.Search(query, topDocsCollector); + + var topDocs = topDocsCollector.GetTopDocs(0, MaxResults); + + var totalItemCount = topDocs.TotalHits; + + var results = new List(topDocs.ScoreDocs.Length); + + foreach (var scoreDoc in topDocs.ScoreDocs) + { + var docId = scoreDoc.Doc; + var score = scoreDoc.Score; + var shardIndex = scoreDoc.ShardIndex; + var doc = searcher.Doc(docId); + var result = LuceneSearchExecutor.CreateSearchResult(doc, score, shardIndex); + results.Add(result); + } + + var maxScore = topDocs.MaxScore; + + // enumerate (forces the result to execute) + var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray()); + _logger.LogDebug(logOutput); + } + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + + [Benchmark] + public async Task LuceneAcquireOnce() + { + var tasks = new List(); + + var searcher = _searcherManager.Acquire(); + + try + { + for (var i = 0; i < 
ThreadCount; i++) + { + tasks.Add(new Task(() => + { + var parser = new QueryParser(LuceneVersion.LUCENE_48, ExamineFieldNames.ItemIdFieldName, new StandardAnalyzer(LuceneVersion.LUCENE_48)); + var query = parser.Parse($"{ExamineFieldNames.CategoryFieldName}:content AND nodeName:location*"); + + // Don't use this, increasing the max docs substantially decreases performance + //var maxDoc = searcher.IndexReader.MaxDoc; + var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true); + + searcher.Search(query, topDocsCollector); + var topDocs = topDocsCollector.GetTopDocs(0, MaxResults); + + var totalItemCount = topDocs.TotalHits; + + var results = new List(topDocs.ScoreDocs.Length); + for (var i = 0; i < topDocs.ScoreDocs.Length; i++) + { + var scoreDoc = topDocs.ScoreDocs[i]; + var docId = scoreDoc.Doc; + var doc = searcher.Doc(docId); + var score = scoreDoc.Score; + var shardIndex = scoreDoc.ShardIndex; + var result = LuceneSearchExecutor.CreateSearchResult(doc, score, shardIndex); + results.Add(result); + } + + var maxScore = topDocs.MaxScore; + + // enumerate (forces the result to execute) + var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray()); + _logger.LogDebug(logOutput); + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + finally + { + _searcherManager.Release(searcher); + } + } + + [Benchmark] + public async Task LuceneSortedDocIds() + { + var tasks = new List(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + var parser = new QueryParser(LuceneVersion.LUCENE_48, ExamineFieldNames.ItemIdFieldName, new StandardAnalyzer(LuceneVersion.LUCENE_48)); + var query = parser.Parse($"{ExamineFieldNames.CategoryFieldName}:content AND nodeName:location*"); + + // this is like doing Acquire, does it perform the same (it will allocate more) + using var context = _searcherManager.GetContext(); + + var searcher = context.Reference; + + // Don't use this, increasing the max docs substantially decreases performance + //var maxDoc = searcher.IndexReader.MaxDoc; + var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true); + + searcher.Search(query, topDocsCollector); + + var topDocs = topDocsCollector.GetTopDocs(0, MaxResults); + + var totalItemCount = topDocs.TotalHits; + + var results = new List(topDocs.ScoreDocs.Length); + + foreach (var scoreDoc in topDocs.ScoreDocs.OrderBy(x => x.Doc)) + { + var docId = scoreDoc.Doc; + var score = scoreDoc.Score; + var shardIndex = scoreDoc.ShardIndex; + var doc = searcher.Doc(docId); + var result = LuceneSearchExecutor.CreateSearchResult(doc, score, shardIndex); + results.Add(result); + } + + var maxScore = topDocs.MaxScore; + + // enumerate (forces the result to execute) + var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray()); + _logger.LogDebug(logOutput); + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + +#if RELEASE + protected override ILoggerFactory CreateLoggerFactory() + => Microsoft.Extensions.Logging.LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Information)); +#endif + private TestIndex InitializeAndIndexItems( + string tempBasePath, + Analyzer analyzer, + out DirectoryInfo indexDir) + { + var tempPath = 
Path.Combine(tempBasePath, Guid.NewGuid().ToString()); + System.IO.Directory.CreateDirectory(tempPath); + indexDir = new DirectoryInfo(tempPath); + var luceneDirectory = FSDirectory.Open(indexDir); + var indexer = GetTestIndex(luceneDirectory, analyzer); + + var random = new Random(); + var valueSets = new List(); + + for (var i = 0; i < 1000; i++) + { + valueSets.Add(ValueSet.FromObject(Guid.NewGuid().ToString(), "content", + new + { + nodeName = "location " + i, + bodyText = Enumerable.Range(0, random.Next(10, 100)).Select(x => Guid.NewGuid().ToString()) + })); + } + + indexer.IndexItems(valueSets); + + return indexer; + } + } +} + +#endif diff --git a/src/Examine.Benchmarks/Examine.Benchmarks.csproj b/src/Examine.Benchmarks/Examine.Benchmarks.csproj new file mode 100644 index 00000000..ea9959ce --- /dev/null +++ b/src/Examine.Benchmarks/Examine.Benchmarks.csproj @@ -0,0 +1,27 @@ + + + + net8.0 + enable + enable + false + false + Exe + + true + $(DefineConstants);LocalBuild + + + + + + + + + + + + + + + diff --git a/src/Examine.Benchmarks/ExamineBaseTest.cs b/src/Examine.Benchmarks/ExamineBaseTest.cs new file mode 100644 index 00000000..06b588c4 --- /dev/null +++ b/src/Examine.Benchmarks/ExamineBaseTest.cs @@ -0,0 +1,65 @@ +using Examine.Lucene; +using Examine.Lucene.Directories; +using Lucene.Net.Analysis; +using Lucene.Net.Index; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Moq; +using Directory = Lucene.Net.Store.Directory; + +namespace Examine.Benchmarks +{ + public abstract class ExamineBaseTest + { + protected ILoggerFactory? LoggerFactory { get; private set; } + + public virtual void Setup() + { + LoggerFactory = CreateLoggerFactory(); + LoggerFactory.CreateLogger(typeof(ExamineBaseTest)).LogDebug("Initializing test"); + } + + public virtual void TearDown() => LoggerFactory.Dispose(); + + public TestIndex GetTestIndex( + Directory d, + Analyzer analyzer, + FieldDefinitionCollection? fieldDefinitions = null, + IndexDeletionPolicy? indexDeletionPolicy = null, + IReadOnlyDictionary? 
indexValueTypesFactory = null, + double nrtTargetMaxStaleSec = 60, + double nrtTargetMinStaleSec = 1, + bool nrtEnabled = true) + => new TestIndex( + LoggerFactory, + Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions + { + FieldDefinitions = fieldDefinitions, + DirectoryFactory = new GenericDirectoryFactory(_ => d), + Analyzer = analyzer, + IndexDeletionPolicy = indexDeletionPolicy, + IndexValueTypesFactory = indexValueTypesFactory, +#if LocalBuild + NrtTargetMaxStaleSec = nrtTargetMaxStaleSec, + NrtTargetMinStaleSec = nrtTargetMinStaleSec, + NrtEnabled = nrtEnabled +#endif + })); + + //public TestIndex GetTestIndex( + // IndexWriter writer, + // double nrtTargetMaxStaleSec = 60, + // double nrtTargetMinStaleSec = 1) + // => new TestIndex( + // LoggerFactory, + // Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneIndexOptions + // { + // NrtTargetMaxStaleSec = nrtTargetMaxStaleSec, + // NrtTargetMinStaleSec = nrtTargetMinStaleSec + // }), + // writer); + + protected virtual ILoggerFactory CreateLoggerFactory() + => Microsoft.Extensions.Logging.LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Debug)); + } +} diff --git a/src/Examine.Benchmarks/IndexVersionComparison.cs b/src/Examine.Benchmarks/IndexVersionComparison.cs new file mode 100644 index 00000000..76201786 --- /dev/null +++ b/src/Examine.Benchmarks/IndexVersionComparison.cs @@ -0,0 +1,48 @@ +using BenchmarkDotNet.Attributes; +using Examine.Lucene.Providers; +using Lucene.Net.Analysis.Standard; +using Microsoft.Extensions.Logging; + +namespace Examine.Benchmarks +{ + [Config(typeof(NugetConfig))] + [ThreadingDiagnoser] + [MemoryDiagnoser] + public class IndexVersionComparison : ExamineBaseTest + { + private readonly List _valueSets = InitTools.CreateValueSet(100); + private readonly StandardAnalyzer _analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + private ILogger? _logger; + private string? _tempBasePath; + private LuceneIndex? 
_indexer; + + [GlobalSetup] + public override void Setup() + { + base.Setup(); + + _logger = LoggerFactory.CreateLogger(); + _tempBasePath = Path.Combine(Path.GetTempPath(), "ExamineTests"); + _indexer = InitTools.InitializeIndex(this, _tempBasePath, _analyzer, out _); + } + + [GlobalCleanup] + public override void TearDown() + { + _indexer!.Dispose(); + base.TearDown(); + System.IO.Directory.Delete(_tempBasePath!, true); + } + + [Benchmark] + public void IndexItemsNonAsync() => IndexItems(_indexer!, _valueSets); + +#if RELEASE + protected override ILoggerFactory CreateLoggerFactory() + => Microsoft.Extensions.Logging.LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Information)); +#endif + + private static void IndexItems(LuceneIndex indexer, IEnumerable valueSets) + => indexer.IndexItems(valueSets); + } +} diff --git a/src/Examine.Benchmarks/InitTools.cs b/src/Examine.Benchmarks/InitTools.cs new file mode 100644 index 00000000..319b6baa --- /dev/null +++ b/src/Examine.Benchmarks/InitTools.cs @@ -0,0 +1,42 @@ +using Lucene.Net.Analysis; +using Lucene.Net.Store; + +namespace Examine.Benchmarks +{ + internal class InitTools + { + public static TestIndex InitializeIndex( + ExamineBaseTest examineBaseTest, + string tempBasePath, + Analyzer analyzer, + out DirectoryInfo indexDir) + { + var tempPath = Path.Combine(tempBasePath, Guid.NewGuid().ToString()); + System.IO.Directory.CreateDirectory(tempPath); + indexDir = new DirectoryInfo(tempPath); + var luceneDirectory = FSDirectory.Open(indexDir); + var indexer = examineBaseTest.GetTestIndex(luceneDirectory, analyzer); + return indexer; + } + + public static List CreateValueSet(int count) + { + var random = new Random(); + var valueSets = new List(); + + for (var i = 0; i < count; i++) + { + valueSets.Add(ValueSet.FromObject(Guid.NewGuid().ToString(), "content", + new + { + nodeName = "location" + (i % 2 == 0 ? 
"1" : "2"), + bodyText = Enumerable.Range(0, random.Next(10, 100)).Select(x => Guid.NewGuid().ToString()), + number = random.Next(0, 1000), + date = DateTime.Now.AddMinutes(random.Next(-1000, 1000)) + })); + } + + return valueSets; + } + } +} diff --git a/src/Examine.Benchmarks/NugetConfig.cs b/src/Examine.Benchmarks/NugetConfig.cs new file mode 100644 index 00000000..957aa8e3 --- /dev/null +++ b/src/Examine.Benchmarks/NugetConfig.cs @@ -0,0 +1,21 @@ +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Environments; +using BenchmarkDotNet.Jobs; + +namespace Examine.Benchmarks +{ + public class NugetConfig : ManualConfig + { + public NugetConfig() + { + var baseJob = Job.ShortRun + .WithRuntime(CoreRuntime.Core80); + + AddJob(baseJob.WithId("Source")); + AddJob(baseJob.WithNuGet("Examine", "3.3.0").WithId("3.3.0").WithArguments([new MsBuildArgument("/p:LocalBuild=false")])); + AddJob(baseJob.WithNuGet("Examine", "3.2.1").WithId("3.2.1").WithArguments([new MsBuildArgument("/p:LocalBuild=false")])); + AddJob(baseJob.WithNuGet("Examine", "3.1.0").WithId("3.1.0").WithArguments([new MsBuildArgument("/p:LocalBuild=false")])); + AddJob(baseJob.WithNuGet("Examine", "3.0.1").WithId("3.0.1").WithArguments([new MsBuildArgument("/p:LocalBuild=false")])); + } + } +} diff --git a/src/Examine.Benchmarks/Program.cs b/src/Examine.Benchmarks/Program.cs new file mode 100644 index 00000000..e09c0a4e --- /dev/null +++ b/src/Examine.Benchmarks/Program.cs @@ -0,0 +1,59 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using BenchmarkDotNet.Running; +using Microsoft.Diagnostics.Tracing.Parsers.Kernel; + +namespace Examine.Benchmarks +{ + public class Program + { + public static async Task Main(string[] args) + { +#if RELEASE + // Benchmark your function here. + BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args); +#else + var bench = new SearchVersionComparison(); + try + { + bench.Setup(); + //await Threads100(bench); + await Threads1(bench); + } + finally + { + bench.TearDown(); + } + +#endif + // Call your function here. + } + +#if LocalBuild + private static async Task Threads100(ConcurrentSearchBenchmarks bench) + { + bench.ThreadCount = 50; + //bench.MaxResults = 10; + + for (var i = 0; i < 100; i++) + { + await bench.ExamineStandard(); + } + } + + private static async Task Threads1(SearchVersionComparison bench) + { + bench.ThreadCount = 1; + //bench.MaxResults = 10; + + for (var i = 0; i < 100; i++) + { + await bench.ConcurrentSearch(); + } + } +#endif + } +} diff --git a/src/Examine.Benchmarks/SearchVersionComparison.cs b/src/Examine.Benchmarks/SearchVersionComparison.cs new file mode 100644 index 00000000..c46f093a --- /dev/null +++ b/src/Examine.Benchmarks/SearchVersionComparison.cs @@ -0,0 +1,79 @@ +using BenchmarkDotNet.Attributes; +using Examine.Lucene.Providers; +using Lucene.Net.Analysis.Standard; +using Microsoft.Extensions.Logging; + +namespace Examine.Benchmarks +{ + [Config(typeof(NugetConfig))] + [HideColumns("Arguments", "StdDev", "Error", "NuGetReferences")] + [MemoryDiagnoser] + public class SearchVersionComparison : ExamineBaseTest + { + private readonly List _valueSets = InitTools.CreateValueSet(10000); + private readonly StandardAnalyzer _analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + private ILogger? _logger; + private string? _tempBasePath; + private LuceneIndex? 
_indexer; + + [GlobalSetup] + public override void Setup() + { + base.Setup(); + + _logger = LoggerFactory.CreateLogger(); + _tempBasePath = Path.Combine(Path.GetTempPath(), "ExamineTests"); + _indexer = InitTools.InitializeIndex(this, _tempBasePath, _analyzer, out _); + _indexer!.IndexItems(_valueSets); + + _logger.LogInformation("Indexed {DocumentCount} documents", _valueSets.Count); + } + + [GlobalCleanup] + public override void TearDown() + { + _indexer!.Dispose(); + base.TearDown(); + Directory.Delete(_tempBasePath!, true); + } + + [Params(1, 25, 100)] + public int ThreadCount { get; set; } + + [Benchmark] + public async Task ConcurrentSearch() + { + var tasks = new List(); + + for (var i = 0; i < ThreadCount; i++) + { + tasks.Add(new Task(() => + { + // always resolve the searcher from the indexer + var searcher = _indexer!.Searcher; + + var query = searcher.CreateQuery().Field("nodeName", "location1"); + var results = query.Execute(); + + // enumerate (forces the result to execute) + var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray()); + _logger!.LogDebug(logOutput); + + //_logger!.LogInformation("Results: {Results}", results.TotalItemCount); + })); + } + + foreach (var task in tasks) + { + task.Start(); + } + + await Task.WhenAll(tasks); + } + +#if RELEASE + protected override ILoggerFactory CreateLoggerFactory() + => Microsoft.Extensions.Logging.LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Information)); +#endif + } +} diff --git a/src/Examine.Benchmarks/TestIndex.cs b/src/Examine.Benchmarks/TestIndex.cs new file mode 100644 index 00000000..7eaebf57 --- /dev/null +++ b/src/Examine.Benchmarks/TestIndex.cs @@ -0,0 +1,35 @@ +using Examine.Lucene; +using Examine.Lucene.Providers; +using Lucene.Net.Index; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; + +namespace Examine.Benchmarks +{ + public class TestIndex : LuceneIndex + { + public const string TestIndexName = "testIndexer"; + + public TestIndex(ILoggerFactory loggerFactory, IOptionsMonitor options) + : base(loggerFactory, TestIndexName, options) + { + RunAsync = false; + } + + //public TestIndex(ILoggerFactory loggerFactory, IOptionsMonitor options, IndexWriter writer) + // : base(loggerFactory, TestIndexName, options, writer) + //{ + // RunAsync = false; + //} + + public static IEnumerable AllData() + { + var data = new List(); + for (var i = 0; i < 100; i++) + { + data.Add(ValueSet.FromObject(i.ToString(), "category" + (i % 2), new { item1 = "value" + i, item2 = "value" + i })); + } + return data; + } + } +} diff --git a/src/Examine.Core/Examine.Core.csproj b/src/Examine.Core/Examine.Core.csproj index 9d22f986..dac762de 100644 --- a/src/Examine.Core/Examine.Core.csproj +++ b/src/Examine.Core/Examine.Core.csproj @@ -29,8 +29,8 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - - + + diff --git a/src/Examine.Core/FieldDefinition.cs b/src/Examine.Core/FieldDefinition.cs index a8804a48..4d426f26 100644 --- a/src/Examine.Core/FieldDefinition.cs +++ b/src/Examine.Core/FieldDefinition.cs @@ -5,7 +5,7 @@ namespace Examine /// /// Defines a field to be indexed /// - public struct FieldDefinition : IEquatable + public readonly struct FieldDefinition : IEquatable { /// /// Constructor @@ -18,12 +18,10 @@ public FieldDefinition(string name, string type) { throw new ArgumentException("Value cannot be null or whitespace.", nameof(name)); } - 
if (string.IsNullOrWhiteSpace(type)) { throw new ArgumentException("Value cannot be null or whitespace.", nameof(type)); } - Name = name; Type = type; } @@ -42,13 +40,10 @@ public FieldDefinition(string name, string type) public bool Equals(FieldDefinition other) => string.Equals(Name, other.Name) && string.Equals(Type, other.Type); /// - public override bool Equals(object? obj) + public override bool Equals(object obj) { - if (obj is null) - { + if (ReferenceEquals(null, obj)) return false; - } - return obj is FieldDefinition definition && Equals(definition); } diff --git a/src/Examine.Core/IndexOperation.cs b/src/Examine.Core/IndexOperation.cs index 7ddbd4ea..5d7122c8 100644 --- a/src/Examine.Core/IndexOperation.cs +++ b/src/Examine.Core/IndexOperation.cs @@ -3,7 +3,7 @@ namespace Examine /// /// Represents an indexing operation (either add/remove) /// - public struct IndexOperation + public readonly struct IndexOperation { /// /// Initializes a new instance of the class. @@ -27,4 +27,4 @@ public IndexOperation(ValueSet valueSet, IndexOperationType operation) /// public IndexOperationType Operation { get; } } -} \ No newline at end of file +} diff --git a/src/Examine.Core/PublicAPI.Unshipped.txt b/src/Examine.Core/PublicAPI.Unshipped.txt index b43c11e7..e2f8245d 100644 --- a/src/Examine.Core/PublicAPI.Unshipped.txt +++ b/src/Examine.Core/PublicAPI.Unshipped.txt @@ -90,3 +90,6 @@ Examine.Search.Int64Range.Min.get -> long Examine.Search.Int64Range.MinInclusive.get -> bool Examine.Search.OrderingExtensions static Examine.Search.OrderingExtensions.WithFacets(this Examine.Search.IOrdering! ordering, System.Action! facets) -> Examine.Search.IQueryExecutor! +const Examine.Search.QueryOptions.AbsoluteMaxResults = 10000 -> int +const Examine.Search.QueryOptions.DefaultMaxResults = 100 -> int +static Examine.SearchExtensions.Escape(this string s, float boost) -> Examine.Search.IExamineValue \ No newline at end of file diff --git a/src/Examine.Core/Search/ExamineValue.cs b/src/Examine.Core/Search/ExamineValue.cs index 06cb5668..7c99ce2a 100644 --- a/src/Examine.Core/Search/ExamineValue.cs +++ b/src/Examine.Core/Search/ExamineValue.cs @@ -3,7 +3,7 @@ namespace Examine.Search { /// - public struct ExamineValue : IExamineValue + public readonly struct ExamineValue : IExamineValue { /// public ExamineValue(Examineness vagueness, string value) @@ -27,6 +27,5 @@ public ExamineValue(Examineness vagueness, string value, float level) /// public float Level { get; } - } } diff --git a/src/Examine.Core/Search/QueryOptions.cs b/src/Examine.Core/Search/QueryOptions.cs index 587468a0..0e3e2a95 100644 --- a/src/Examine.Core/Search/QueryOptions.cs +++ b/src/Examine.Core/Search/QueryOptions.cs @@ -7,10 +7,12 @@ namespace Examine.Search /// public class QueryOptions { + public const int AbsoluteMaxResults = 10000; + /// /// The default maximum ammount of results /// - public const int DefaultMaxResults = 500; + public const int DefaultMaxResults = 100; /// /// Creates a with the specified parameters diff --git a/src/Examine.Core/Search/SortableField.cs b/src/Examine.Core/Search/SortableField.cs index 2ef14efa..14665a03 100644 --- a/src/Examine.Core/Search/SortableField.cs +++ b/src/Examine.Core/Search/SortableField.cs @@ -1,9 +1,9 @@ -namespace Examine.Search +namespace Examine.Search { /// /// Represents a field used to sort results /// - public struct SortableField + public readonly struct SortableField { /// /// The field name to sort by @@ -36,4 +36,4 @@ public SortableField(string fieldName, SortType sortType) 
SortType = sortType; } } -} \ No newline at end of file +} diff --git a/src/Examine.Core/ValueSetValidationResult.cs b/src/Examine.Core/ValueSetValidationResult.cs index dbdb2ada..015fbed1 100644 --- a/src/Examine.Core/ValueSetValidationResult.cs +++ b/src/Examine.Core/ValueSetValidationResult.cs @@ -3,7 +3,7 @@ namespace Examine /// /// Represents a value set validation result /// - public struct ValueSetValidationResult + public readonly struct ValueSetValidationResult { /// public ValueSetValidationResult(ValueSetValidationStatus status, ValueSet valueSet) diff --git a/src/Examine.Host/Examine.csproj b/src/Examine.Host/Examine.csproj index ae0f8a7d..747ce302 100644 --- a/src/Examine.Host/Examine.csproj +++ b/src/Examine.Host/Examine.csproj @@ -23,12 +23,12 @@ + all runtime; build; native; contentfiles; analyzers; buildtransitive - - + diff --git a/src/Examine.Lucene/Directories/FakeLuceneDirectoryIndexOptionsOptionsMonitor.cs b/src/Examine.Lucene/Directories/FakeLuceneDirectoryIndexOptionsOptionsMonitor.cs new file mode 100644 index 00000000..b15cd26d --- /dev/null +++ b/src/Examine.Lucene/Directories/FakeLuceneDirectoryIndexOptionsOptionsMonitor.cs @@ -0,0 +1,16 @@ +using System; +using Microsoft.Extensions.Options; + +namespace Examine.Lucene.Directories +{ + internal sealed class FakeLuceneDirectoryIndexOptionsOptionsMonitor : IOptionsMonitor + { + private static readonly LuceneDirectoryIndexOptions s_default = new LuceneDirectoryIndexOptions(); + + public LuceneDirectoryIndexOptions CurrentValue => s_default; + + public LuceneDirectoryIndexOptions Get(string name) => s_default; + + public IDisposable OnChange(Action listener) => throw new NotImplementedException(); + } +} diff --git a/src/Examine.Lucene/Directories/FileSystemDirectoryFactory.cs b/src/Examine.Lucene/Directories/FileSystemDirectoryFactory.cs index 8c1ea5bc..d6abcc5c 100644 --- a/src/Examine.Lucene/Directories/FileSystemDirectoryFactory.cs +++ b/src/Examine.Lucene/Directories/FileSystemDirectoryFactory.cs @@ -1,7 +1,9 @@ +using System; using System.IO; using Examine.Lucene.Providers; using Lucene.Net.Index; using Lucene.Net.Store; +using Microsoft.Extensions.Options; using Directory = Lucene.Net.Store.Directory; namespace Examine.Lucene.Directories @@ -13,15 +15,31 @@ public class FileSystemDirectoryFactory : DirectoryFactoryBase { private readonly DirectoryInfo _baseDir; - /// + /// /// Creates an instance of /// /// The base directory /// The lock factory - public FileSystemDirectoryFactory(DirectoryInfo baseDir, ILockFactory lockFactory) + [Obsolete("Use ctor with all dependencies")] + public FileSystemDirectoryFactory( + DirectoryInfo baseDir, + ILockFactory lockFactory) + : this (baseDir, lockFactory, new FakeLuceneDirectoryIndexOptionsOptionsMonitor()) + { + } + /// + /// Creates an instance of + /// + /// The base directory + /// The lock factory + public FileSystemDirectoryFactory( + DirectoryInfo baseDir, + ILockFactory lockFactory, + IOptionsMonitor indexOptions) { _baseDir = baseDir; LockFactory = lockFactory; + IndexOptions = indexOptions; } /// @@ -29,6 +47,8 @@ public FileSystemDirectoryFactory(DirectoryInfo baseDir, ILockFactory lockFactor /// public ILockFactory LockFactory { get; } + protected IOptionsMonitor IndexOptions { get; } + /// protected override Directory CreateDirectory(LuceneIndex luceneIndex, bool forceUnlock) { @@ -40,7 +60,16 @@ protected override Directory CreateDirectory(LuceneIndex luceneIndex, bool force { IndexWriter.Unlock(dir); } - return dir; + + var options = 
IndexOptions.GetNamedOptions(luceneIndex.Name); + if (options.NrtEnabled) + { + return new NRTCachingDirectory(dir, options.NrtCacheMaxMergeSizeMB, options.NrtCacheMaxCachedMB); + } + else + { + return dir; + } } /// @@ -54,7 +83,15 @@ protected override Directory CreateTaxonomyDirectory(LuceneIndex luceneIndex, bo { IndexWriter.Unlock(dir); } - return dir; + var options = IndexOptions.GetNamedOptions(luceneIndex.Name); + if (options.NrtEnabled) + { + return new NRTCachingDirectory(dir, options.NrtCacheMaxMergeSizeMB, options.NrtCacheMaxCachedMB); + } + else + { + return dir; + } } } } diff --git a/src/Examine.Lucene/Directories/SyncedFileSystemDirectoryFactory.cs b/src/Examine.Lucene/Directories/SyncedFileSystemDirectoryFactory.cs index 012baf16..f032f295 100644 --- a/src/Examine.Lucene/Directories/SyncedFileSystemDirectoryFactory.cs +++ b/src/Examine.Lucene/Directories/SyncedFileSystemDirectoryFactory.cs @@ -1,7 +1,5 @@ - using System; using System.IO; -using System.Threading; using Examine.Lucene.Providers; using Lucene.Net.Analysis.Standard; using Lucene.Net.Index; @@ -14,73 +12,137 @@ namespace Examine.Lucene.Directories { /// /// A directory factory that replicates the index from main storage on initialization to another - /// directory, then creates a lucene Directory based on that replicated index. A replication thread - /// is spawned to then replicate the local index back to the main storage location. + /// directory, then creates a Lucene Directory based on that replicated index. /// /// + /// A replication thread is spawned to then replicate the local index back to the main storage location. /// By default, Examine configures the local directory to be the %temp% folder. + /// This also checks if the main/local storage indexes are healthy and syncs/removes accordingly. /// public class SyncedFileSystemDirectoryFactory : FileSystemDirectoryFactory { private readonly DirectoryInfo _localDir; + private readonly DirectoryInfo _mainDir; private readonly ILoggerFactory _loggerFactory; - private ExamineReplicator? 
_replicator; + private readonly bool _tryFixMainIndexIfCorrupt; + private readonly ILogger _logger; + private ExamineReplicator _replicator; + private Directory _mainLuceneDir; + + [Obsolete("Use ctor with all dependencies")] + public SyncedFileSystemDirectoryFactory( + DirectoryInfo localDir, + DirectoryInfo mainDir, + ILockFactory lockFactory, + ILoggerFactory loggerFactory) + : this(localDir, mainDir, lockFactory, loggerFactory, new FakeLuceneDirectoryIndexOptionsOptionsMonitor(), false) + { + } + + [Obsolete("Use ctor with all dependencies")] + public SyncedFileSystemDirectoryFactory( + DirectoryInfo localDir, + DirectoryInfo mainDir, + ILockFactory lockFactory, + ILoggerFactory loggerFactory, + bool tryFixMainIndexIfCorrupt) + : base(mainDir, lockFactory, new FakeLuceneDirectoryIndexOptionsOptionsMonitor()) + { + } + + public SyncedFileSystemDirectoryFactory( + DirectoryInfo localDir, + DirectoryInfo mainDir, + ILockFactory lockFactory, + ILoggerFactory loggerFactory, + IOptionsMonitor indexOptions) + : this(localDir, mainDir, lockFactory, loggerFactory, indexOptions, false) + { + } - /// public SyncedFileSystemDirectoryFactory( DirectoryInfo localDir, DirectoryInfo mainDir, ILockFactory lockFactory, - ILoggerFactory loggerFactory) - : base(mainDir, lockFactory) + ILoggerFactory loggerFactory, + IOptionsMonitor indexOptions, + bool tryFixMainIndexIfCorrupt) + : base(mainDir, lockFactory, indexOptions) { _localDir = localDir; + _mainDir = mainDir; _loggerFactory = loggerFactory; + _tryFixMainIndexIfCorrupt = tryFixMainIndexIfCorrupt; + _logger = _loggerFactory.CreateLogger(); } - /// - protected override Directory CreateDirectory(LuceneIndex luceneIndex, bool forceUnlock) + internal CreateResult TryCreateDirectory(LuceneIndex luceneIndex, bool forceUnlock, out Directory directory) { - var path = Path.Combine(_localDir.FullName, luceneIndex.Name); - var localLuceneIndexFolder = new DirectoryInfo(path); + var mainPath = Path.Combine(_mainDir.FullName, luceneIndex.Name); + var mainLuceneIndexFolder = new DirectoryInfo(mainPath); - var mainDir = base.CreateDirectory(luceneIndex, forceUnlock); + var localPath = Path.Combine(_localDir.FullName, luceneIndex.Name); + var localLuceneIndexFolder = new DirectoryInfo(localPath); // used by the replicator, will be a short lived directory for each synced revision and deleted when finished. 
var tempDir = new DirectoryInfo(Path.Combine(_localDir.FullName, "Rep", Guid.NewGuid().ToString("N"))); - if (DirectoryReader.IndexExists(mainDir)) + _mainLuceneDir = base.CreateDirectory(luceneIndex, forceUnlock); + var localLuceneDir = FSDirectory.Open( + localLuceneIndexFolder, + LockFactory.GetLockFactory(localLuceneIndexFolder)); + + var mainIndexExists = DirectoryReader.IndexExists(_mainLuceneDir); + var localIndexExists = DirectoryReader.IndexExists(localLuceneDir); + + var mainResult = CreateResult.Init; + + if (mainIndexExists) + { + mainResult = CheckIndexHealthAndFix(_mainLuceneDir, luceneIndex.Name, _tryFixMainIndexIfCorrupt); + } + + // the main index is/was unhealthy or missing, lets check the local index if it exists + if (localIndexExists && (!mainIndexExists || mainResult.HasFlag(CreateResult.NotClean) || mainResult.HasFlag(CreateResult.MissingSegments))) + { + var localResult = CheckIndexHealthAndFix(localLuceneDir, luceneIndex.Name, false); + + if (localResult == CreateResult.Init) + { + // it was read successfully, we can sync back to main + localResult |= TryGetIndexWriter(OpenMode.APPEND, localLuceneDir, false, luceneIndex.Name, out var indexWriter); + using (indexWriter) + { + if (localResult.HasFlag(CreateResult.OpenedSuccessfully)) + { + SyncIndex(indexWriter, true, luceneIndex.Name, mainLuceneIndexFolder, tempDir); + mainResult |= CreateResult.SyncedFromLocal; + } + } + } + } + + if (mainIndexExists) { // when the lucene directory is going to be created, we'll sync from main storage to local // storage before any index/writer is opened. - using (var tempMainIndexWriter = new IndexWriter( - mainDir, - new IndexWriterConfig( - LuceneInfo.CurrentVersion, - new StandardAnalyzer(LuceneInfo.CurrentVersion)) - { - OpenMode = OpenMode.APPEND, - IndexDeletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()) - })) - using (var tempMainIndex = new LuceneIndex(_loggerFactory, luceneIndex.Name, new TempOptions(), tempMainIndexWriter)) - using (var tempLocalDirectory = new SimpleFSDirectory(localLuceneIndexFolder, LockFactory.GetLockFactory(localLuceneIndexFolder))) - using (var replicator = new ExamineReplicator(_loggerFactory, tempMainIndex, tempLocalDirectory, tempDir)) + + var openMode = mainResult == CreateResult.Init || mainResult.HasFlag(CreateResult.Fixed) || mainResult.HasFlag(CreateResult.SyncedFromLocal) + ? OpenMode.APPEND + : OpenMode.CREATE; + + mainResult |= TryGetIndexWriter(openMode, _mainLuceneDir, true, luceneIndex.Name, out var indexWriter); + using (indexWriter) { - if (forceUnlock) + if (!mainResult.HasFlag(CreateResult.SyncedFromLocal)) { - IndexWriter.Unlock(tempLocalDirectory); + SyncIndex(indexWriter, forceUnlock, luceneIndex.Name, localLuceneIndexFolder, tempDir); } - - // replicate locally. 
- replicator.ReplicateIndex(); } } // now create the replicator that will copy from local to main on schedule - _replicator = new ExamineReplicator(_loggerFactory, luceneIndex, mainDir, tempDir); - var localLuceneDir = FSDirectory.Open( - localLuceneIndexFolder, - LockFactory.GetLockFactory(localLuceneIndexFolder)); + _replicator = new ExamineReplicator(_loggerFactory, luceneIndex, _mainLuceneDir, tempDir); if (forceUnlock) { @@ -90,22 +152,196 @@ protected override Directory CreateDirectory(LuceneIndex luceneIndex, bool force // Start replicating back to main _replicator.StartIndexReplicationOnSchedule(1000); - return localLuceneDir; + var options = IndexOptions.GetNamedOptions(luceneIndex.Name); + if (options.NrtEnabled) + { + directory = new NRTCachingDirectory(localLuceneDir, options.NrtCacheMaxMergeSizeMB, options.NrtCacheMaxCachedMB); + } + else + { + directory = localLuceneDir; + } + + return mainResult; + } + + [Flags] + internal enum CreateResult + { + Init = 0, + MissingSegments = 1, + NotClean = 2, + Fixed = 4, + NotFixed = 8, + ExceptionNotFixed = 16, + CorruptCreatedNew = 32, + OpenedSuccessfully = 64, + SyncedFromLocal = 128 + } + + protected override Directory CreateDirectory(LuceneIndex luceneIndex, bool forceUnlock) + { + _ = TryCreateDirectory(luceneIndex, forceUnlock, out var directory); + return directory; } - /// - /// Disposes the instance - /// - /// If the call is coming from Dispose protected override void Dispose(bool disposing) { base.Dispose(disposing); if (disposing) { _replicator?.Dispose(); + _mainLuceneDir?.Dispose(); } } + private CreateResult TryGetIndexWriter( + OpenMode openMode, + Directory luceneDirectory, + bool createNewIfCorrupt, + string indexName, + out IndexWriter indexWriter) + { + try + { + indexWriter = GetIndexWriter(luceneDirectory, openMode); + + if (openMode == OpenMode.APPEND) + { + return CreateResult.OpenedSuccessfully; + } + else + { + // Required to remove old index files which can be problematic + // if they remain in the index folder when replication is attempted. + indexWriter.Commit(); + indexWriter.WaitForMerges(); + + return CreateResult.CorruptCreatedNew; + } + } + catch (Exception ex) + { + if (createNewIfCorrupt) + { + // Index is corrupted, typically this will be FileNotFoundException or CorruptIndexException + _logger.LogError(ex, "{IndexName} index is corrupt, a new one will be created", indexName); + + indexWriter = GetIndexWriter(luceneDirectory, OpenMode.CREATE); + } + else + { + indexWriter = null; + } + + return CreateResult.CorruptCreatedNew; + } + } + + private void SyncIndex(IndexWriter sourceIndexWriter, bool forceUnlock, string indexName, DirectoryInfo destinationDirectory, DirectoryInfo tempDir) + { + // First, we need to clear the main index. If for some reason it is at the same revision, the syncing won't do anything. + if (destinationDirectory.Exists) + { + foreach (var file in destinationDirectory.EnumerateFiles()) + { + file.Delete(); + } + } + + using (var sourceIndex = new LuceneIndex(_loggerFactory, indexName, new TempOptions(), sourceIndexWriter)) + using (var destinationLuceneDirectory = FSDirectory.Open(destinationDirectory, LockFactory.GetLockFactory(destinationDirectory))) + using (var replicator = new ExamineReplicator(_loggerFactory, sourceIndex, destinationLuceneDirectory, tempDir)) + { + if (forceUnlock) + { + IndexWriter.Unlock(destinationLuceneDirectory); + } + + // replicate locally. 
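When `NrtEnabled` is set, the local directory above is wrapped in Lucene's `NRTCachingDirectory` so that small near-real-time segments are held in RAM. A rough standalone equivalent, using the same 5 MB / 60 MB defaults that `LuceneIndexOptions` introduces later in this diff:

```cs
using Lucene.Net.Store;

// Sketch only: wrap a file system directory for NRT use, or return it as-is.
Directory OpenLocalDirectory(string path, bool nrtEnabled)
{
    var fsDir = FSDirectory.Open(path);

    // Keeps small flushed/merged segments in RAM (segments up to 5 MB,
    // at most 60 MB cached in total here), which suits frequent NRT reopens.
    return nrtEnabled
        ? new NRTCachingDirectory(fsDir, 5.0, 60.0)
        : fsDir;
}
```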
+ replicator.ReplicateIndex(); + } + } + + private CreateResult CheckIndexHealthAndFix( + Directory luceneDir, + string indexName, + bool doFix) + { + using var writer = new StringWriter(); + var result = CreateResult.Init; + + var checker = new CheckIndex(luceneDir) + { + // Redirect the logging output of the checker + InfoStream = writer + }; + + var status = checker.DoCheckIndex(); + writer.Flush(); + + _logger.LogDebug("{IndexName} health check report {IndexReport}", indexName, writer.ToString()); + + if (status.MissingSegments) + { + _logger.LogWarning("{IndexName} index is missing segments, it will be deleted.", indexName); + result = CreateResult.MissingSegments; + } + else if (!status.Clean) + { + _logger.LogWarning("Checked main index {IndexName} and it is not clean.", indexName); + result = CreateResult.NotClean; + + if (doFix) + { + _logger.LogWarning("Attempting to fix {IndexName}. {DocumentsLost} documents will be lost.", indexName, status.TotLoseDocCount); + + try + { + checker.FixIndex(status); + status = checker.DoCheckIndex(); + + if (!status.Clean) + { + _logger.LogError("{IndexName} index could not be fixed, it will be deleted.", indexName); + result |= CreateResult.NotFixed; + } + else + { + _logger.LogInformation("Index {IndexName} fixed. {DocumentsLost} documents were lost.", indexName, status.TotLoseDocCount); + result |= CreateResult.Fixed; + } + } + catch (Exception ex) + { + _logger.LogError(ex, "{IndexName} index could not be fixed, it will be deleted.", indexName); + result |= CreateResult.ExceptionNotFixed; + } + } + } + else + { + _logger.LogInformation("Checked main index {IndexName} and it is clean.", indexName); + } + + return result; + } + + private IndexWriter GetIndexWriter(Directory mainDir, OpenMode openMode) + { + var indexWriter = new IndexWriter( + mainDir, + new IndexWriterConfig( + LuceneInfo.CurrentVersion, + new StandardAnalyzer(LuceneInfo.CurrentVersion)) + { + OpenMode = openMode, + IndexDeletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()) + }); + + return indexWriter; + } + private class TempOptions : IOptionsMonitor { public LuceneDirectoryIndexOptions CurrentValue => new LuceneDirectoryIndexOptions(); diff --git a/src/Examine.Lucene/Directories/TempEnvFileSystemDirectoryFactory.cs b/src/Examine.Lucene/Directories/TempEnvFileSystemDirectoryFactory.cs index 9d0d420d..16973249 100644 --- a/src/Examine.Lucene/Directories/TempEnvFileSystemDirectoryFactory.cs +++ b/src/Examine.Lucene/Directories/TempEnvFileSystemDirectoryFactory.cs @@ -1,5 +1,6 @@ using System; using System.IO; +using Microsoft.Extensions.Options; namespace Examine.Lucene.Directories { @@ -12,11 +13,20 @@ namespace Examine.Lucene.Directories /// public class TempEnvFileSystemDirectoryFactory : FileSystemDirectoryFactory { + [Obsolete("Use ctor with all dependencies")] /// public TempEnvFileSystemDirectoryFactory( IApplicationIdentifier applicationIdentifier, ILockFactory lockFactory) - : base(new DirectoryInfo(GetTempPath(applicationIdentifier)), lockFactory) + : this(applicationIdentifier, lockFactory, new FakeLuceneDirectoryIndexOptionsOptionsMonitor()) + { + } + + public TempEnvFileSystemDirectoryFactory( + IApplicationIdentifier applicationIdentifier, + ILockFactory lockFactory, + IOptionsMonitor indexOptions) + : base(new DirectoryInfo(GetTempPath(applicationIdentifier)), lockFactory, indexOptions) { } diff --git a/src/Examine.Lucene/Examine.Lucene.csproj b/src/Examine.Lucene/Examine.Lucene.csproj index 0ddf6cad..7fe8b6a5 100644 --- 
a/src/Examine.Lucene/Examine.Lucene.csproj +++ b/src/Examine.Lucene/Examine.Lucene.csproj @@ -21,15 +21,18 @@ <_Parameter1>Examine.Test + + <_Parameter1>Examine.Benchmarks + - 4.8.0-beta00016 + 4.8.0-beta00017 - + all runtime; build; native; contentfiles; analyzers; buildtransitive @@ -38,7 +41,7 @@ 4.3.0 - 6.0.0 + 8.0.0 diff --git a/src/Examine.Lucene/ExamineReplicator.cs b/src/Examine.Lucene/ExamineReplicator.cs index 627abd60..7e05672e 100644 --- a/src/Examine.Lucene/ExamineReplicator.cs +++ b/src/Examine.Lucene/ExamineReplicator.cs @@ -81,6 +81,11 @@ public void ReplicateIndex() throw new InvalidOperationException("The destination directory is locked"); } + _logger.LogInformation( + "Replicating index from {SourceIndex} to {DestinationIndex}", + _sourceIndex.GetLuceneDirectory(), + _destinationDirectory); + IndexRevision rev; try { @@ -89,11 +94,17 @@ public void ReplicateIndex() catch (InvalidOperationException) { // will occur if there is nothing to sync + _logger.LogInformation("There was nothing to replicate to {DestinationIndex}", _destinationDirectory); return; } _replicator.Publish(rev); _localReplicationClient.UpdateNow(); + + _logger.LogInformation( + "Replication from index {SourceIndex} to {DestinationIndex} complete.", + _sourceIndex.GetLuceneDirectory(), + _destinationDirectory); } /// diff --git a/src/Examine.Lucene/ExamineTaxonomyReplicator.cs b/src/Examine.Lucene/ExamineTaxonomyReplicator.cs index 535e0e3c..16fd4904 100644 --- a/src/Examine.Lucene/ExamineTaxonomyReplicator.cs +++ b/src/Examine.Lucene/ExamineTaxonomyReplicator.cs @@ -95,7 +95,7 @@ public void ReplicateIndex() IndexAndTaxonomyRevision rev; try { - rev = new IndexAndTaxonomyRevision(_sourceIndex.IndexWriter.IndexWriter, _sourceIndex.TaxonomyWriter as SnapshotDirectoryTaxonomyWriter); + rev = new IndexAndTaxonomyRevision(_sourceIndex.IndexWriter.IndexWriter, _sourceIndex.SnapshotDirectoryTaxonomyIndexWriterFactory); } catch (InvalidOperationException) { @@ -159,7 +159,7 @@ private void SourceIndex_IndexCommitted(object? sender, EventArgs? 
e) _logger.LogDebug("{IndexName} committed", index.Name); } } - var rev = new IndexAndTaxonomyRevision(_sourceIndex.IndexWriter.IndexWriter, _sourceIndex.TaxonomyWriter as SnapshotDirectoryTaxonomyWriter); + var rev = new IndexAndTaxonomyRevision(_sourceIndex.IndexWriter.IndexWriter, _sourceIndex.SnapshotDirectoryTaxonomyIndexWriterFactory); _replicator.Publish(rev); } diff --git a/src/Examine.Lucene/Indexing/FullTextType.cs b/src/Examine.Lucene/Indexing/FullTextType.cs index afa50ef7..acd5e758 100644 --- a/src/Examine.Lucene/Indexing/FullTextType.cs +++ b/src/Examine.Lucene/Indexing/FullTextType.cs @@ -128,6 +128,7 @@ protected override void AddSingleValue(Document doc, object value) if (_sortable) { //to be sortable it cannot be analyzed so we have to make a different field + // TODO: Investigate https://lucene.apache.org/core/4_3_0/core/org/apache/lucene/document/SortedDocValuesField.html doc.Add(new StringField( ExamineFieldNames.SortedFieldNamePrefix + FieldName, str, diff --git a/src/Examine.Lucene/Indexing/GenericAnalyzerFieldValueType.cs b/src/Examine.Lucene/Indexing/GenericAnalyzerFieldValueType.cs index b6760e8f..6620ed74 100644 --- a/src/Examine.Lucene/Indexing/GenericAnalyzerFieldValueType.cs +++ b/src/Examine.Lucene/Indexing/GenericAnalyzerFieldValueType.cs @@ -41,6 +41,7 @@ protected override void AddSingleValue(Document doc, object value) if (_sortable) { //to be sortable it cannot be analyzed so we have to make a different field + // TODO: Investigate https://lucene.apache.org/core/4_3_0/core/org/apache/lucene/document/SortedDocValuesField.html doc.Add(new StringField( ExamineFieldNames.SortedFieldNamePrefix + FieldName, str, diff --git a/src/Examine.Lucene/Indexing/Int32Type.cs b/src/Examine.Lucene/Indexing/Int32Type.cs index c82cd023..87a7c343 100644 --- a/src/Examine.Lucene/Indexing/Int32Type.cs +++ b/src/Examine.Lucene/Indexing/Int32Type.cs @@ -80,8 +80,16 @@ protected override void AddSingleValue(Document doc, object value) return; } - doc.Add(new Int32Field(FieldName, parsedVal, Store ? Field.Store.YES : Field.Store.NO)); + // TODO: We can use this for better scoring/sorting performance + // https://stackoverflow.com/a/44953624/694494 + // https://lucene.apache.org/core/7_4_0/core/org/apache/lucene/document/NumericDocValuesField.html + //var dvField = new NumericDocValuesField(_docValuesFieldName, 0); + //dvField.SetInt32Value(parsedVal); + //doc.Add(dvField); + + doc.Add(new Int32Field(FieldName, parsedVal, Store ? Field.Store.YES : Field.Store.NO)); + if (_isFacetable && _taxonomyIndex) { doc.Add(new FacetField(FieldName, parsedVal.ToString())); @@ -93,9 +101,11 @@ protected override void AddSingleValue(Document doc, object value) doc.Add(new NumericDocValuesField(FieldName, parsedVal)); } } - /// - public override Query? GetQuery(string query) => !TryConvert(query, out int parsedVal) ? null : GetQuery(parsedVal, parsedVal); + public override Query GetQuery(string query) + { + return !TryConvert(query, out int parsedVal) ? null : GetQuery(parsedVal, parsedVal); + } /// public override Query GetQuery(int? lower, int? 
upper, bool lowerInclusive = true, bool upperInclusive = true) diff --git a/src/Examine.Lucene/Indexing/RawStringType.cs b/src/Examine.Lucene/Indexing/RawStringType.cs index b91fd30e..d966ff2d 100644 --- a/src/Examine.Lucene/Indexing/RawStringType.cs +++ b/src/Examine.Lucene/Indexing/RawStringType.cs @@ -35,6 +35,12 @@ protected override void AddSingleValue(Document doc, object value) switch (value) { case IIndexableField f: + // https://lucene.apache.org/core/4_3_0/core/org/apache/lucene/index/IndexableField.html + // BinaryDocValuesField, ByteDocValuesField, DerefBytesDocValuesField, DoubleDocValuesField, DoubleField, + // Field, FloatDocValuesField, FloatField, IntDocValuesField, IntField, LongDocValuesField, LongField, + // NumericDocValuesField, PackedLongDocValuesField, ShortDocValuesField, SortedBytesDocValuesField, + // SortedDocValuesField, SortedSetDocValuesField, StoredField, StraightBytesDocValuesField, StringField, TextField + // https://solr.apache.org/guide/6_6/docvalues.html doc.Add(f); break; case TokenStream ts: diff --git a/src/Examine.Lucene/LuceneIndexOptions.cs b/src/Examine.Lucene/LuceneIndexOptions.cs index 5c136c27..4847c8bb 100644 --- a/src/Examine.Lucene/LuceneIndexOptions.cs +++ b/src/Examine.Lucene/LuceneIndexOptions.cs @@ -1,6 +1,4 @@ -using System; using System.Collections.Generic; -using System.Text; using Lucene.Net.Analysis; using Lucene.Net.Analysis.Standard; using Lucene.Net.Facet; @@ -13,6 +11,15 @@ namespace Examine.Lucene /// public class LuceneIndexOptions : IndexOptions { + public bool NrtEnabled { get; set; } = true; + + public double NrtTargetMaxStaleSec { get; set; } = 60.0; + + public double NrtTargetMinStaleSec { get; set; } = 1.0; + + public double NrtCacheMaxMergeSizeMB { get; set; } = 5.0; + + public double NrtCacheMaxCachedMB { get; set; } = 60.0; /// /// THe index deletion policy /// diff --git a/src/Examine.Lucene/Providers/LuceneIndex.cs b/src/Examine.Lucene/Providers/LuceneIndex.cs index 5a25b3c8..18a73595 100644 --- a/src/Examine.Lucene/Providers/LuceneIndex.cs +++ b/src/Examine.Lucene/Providers/LuceneIndex.cs @@ -4,23 +4,29 @@ using System.Diagnostics; using System.IO; using System.Linq; +using System.Runtime.Intrinsics.X86; using System.Threading; using System.Threading.Tasks; +using Examine.Lucene.Directories; +using Examine.Lucene.Indexing; using Lucene.Net.Analysis; using Lucene.Net.Analysis.Miscellaneous; +using Lucene.Net.Analysis.Standard; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.Search; using Microsoft.Extensions.Logging; -using Directory = Lucene.Net.Store.Directory; -using static Lucene.Net.Index.IndexWriter; using Microsoft.Extensions.Options; +using static Lucene.Net.Index.IndexWriter; +using static Lucene.Net.Store.Lock; +using Directory = Lucene.Net.Store.Directory; using Lucene.Net.Analysis.Standard; using Examine.Lucene.Indexing; using Examine.Lucene.Directories; using Lucene.Net.Facet.Taxonomy; using Lucene.Net.Facet.Taxonomy.Directory; using static Lucene.Net.Replicator.IndexAndTaxonomyRevision; +using Lucene.Net.Replicator; namespace Examine.Lucene.Providers { @@ -157,7 +163,7 @@ public LuceneIndex( /// /// Constructor to allow for creating an indexer at runtime - using NRT /// - internal LuceneIndex( + public LuceneIndex( ILoggerFactory loggerFactory, string name, IOptionsMonitor indexOptions, @@ -170,19 +176,20 @@ internal LuceneIndex( #endregion + private static readonly string[] s_possibleSuffixes = new[] { "Index", "Indexer" }; private readonly LuceneIndexOptions _options; private 
PerFieldAnalyzerWrapper? _fieldAnalyzer; private ControlledRealTimeReopenThread? _nrtReopenThread; private readonly ILogger _logger; - private readonly Lazy? _directory; -#if FULLDEBUG - private FileStream? _logOutput; -#endif + private readonly Lazy _directory; + private readonly FileStream _logOutput; private bool _disposedValue; private readonly IIndexCommiter _committer; private volatile TrackingIndexWriter? _writer; + private SnapshotDirectoryTaxonomyIndexWriterFactory _snapshotDirectoryTaxonomyIndexWriterFactory; + private int _activeWrites = 0; /// @@ -255,20 +262,10 @@ internal LuceneIndex( /// /// Gets the field ananlyzer /// - public PerFieldAnalyzerWrapper FieldAnalyzer - { - get - { - if (DefaultAnalyzer is PerFieldAnalyzerWrapper pfa) - { - return _fieldAnalyzer ??= pfa; - } - else - { - return _fieldAnalyzer ??= _fieldValueTypeCollection.Value.Analyzer; - } - } - } + public PerFieldAnalyzerWrapper FieldAnalyzer => (PerFieldAnalyzerWrapper)(_fieldAnalyzer ??= + (DefaultAnalyzer is PerFieldAnalyzerWrapper pfa) + ? pfa + : _fieldValueTypeCollection.Value.Analyzer); /// @@ -289,9 +286,9 @@ public PerFieldAnalyzerWrapper FieldAnalyzer [EditorBrowsable(EditorBrowsableState.Never)] protected bool IsCancellationRequested => _cancellationToken.IsCancellationRequested; -#endregion + #endregion -#region Events + #region Events /// /// Occurs when [document writing]. @@ -337,9 +334,9 @@ protected override void OnIndexingError(IndexingErrorEventArgs e) protected virtual void OnDocumentWriting(DocumentWritingEventArgs docArgs) => DocumentWriting?.Invoke(this, docArgs); -#endregion + #endregion -#region Provider implementation + #region Provider implementation /// protected override void PerformIndexItems(IEnumerable values, Action onComplete) @@ -543,6 +540,7 @@ private void CreateNewIndex(Directory? dir) //unlock it! Unlock(dir); } + //create the writer (this will overwrite old index files) var writerConfig = new IndexWriterConfig(LuceneInfo.CurrentVersion, FieldAnalyzer) { @@ -550,8 +548,18 @@ private void CreateNewIndex(Directory? dir) MergeScheduler = new ErrorLoggingConcurrentMergeScheduler(Name, (s, e) => OnIndexingError(new IndexingErrorEventArgs(this, s, "-1", e))) }; + + // TODO: With NRT, we should apparently use this but there is no real implementation of it!? + // https://stackoverflow.com/questions/12271614/lucene-net-indexwriter-setmergedsegmentwarmer + //writerConfig.SetMergedSegmentWarmer(new SimpleMergedSegmentWarmer()) + writer = new IndexWriter(dir, writerConfig); + // Required to remove old index files which can be problematic + // if they remain in the index folder when replication is attempted. 
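The comment above describes why a brand-new writer is committed immediately: the commit (plus waiting for merges) purges files left over from a previous index generation before replication touches the folder. In isolation, and with a placeholder path and a plain `StandardAnalyzer` standing in for the index's field analyzer, the pattern is roughly:

```cs
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

// Sketch: create an empty index, then commit straight away so files from any
// previous index generation are removed before replication runs.
using var dir = FSDirectory.Open(@"C:\temp\examine\MyIndex");

var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))
{
    OpenMode = OpenMode.CREATE,
    IndexDeletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())
};

using var writer = new IndexWriter(dir, config);
writer.Commit();
writer.WaitForMerges();
```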
+ writer.Commit(); + writer.WaitForMerges(); + } catch (Exception ex) { @@ -697,9 +705,9 @@ private int PerformDeleteFromIndexInternal(IEnumerable itemIds, Cancella return indexedNodes; } -#endregion + #endregion -#region Protected + #region Protected @@ -908,7 +916,7 @@ protected virtual void AddDocument(Document doc, ValueSet valueSet) var indexTypeValueType = FieldValueTypeCollection.GetValueType(ExamineFieldNames.ItemTypeFieldName, FieldValueTypeCollection.ValueTypeFactories.GetRequiredFactory(FieldDefinitionTypes.InvariantCultureIgnoreCase)); indexTypeValueType.AddValue(doc, valueSet.ItemType); - if(valueSet.Values != null) + if (valueSet.Values != null) { foreach (var field in valueSet.Values) { @@ -960,14 +968,16 @@ protected virtual void AddDocument(Document doc, ValueSet valueSet) } // TODO: try/catch with OutOfMemoryException (see docs on UpdateDocument), though i've never seen this in real life + _latestGen = UpdateLuceneDocument(new Term(ExamineFieldNames.ItemIdFieldName, valueSet.Id), doc); + } + + protected virtual long? UpdateLuceneDocument(Term term, Document doc) + { if (_options.UseTaxonomyIndex) { - _latestGen = IndexWriter.UpdateDocument(new Term(ExamineFieldNames.ItemIdFieldName, valueSet.Id), _options.FacetsConfig.Build(TaxonomyWriter, doc)); - } - else - { - _latestGen = IndexWriter.UpdateDocument(new Term(ExamineFieldNames.ItemIdFieldName, valueSet.Id), _options.FacetsConfig.Build(doc)); + return IndexWriter.UpdateDocument(term, _options.FacetsConfig.Build(TaxonomyWriter, doc)); } + return IndexWriter.UpdateDocument(term, _options.FacetsConfig.Build(doc)); } /// @@ -1213,9 +1223,6 @@ protected virtual IndexWriter CreateIndexWriter(Directory? d) /// See example: http://www.lucenetutorial.com/lucene-nrt-hello-world.html /// http://blog.mikemccandless.com/2011/11/near-real-time-readers-with-lucenes.html /// https://stackoverflow.com/questions/17993960/lucene-4-4-0-new-controlledrealtimereopenthread-sample-usage - /// TODO: Do we need/want to use the ControlledRealTimeReopenThread? Else according to mikecandles above in comments - /// we can probably just get away with using MaybeReopen each time we search. Though there are comments in the lucene - /// code to avoid that and do that on a background thread, which is exactly what ControlledRealTimeReopenThread already does. /// public TrackingIndexWriter IndexWriter { @@ -1285,9 +1292,33 @@ protected virtual DirectoryTaxonomyWriter CreateTaxonomyWriter(Directory? d) { throw new ArgumentNullException(nameof(d)); } - var taxonomyWriter = new SnapshotDirectoryTaxonomyWriter(d); + return new DirectoryTaxonomyWriter(SnapshotDirectoryTaxonomyIndexWriterFactory, d); + } + + /// + /// Gets the taxonomy writer for the current index + /// + public SnapshotDirectoryTaxonomyIndexWriterFactory SnapshotDirectoryTaxonomyIndexWriterFactory + { + get + { + EnsureIndex(false); + + if (_snapshotDirectoryTaxonomyIndexWriterFactory == null) + { + Monitor.Enter(_writerLocker); + try + { + _snapshotDirectoryTaxonomyIndexWriterFactory = new SnapshotDirectoryTaxonomyIndexWriterFactory(); + } + finally + { + Monitor.Exit(_writerLocker); + } + } - return taxonomyWriter; + return _snapshotDirectoryTaxonomyIndexWriterFactory ?? 
throw new NullReferenceException(nameof(_snapshotDirectoryTaxonomyIndexWriterFactory)); + } } /// @@ -1319,40 +1350,71 @@ public DirectoryTaxonomyWriter TaxonomyWriter #endregion -#region Private + #region Private private LuceneSearcher CreateSearcher() { - var possibleSuffixes = new[] { "Index", "Indexer" }; var name = Name; - foreach (var suffix in possibleSuffixes) + foreach (var suffix in s_possibleSuffixes) { //trim the "Indexer" / "Index" suffix if it exists if (!name.EndsWith(suffix)) - { + { continue; } -#pragma warning disable IDE0057 // Use range operator - name = name.Substring(0, name.LastIndexOf(suffix, StringComparison.Ordinal)); -#pragma warning restore IDE0057 // Use range operator + + name = name[..name.LastIndexOf(suffix, StringComparison.Ordinal)]; } var writer = IndexWriter; - var searcherManager = new SearcherManager(writer.IndexWriter, true, new SearcherFactory()); + + // Create an IndexSearcher ReferenceManager to safely share IndexSearcher instances across + // multiple threads + var searcherManager = new SearcherManager( + writer.IndexWriter, + + // TODO: Apply All Deletes? Will be faster if this is false, https://blog.mikemccandless.com/2011/11/near-real-time-readers-with-lucenes.html + // BUT ... to do that we would need to fulfill this requirement: + // "yet during searching you have some way to ignore the old versions" + // Without fulfilling that requirement our Index_Read_And_Write_Ensure_No_Errors_In_Async tests fail when using + // non in-memory directories because it will return more results than what is actually in the index. + true, + + new SearcherFactory()); + searcherManager.AddListener(this); - _nrtReopenThread = new ControlledRealTimeReopenThread(writer, searcherManager, 5.0, 1.0) - { - Name = $"{Name} NRT Reopen Thread", - IsBackground = true - }; + if (_options.NrtEnabled) + { + // Create the ControlledRealTimeReopenThread that reopens the index periodically having into + // account the changes made to the index and tracked by the TrackingIndexWriter instance + // The index is refreshed every XX sec when nobody is waiting + // and every XX sec whenever is someone waiting (see search method) + // (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html) + _nrtReopenThread = new ControlledRealTimeReopenThread( + writer, + searcherManager, + _options.NrtTargetMaxStaleSec, // when there is nobody waiting + _options.NrtTargetMinStaleSec) // when there is someone waiting + { + Name = $"{Name} NRT Reopen Thread", + IsBackground = true + }; - _nrtReopenThread.Start(); + _nrtReopenThread.Start(); + // wait for most recent changes when first creating the searcher + WaitForChanges(); + } + else + { + // wait for most recent changes when first creating the searcher + searcherManager.MaybeRefreshBlocking(); + } // wait for most recent changes when first creating the searcher WaitForChanges(); - return new LuceneSearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection, _options.FacetsConfig); + return new LuceneSearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection, _options.NrtEnabled, _options.FacetsConfig); } private LuceneTaxonomySearcher CreateTaxonomySearcher() @@ -1363,7 +1425,7 @@ private LuceneTaxonomySearcher CreateTaxonomySearcher() { //trim the "Indexer" / "Index" suffix if it exists if (!name.EndsWith(suffix)) - { + { continue; } #pragma warning disable IDE0057 // Use range operator @@ -1382,11 +1444,10 @@ private LuceneTaxonomySearcher 
CreateTaxonomySearcher() }; _taxonomyNrtReopenThread.Start(); - // wait for most recent changes when first creating the searcher WaitForChanges(); - return new LuceneTaxonomySearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection, _options.FacetsConfig); + return new LuceneTaxonomySearcher(name + "Searcher", searcherManager, FieldAnalyzer, FieldValueTypeCollection, _options.NrtEnabled, _options.FacetsConfig); } /// @@ -1420,6 +1481,13 @@ private bool ProcessIndexQueueItem(IndexOperation op) return false; } + // TODO: We can re-use the same document object to save a lot of GC! + // https://cwiki.apache.org/confluence/display/lucene/ImproveIndexingSpeed + // Re-use Document and Field instances + // As of Lucene 2.3 there are new setValue(...) methods that allow you to change the value of a Field.This allows you to re - use a single Field instance across many added documents, which can save substantial GC cost. + // It's best to create a single Document instance, then add multiple Field instances to it, but hold onto these Field instances and re-use them by changing their values for each added document. For example you might have an idField, bodyField, nameField, storedField1, etc. After the document is added, you then directly change the Field values (idField.setValue(...), etc), and then re-add your Document instance. + // Note that you cannot re - use a single Field instance within a Document, and, you should not change a Field's value until the Document containing that Field has been added to the index. See Field for details. + var d = new Document(); AddDocument(d, indexingNodeDataArgs.ValueSet); @@ -1504,7 +1572,7 @@ private void QueueTask(Func op, Action onComplete, } } -#endregion + #endregion /// /// Blocks the calling thread until the internal searcher can see latest documents @@ -1517,10 +1585,13 @@ public void WaitForChanges() { if (_latestGen.HasValue && !_disposedValue && !_cancellationToken.IsCancellationRequested) { - var found = _nrtReopenThread?.WaitForGeneration(_latestGen.Value, 5000); - if (_logger.IsEnabled(LogLevel.Debug)) + if (_options.NrtEnabled) { - _logger.LogDebug("{IndexName} WaitForChanges returned {GenerationFound}", Name, found); + var found = _nrtReopenThread?.WaitForGeneration(_latestGen.Value, 5000); + if (_logger.IsEnabled(LogLevel.Debug)) + { + _logger.LogDebug("{IndexName} WaitForChanges returned {GenerationFound}", Name, found); + } } } } @@ -1648,8 +1719,6 @@ protected virtual void Dispose(bool disposing) { OnIndexingError(new IndexingErrorEventArgs(this, "Error closing the index", "-1", e)); } - - } if (_taxonomyWriter != null) { @@ -1669,6 +1738,11 @@ protected virtual void Dispose(bool disposing) #if FULLDEBUG _logOutput?.Close(); #endif + _fieldAnalyzer?.Dispose(); + if (!object.ReferenceEquals(_fieldAnalyzer, DefaultAnalyzer)) + { + DefaultAnalyzer?.Dispose(); + } } _disposedValue = true; } @@ -1687,7 +1761,5 @@ void ReferenceManager.IRefreshListener.AfterRefresh(bool didRefresh) } } } - - } diff --git a/src/Examine.Lucene/Providers/LuceneSearcher.cs b/src/Examine.Lucene/Providers/LuceneSearcher.cs index c4010d5a..e4be5c6a 100644 --- a/src/Examine.Lucene/Providers/LuceneSearcher.cs +++ b/src/Examine.Lucene/Providers/LuceneSearcher.cs @@ -14,9 +14,11 @@ public class LuceneSearcher : BaseLuceneSearcher, IDisposable { private readonly SearcherManager _searcherManager; private readonly FieldValueTypeCollection _fieldValueTypeCollection; + private readonly bool _isNrt; private bool _disposedValue; + private volatile 
ISearchContext _searchContext; - + /// /// Constructor allowing for creating a NRT instance based on a given writer /// @@ -24,13 +26,29 @@ public class LuceneSearcher : BaseLuceneSearcher, IDisposable /// /// /// - [Obsolete("To remove in Examine V5")] + [Obsolete("Use ctor with all dependencies")] public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection) : base(name, analyzer) { _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection; } + + /// + /// Constructor allowing for creating a NRT instance based on a given writer + /// + /// + /// + /// + /// + /// + public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, bool isNrt) + : base(name, analyzer) + { + _searcherManager = searcherManager; + _fieldValueTypeCollection = fieldValueTypeCollection; + _isNrt = isNrt; + } /// /// Constructor allowing for creating a NRT instance based on a given writer @@ -40,16 +58,26 @@ public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer ana /// /// /// - public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, FacetsConfig facetsConfig) + public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, bool isNrt, FacetsConfig facetsConfig) : base(name, analyzer, facetsConfig) { _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection; + _isNrt = isNrt; } /// public override ISearchContext GetSearchContext() - => new SearchContext(_searcherManager, _fieldValueTypeCollection); + { + // Don't create a new search context unless something has changed + var isCurrent = _searcherManager.IsSearcherCurrent(); + if (_searchContext is null || !isCurrent) + { + _searchContext = new SearchContext(_searcherManager, _fieldValueTypeCollection, _isNrt); + } + + return _searchContext; + } /// [Obsolete("To remove in Examine v5")] diff --git a/src/Examine.Lucene/Providers/LuceneTaxonomySearcher.cs b/src/Examine.Lucene/Providers/LuceneTaxonomySearcher.cs index 0ef607a0..599749d4 100644 --- a/src/Examine.Lucene/Providers/LuceneTaxonomySearcher.cs +++ b/src/Examine.Lucene/Providers/LuceneTaxonomySearcher.cs @@ -4,6 +4,8 @@ using Lucene.Net.Analysis; using Lucene.Net.Facet; using Lucene.Net.Facet.Taxonomy; +using Lucene.Net.Index; +using Lucene.Net.Search; namespace Examine.Lucene.Providers { @@ -14,7 +16,9 @@ public class LuceneTaxonomySearcher : BaseLuceneSearcher, IDisposable, ILuceneTa { private readonly SearcherTaxonomyManager _searcherManager; private readonly FieldValueTypeCollection _fieldValueTypeCollection; + private readonly bool _isNrt; private bool _disposedValue; + private volatile ITaxonomySearchContext _searchContext; /// /// Constructor allowing for creating a NRT instance based on a given writer @@ -24,23 +28,42 @@ public class LuceneTaxonomySearcher : BaseLuceneSearcher, IDisposable, ILuceneTa /// /// /// - public LuceneTaxonomySearcher(string name, SearcherTaxonomyManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, FacetsConfig facetsConfig) + public LuceneTaxonomySearcher(string name, SearcherTaxonomyManager searcherManager, Analyzer analyzer, FieldValueTypeCollection fieldValueTypeCollection, bool isNrt, FacetsConfig facetsConfig) : base(name, analyzer, 
facetsConfig) { _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection; + _isNrt = isNrt; } /// public override ISearchContext GetSearchContext() - => new TaxonomySearchContext(_searcherManager, _fieldValueTypeCollection); + { + // Don't create a new search context unless something has changed + var isCurrent = IsSearcherCurrent(_searcherManager); + if (_searchContext is null || !isCurrent) + { + _searchContext = new TaxonomySearchContext(_searcherManager, _fieldValueTypeCollection, _isNrt); + } + + return _searchContext; + } /// /// Gets the Taxonomy SearchContext /// /// public virtual ITaxonomySearchContext GetTaxonomySearchContext() - => new TaxonomySearchContext(_searcherManager, _fieldValueTypeCollection); + { + // Don't create a new search context unless something has changed + var isCurrent = IsSearcherCurrent(_searcherManager); + if (_searchContext is null || !isCurrent) + { + _searchContext = new TaxonomySearchContext(_searcherManager, _fieldValueTypeCollection, _isNrt); + } + + return _searchContext; + } /// protected override void Dispose(bool disposing) @@ -83,6 +106,29 @@ public IFacetLabel GetPath(int ordinal) var examineFacetLabel = new LuceneFacetLabel(facetLabel); return examineFacetLabel; } + + // + // Summary: + // Returns true if no changes have occured since this searcher ie. reader was opened, + // otherwise false. + private bool IsSearcherCurrent(SearcherTaxonomyManager searcherTaxonomyManager) + { + var indexSearcher = searcherTaxonomyManager.Acquire(); + try + { + IndexReader indexReader = indexSearcher.Searcher.IndexReader; + //if (Debugging.AssertsEnabled) + //{ + // Debugging.Assert(indexReader is DirectoryReader, "searcher's IndexReader should be a DirectoryReader, but got {0}", indexReader); + //} + + return ((DirectoryReader)indexReader).IsCurrent(); + } + finally + { + searcherTaxonomyManager.Release(indexSearcher); + } + } } } diff --git a/src/Examine.Lucene/PublicAPI.Unshipped.txt b/src/Examine.Lucene/PublicAPI.Unshipped.txt index 64b7da0e..78f245db 100644 --- a/src/Examine.Lucene/PublicAPI.Unshipped.txt +++ b/src/Examine.Lucene/PublicAPI.Unshipped.txt @@ -40,6 +40,7 @@ Examine.Lucene.Providers.ILuceneTaxonomySearcher.GetPath(int ordinal) -> Examine Examine.Lucene.Providers.LuceneIndex.GetLuceneTaxonomyDirectory() -> Lucene.Net.Store.Directory? Examine.Lucene.Providers.LuceneIndex.LuceneIndex(Microsoft.Extensions.Logging.ILoggerFactory! loggerFactory, string! name, Microsoft.Extensions.Options.IOptionsMonitor! indexOptions, System.Func! indexCommiterFactory, Lucene.Net.Index.IndexWriter? writer = null) -> void Examine.Lucene.Providers.LuceneIndex.RaiseIndexCommited(object! sender, System.EventArgs! e) -> void +Examine.Lucene.Providers.LuceneIndex.SnapshotDirectoryTaxonomyIndexWriterFactory.get -> Lucene.Net.Replicator.SnapshotDirectoryTaxonomyIndexWriterFactory! Examine.Lucene.Providers.LuceneIndex.TaxonomyWriter.get -> Lucene.Net.Facet.Taxonomy.Directory.DirectoryTaxonomyWriter! Examine.Lucene.Providers.LuceneSearcher.LuceneSearcher(string! name, Lucene.Net.Search.SearcherManager! searcherManager, Lucene.Net.Analysis.Analyzer! analyzer, Examine.Lucene.FieldValueTypeCollection! fieldValueTypeCollection, Lucene.Net.Facet.FacetsConfig! 
facetsConfig) -> void Examine.Lucene.Providers.LuceneTaxonomySearcher @@ -175,3 +176,26 @@ virtual Examine.Lucene.Providers.LuceneIndex.TaxonomySearcher.get -> Examine.Luc virtual Examine.Lucene.Providers.LuceneTaxonomySearcher.GetTaxonomySearchContext() -> Examine.Lucene.Search.ITaxonomySearchContext! virtual Examine.Lucene.Search.LuceneFacetExtractionContext.GetFacetCounts(string! facetIndexFieldName, bool isTaxonomyIndexed) -> Lucene.Net.Facet.Facets! virtual Examine.Lucene.Search.TaxonomySearcherReference.Dispose(bool disposing) -> void +Examine.Lucene.Directories.FileSystemDirectoryFactory.FileSystemDirectoryFactory(System.IO.DirectoryInfo baseDir, Examine.Lucene.Directories.ILockFactory lockFactory, Microsoft.Extensions.Options.IOptionsMonitor indexOptions) -> void +Examine.Lucene.Directories.FileSystemDirectoryFactory.IndexOptions.get -> Microsoft.Extensions.Options.IOptionsMonitor +Examine.Lucene.Directories.SyncedFileSystemDirectoryFactory.SyncedFileSystemDirectoryFactory(System.IO.DirectoryInfo localDir, System.IO.DirectoryInfo mainDir, Examine.Lucene.Directories.ILockFactory lockFactory, Microsoft.Extensions.Logging.ILoggerFactory loggerFactory, bool tryFixMainIndexIfCorrupt) -> void +Examine.Lucene.Directories.SyncedFileSystemDirectoryFactory.SyncedFileSystemDirectoryFactory(System.IO.DirectoryInfo localDir, System.IO.DirectoryInfo mainDir, Examine.Lucene.Directories.ILockFactory lockFactory, Microsoft.Extensions.Logging.ILoggerFactory loggerFactory, Microsoft.Extensions.Options.IOptionsMonitor indexOptions) -> void +Examine.Lucene.Directories.SyncedFileSystemDirectoryFactory.SyncedFileSystemDirectoryFactory(System.IO.DirectoryInfo localDir, System.IO.DirectoryInfo mainDir, Examine.Lucene.Directories.ILockFactory lockFactory, Microsoft.Extensions.Logging.ILoggerFactory loggerFactory, Microsoft.Extensions.Options.IOptionsMonitor indexOptions, bool tryFixMainIndexIfCorrupt) -> void +Examine.Lucene.Directories.TempEnvFileSystemDirectoryFactory.TempEnvFileSystemDirectoryFactory(Examine.Lucene.Directories.IApplicationIdentifier applicationIdentifier, Examine.Lucene.Directories.ILockFactory lockFactory, Microsoft.Extensions.Options.IOptionsMonitor indexOptions) -> void +Examine.Lucene.LuceneIndexOptions.NrtCacheMaxCachedMB.get -> double +Examine.Lucene.LuceneIndexOptions.NrtCacheMaxCachedMB.set -> void +Examine.Lucene.LuceneIndexOptions.NrtCacheMaxMergeSizeMB.get -> double +Examine.Lucene.LuceneIndexOptions.NrtCacheMaxMergeSizeMB.set -> void +Examine.Lucene.LuceneIndexOptions.NrtEnabled.get -> bool +Examine.Lucene.LuceneIndexOptions.NrtEnabled.set -> void +Examine.Lucene.LuceneIndexOptions.NrtTargetMaxStaleSec.get -> double +Examine.Lucene.LuceneIndexOptions.NrtTargetMaxStaleSec.set -> void +Examine.Lucene.LuceneIndexOptions.NrtTargetMinStaleSec.get -> double +Examine.Lucene.LuceneIndexOptions.NrtTargetMinStaleSec.set -> void +Examine.Lucene.Providers.LuceneSearcher.LuceneSearcher(string name, Lucene.Net.Search.SearcherManager searcherManager, Lucene.Net.Analysis.Analyzer analyzer, Examine.Lucene.FieldValueTypeCollection fieldValueTypeCollection, bool isNrt) -> void +Examine.Lucene.Providers.LuceneSearcher.MaybeRefresh() -> bool +Examine.Lucene.Providers.LuceneSearcher.MaybeRefreshBlocking() -> void +Examine.Lucene.Search.LuceneSearchResults.LuceneSearchResults(System.Collections.Generic.IReadOnlyCollection results, int totalItemCount, float maxScore, Examine.Lucene.Search.SearchAfterOptions searchAfterOptions) -> void 
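The new public surface listed above includes the NRT tuning knobs. A hypothetical wiring sketch using the standard named-options pattern; the "MyIndex" name and the values are placeholders, only the property names come from this diff:

```cs
using Examine.Lucene;
using Microsoft.Extensions.DependencyInjection;

var services = new ServiceCollection();

// Hypothetical per-index configuration of the new NRT options.
services.Configure<LuceneDirectoryIndexOptions>("MyIndex", options =>
{
    options.NrtEnabled = true;
    options.NrtTargetMaxStaleSec = 30.0;  // reopen at least this often when nobody is waiting
    options.NrtTargetMinStaleSec = 0.5;   // reopen this quickly when a caller is waiting
    options.NrtCacheMaxMergeSizeMB = 5.0;
    options.NrtCacheMaxCachedMB = 60.0;
});
```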
+Examine.Lucene.Search.SearchContext.SearchContext(Lucene.Net.Search.SearcherManager searcherManager, Examine.Lucene.FieldValueTypeCollection fieldValueTypeCollection, bool isNrt) -> void +Examine.Lucene.Search.SearcherReference.SearcherReference() -> void +virtual Examine.Lucene.Providers.LuceneIndex.UpdateLuceneDocument(Lucene.Net.Index.Term term, Lucene.Net.Documents.Document doc) -> long? \ No newline at end of file diff --git a/src/Examine.Lucene/Search/CustomMultiFieldQueryParser.cs b/src/Examine.Lucene/Search/CustomMultiFieldQueryParser.cs index 0a680ef7..de5f181a 100644 --- a/src/Examine.Lucene/Search/CustomMultiFieldQueryParser.cs +++ b/src/Examine.Lucene/Search/CustomMultiFieldQueryParser.cs @@ -14,6 +14,8 @@ namespace Examine.Lucene.Search /// public class CustomMultiFieldQueryParser : MultiFieldQueryParser { + private QueryParser _keywordAnalyzerQueryParser; + /// public CustomMultiFieldQueryParser(LuceneVersion matchVersion, string[] fields, Analyzer analyzer) : base(matchVersion, fields, analyzer) @@ -21,7 +23,8 @@ public CustomMultiFieldQueryParser(LuceneVersion matchVersion, string[] fields, SearchableFields = fields; } - internal static QueryParser KeywordAnalyzerQueryParser { get; } = new QueryParser(LuceneInfo.CurrentVersion, string.Empty, new KeywordAnalyzer()); + // NOTE: Query parsers are not thread safe so we need to create a new instance here + internal QueryParser KeywordAnalyzerQueryParser => _keywordAnalyzerQueryParser ??= new QueryParser(LuceneInfo.CurrentVersion, string.Empty, new KeywordAnalyzer()); /// /// Fields that are searchable by the query parser diff --git a/src/Examine.Lucene/Search/LuceneSearchExecutor.cs b/src/Examine.Lucene/Search/LuceneSearchExecutor.cs index 053a8e89..d578b12b 100644 --- a/src/Examine.Lucene/Search/LuceneSearchExecutor.cs +++ b/src/Examine.Lucene/Search/LuceneSearchExecutor.cs @@ -9,6 +9,7 @@ using Lucene.Net.Facet.Taxonomy; using Lucene.Net.Index; using Lucene.Net.Search; +using Lucene.Net.Util; using LuceneFacetResult = Lucene.Net.Facet.FacetResult; namespace Examine.Lucene.Search @@ -42,21 +43,6 @@ internal LuceneSearchExecutor(QueryOptions? options, Query query, IEnumerable /// Executes a query /// @@ -92,17 +78,17 @@ public ISearchResults Execute() } } - var maxResults = Math.Min((_options.Skip + 1) * _options.Take, MaxDoc); - maxResults = maxResults >= 1 ? maxResults : QueryOptions.DefaultMaxResults; - int numHits = maxResults; - var sortFields = _sortField as SortField[] ?? _sortField.ToArray(); - Sort? sort = null; - FieldDoc? scoreDocAfter = null; - Filter? filter = null; + Sort sort = null; + FieldDoc scoreDocAfter = null; + Filter filter = null; using (var searcher = _searchContext.GetSearcher()) { + var maxResults = Math.Min((_options.Skip + 1) * _options.Take, QueryOptions.AbsoluteMaxResults); + maxResults = maxResults >= 1 ? maxResults : QueryOptions.DefaultMaxResults; + int numHits = maxResults; + if (sortFields.Length > 0) { sort = new Sort(sortFields); @@ -178,10 +164,16 @@ public ISearchResults Execute() var totalItemCount = topDocs.TotalHits; - var results = new List(topDocs.ScoreDocs.Length); - for (int i = 0; i < topDocs.ScoreDocs.Length; i++) + var results = new List(topDocs.ScoreDocs.Length); + + // TODO: Order by Doc Id for improved perf?? + // Our benchmarks show this is isn't a significant performance improvement, + // but they could be wrong. Sorting by DocId here could only be done if there + // are no sort options. 
+ // See https://cwiki.apache.org/confluence/display/lucene/ImproveSearchingSpeed + foreach (var scoreDoc in topDocs.ScoreDocs) { - var result = GetSearchResult(i, topDocs, searcher.IndexSearcher); + var result = GetSearchResult(scoreDoc, topDocs, searcher.IndexSearcher); if (result != null) { results.Add(result); @@ -216,7 +208,7 @@ private static FieldDoc GetScoreDocAfter(SearchAfterOptions searchAfterOptions) return scoreDocAfter; } - private static SearchAfterOptions? GetSearchAfterOptions(TopDocs topDocs) + internal static SearchAfterOptions GetSearchAfterOptions(TopDocs topDocs) { if (topDocs.TotalHits > 0) { @@ -229,6 +221,7 @@ private static FieldDoc GetScoreDocAfter(SearchAfterOptions searchAfterOptions) return new SearchAfterOptions(scoreDoc.Doc, scoreDoc.Score, new object[0], scoreDoc.ShardIndex); } } + return null; } @@ -266,18 +259,8 @@ private IReadOnlyDictionary ExtractFacets(FacetsCollector? return facets; } - private LuceneSearchResult? GetSearchResult(int index, TopDocs topDocs, IndexSearcher luceneSearcher) + private LuceneSearchResult GetSearchResult(ScoreDoc scoreDoc, TopDocs topDocs, IndexSearcher luceneSearcher) { - // I have seen IndexOutOfRangeException here which is strange as this is only called in one place - // and from that one place "i" is always less than the size of this collection. - // but we'll error check here anyways - if (topDocs.ScoreDocs.Length < index) - { - return null; - } - - var scoreDoc = topDocs.ScoreDocs[index]; - var docId = scoreDoc.Doc; Document doc; if (_fieldsToLoad != null) @@ -288,6 +271,7 @@ private IReadOnlyDictionary ExtractFacets(FacetsCollector? { doc = luceneSearcher.Doc(docId); } + var score = scoreDoc.Score; var shardIndex = scoreDoc.ShardIndex; var result = CreateSearchResult(doc, score, shardIndex); @@ -301,7 +285,7 @@ private IReadOnlyDictionary ExtractFacets(FacetsCollector? /// The score. /// /// A populated search result object - private LuceneSearchResult CreateSearchResult(Document doc, float score, int shardIndex) + internal static LuceneSearchResult CreateSearchResult(Document doc, float score, int shardIndex) { var id = doc.Get("id"); @@ -312,12 +296,12 @@ private LuceneSearchResult CreateSearchResult(Document doc, float score, int sha var searchResult = new LuceneSearchResult(id, score, () => { - //we can use lucene to find out the fields which have been stored for this particular document + //we can use Lucene to find out the fields which have been stored for this particular document var fields = doc.Fields; var resultVals = new Dictionary>(); - foreach (var field in fields.Cast()) + foreach (var field in fields) { var fieldName = field.Name; var values = doc.GetValues(fieldName); diff --git a/src/Examine.Lucene/Search/LuceneSearchQuery.cs b/src/Examine.Lucene/Search/LuceneSearchQuery.cs index b1a787a3..57d79cfe 100644 --- a/src/Examine.Lucene/Search/LuceneSearchQuery.cs +++ b/src/Examine.Lucene/Search/LuceneSearchQuery.cs @@ -245,13 +245,15 @@ private ISearchResults Search(QueryOptions? 
options) return EmptySearchResults.Instance; } + // TODO: Use a Filter for category, not a query + // https://cwiki.apache.org/confluence/display/lucene/ImproveSearchingSpeed query = new BooleanQuery { // prefix the category field query as a must { GetFieldInternalQuery(ExamineFieldNames.CategoryFieldName, new ExamineValue(Examineness.Explicit, Category), true), Occur.MUST } }; - // add the ones that we're already existing + // add the ones that were already existing foreach (var c in existingClauses) { query.Add(c); diff --git a/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs b/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs index e8960684..fb7321a9 100644 --- a/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs +++ b/src/Examine.Lucene/Search/LuceneSearchQueryBase.cs @@ -523,7 +523,7 @@ protected internal LuceneBooleanOperationBase IdInternal(string id, Occur occurr /// /// private Query ParseRawQuery(string rawQuery) - => CustomMultiFieldQueryParser.KeywordAnalyzerQueryParser.Parse(rawQuery); + => _queryParser.KeywordAnalyzerQueryParser.Parse(rawQuery); /// /// Uses a PhraseQuery to build a 'raw/exact' match diff --git a/src/Examine.Lucene/Search/LuceneSearchResults.cs b/src/Examine.Lucene/Search/LuceneSearchResults.cs index 04e3df32..f3d303e5 100644 --- a/src/Examine.Lucene/Search/LuceneSearchResults.cs +++ b/src/Examine.Lucene/Search/LuceneSearchResults.cs @@ -21,11 +21,14 @@ public class LuceneSearchResults : ILuceneSearchResults, IFacetResults /// [Obsolete("To remove in Examine V5")] - public LuceneSearchResults(IReadOnlyCollection results, int totalItemCount) + public LuceneSearchResults(IReadOnlyCollection results, int totalItemCount, + float maxScore, + SearchAfterOptions searchAfterOptions) { _results = results; TotalItemCount = totalItemCount; - MaxScore = float.NaN; + MaxScore = maxScore; + SearchAfter = searchAfterOptions; Facets = _noFacets; } diff --git a/src/Examine.Lucene/Search/MultiSearchSearcherReference.cs b/src/Examine.Lucene/Search/MultiSearchSearcherReference.cs index 8d8c35a2..3900aa65 100644 --- a/src/Examine.Lucene/Search/MultiSearchSearcherReference.cs +++ b/src/Examine.Lucene/Search/MultiSearchSearcherReference.cs @@ -44,7 +44,7 @@ protected virtual void Dispose(bool disposing) { if (disposing) { - foreach(var i in _inner) + foreach (var i in _inner) { i.Dispose(); } diff --git a/src/Examine.Lucene/Search/SearchContext.cs b/src/Examine.Lucene/Search/SearchContext.cs index 725e9f62..fa576b80 100644 --- a/src/Examine.Lucene/Search/SearchContext.cs +++ b/src/Examine.Lucene/Search/SearchContext.cs @@ -1,5 +1,4 @@ using System; -using System.Collections.Generic; using System.Linq; using Examine.Lucene.Indexing; using Lucene.Net.Index; @@ -9,21 +8,38 @@ namespace Examine.Lucene.Search { /// - public class SearchContext : ISearchContext + public sealed class SearchContext : ISearchContext { private readonly SearcherManager _searcherManager; private readonly FieldValueTypeCollection _fieldValueTypeCollection; - private string[]? _searchableFields; - + private readonly bool _isNrt; + private string[] _searchableFields; + /// + [Obsolete("Use ctor with all dependencies")] public SearchContext(SearcherManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection) { - _searcherManager = searcherManager; + _searcherManager = searcherManager; _fieldValueTypeCollection = fieldValueTypeCollection ?? 
throw new ArgumentNullException(nameof(fieldValueTypeCollection)); } - /// - public ISearcherReference GetSearcher() => new SearcherReference(_searcherManager); + public SearchContext(SearcherManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection, bool isNrt) + { + _searcherManager = searcherManager; + _fieldValueTypeCollection = fieldValueTypeCollection ?? throw new ArgumentNullException(nameof(fieldValueTypeCollection)); + _isNrt = isNrt; + } + + // TODO: Do we want to create a new searcher every time? I think so, but we shouldn't allocate so much + public ISearcherReference GetSearcher() + { + if (!_isNrt) + { + _searcherManager.MaybeRefresh(); + } + + return new SearcherReference(_searcherManager); + } /// public string[] SearchableFields @@ -35,9 +51,10 @@ public string[] SearchableFields // IMPORTANT! Do not resolve the IndexSearcher from the `IndexSearcher` property above since this // will not release it from the searcher manager. When we are collecting fields, we are essentially // performing a 'search'. We must ensure that the underlying reader has the correct reference counts. - IndexSearcher searcher = _searcherManager.Acquire(); + var searcher = _searcherManager.Acquire(); + try - { + { var fields = MultiFields.GetMergedFieldInfos(searcher.IndexReader) .Select(x => x.Name) .ToList(); @@ -62,7 +79,7 @@ public IIndexFieldValueType GetFieldValueType(string fieldName) { //Get the value type for the field, or use the default if not defined return _fieldValueTypeCollection.GetValueType( - fieldName, + fieldName, _fieldValueTypeCollection.ValueTypeFactories.GetRequiredFactory(FieldDefinitionTypes.FullText)); } } diff --git a/src/Examine.Lucene/Search/SearcherReference.cs b/src/Examine.Lucene/Search/SearcherReference.cs index a375da3d..0f89cb84 100644 --- a/src/Examine.Lucene/Search/SearcherReference.cs +++ b/src/Examine.Lucene/Search/SearcherReference.cs @@ -1,58 +1,23 @@ -using System; using Lucene.Net.Search; namespace Examine.Lucene.Search { /// - public class SearcherReference : ISearcherReference + public readonly struct SearcherReference : ISearcherReference { - private bool _disposedValue; private readonly SearcherManager _searcherManager; - private IndexSearcher? _searcher; /// public SearcherReference(SearcherManager searcherManager) { _searcherManager = searcherManager; + IndexSearcher = _searcherManager.Acquire(); } /// - public IndexSearcher IndexSearcher - { - get - { - if (_disposedValue) - { - throw new ObjectDisposedException($"{nameof(SearcherReference)} is disposed"); - } - return _searcher ??= _searcherManager.Acquire(); - } - } + public IndexSearcher IndexSearcher { get; } /// - protected virtual void Dispose(bool disposing) - { - if (!_disposedValue) - { - if (disposing) - { - if (_searcher != null) - { - _searcherManager.Release(_searcher); - } - } - - _disposedValue = true; - } - } - - /// - public void Dispose() - { - // Do not change this code. 
Put cleanup code in 'Dispose(bool disposing)' method -#pragma warning disable IDE0022 // Use expression body for method - Dispose(disposing: true); -#pragma warning restore IDE0022 // Use expression body for method - } + public void Dispose() => _searcherManager.Release(IndexSearcher); } } diff --git a/src/Examine.Lucene/Search/TaxonomySearchContext.cs b/src/Examine.Lucene/Search/TaxonomySearchContext.cs index 56687872..7d1b63d9 100644 --- a/src/Examine.Lucene/Search/TaxonomySearchContext.cs +++ b/src/Examine.Lucene/Search/TaxonomySearchContext.cs @@ -13,6 +13,7 @@ public class TaxonomySearchContext : ITaxonomySearchContext { private readonly SearcherTaxonomyManager _searcherManager; private readonly FieldValueTypeCollection _fieldValueTypeCollection; + private readonly bool _isNrt; private string[]? _searchableFields; /// @@ -21,14 +22,22 @@ public class TaxonomySearchContext : ITaxonomySearchContext /// /// /// - public TaxonomySearchContext(SearcherTaxonomyManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection) + public TaxonomySearchContext(SearcherTaxonomyManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection, bool isNrt) { _searcherManager = searcherManager ?? throw new ArgumentNullException(nameof(searcherManager)); _fieldValueTypeCollection = fieldValueTypeCollection ?? throw new ArgumentNullException(nameof(fieldValueTypeCollection)); + _isNrt = isNrt; } /// - public ISearcherReference GetSearcher() => new TaxonomySearcherReference(_searcherManager); + public ISearcherReference GetSearcher() + { + if (!_isNrt) + { + _searcherManager.MaybeRefresh(); + } + return new TaxonomySearcherReference(_searcherManager); + } /// public string[] SearchableFields diff --git a/src/Examine.Test/Examine.Lucene/Directories/SyncedFileSystemDirectoryFactoryTests.cs b/src/Examine.Test/Examine.Lucene/Directories/SyncedFileSystemDirectoryFactoryTests.cs new file mode 100644 index 00000000..5562f7c8 --- /dev/null +++ b/src/Examine.Test/Examine.Lucene/Directories/SyncedFileSystemDirectoryFactoryTests.cs @@ -0,0 +1,229 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Examine.Lucene; +using Examine.Lucene.Analyzers; +using Examine.Lucene.Directories; +using Examine.Lucene.Providers; +using Lucene.Net.Codecs.Lucene46; +using Lucene.Net.Index; +using Lucene.Net.Store; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Moq; +using NUnit.Framework; +using Directory = Lucene.Net.Store.Directory; + +namespace Examine.Test.Examine.Lucene.Directories +{ + [TestFixture] + [NonParallelizable] + public class SyncedFileSystemDirectoryFactoryTests : ExamineBaseTest + { + private const int ItemCount = 100; + + [TestCase(true, false, true, SyncedFileSystemDirectoryFactory.CreateResult.NotClean | SyncedFileSystemDirectoryFactory.CreateResult.Fixed | SyncedFileSystemDirectoryFactory.CreateResult.OpenedSuccessfully)] + [TestCase(true, false, false, SyncedFileSystemDirectoryFactory.CreateResult.NotClean | SyncedFileSystemDirectoryFactory.CreateResult.CorruptCreatedNew)] + [TestCase(true, true, false, SyncedFileSystemDirectoryFactory.CreateResult.MissingSegments | SyncedFileSystemDirectoryFactory.CreateResult.CorruptCreatedNew)] + [TestCase(false, false, false, SyncedFileSystemDirectoryFactory.CreateResult.OpenedSuccessfully)] + [Test] + public void Given_ExistingCorruptIndex_When_CreatingDirectory_Then_IndexCreatedOrOpened( + bool corruptIndex, + bool removeSegments, + bool fixIndex, + Enum expected) + { + 
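Each `[TestCase]` row above encodes the combination of `CreateResult` flags the factory is expected to report, and the assertion later in this test uses `HasFlag`, so the expected value is a bitwise union. A tiny sketch of how such a flags check behaves, using the enum members introduced earlier in this diff:

```cs
using Examine.Lucene.Directories;

// What the factory might report for a dirty-but-repairable main index.
var result = SyncedFileSystemDirectoryFactory.CreateResult.NotClean
    | SyncedFileSystemDirectoryFactory.CreateResult.Fixed
    | SyncedFileSystemDirectoryFactory.CreateResult.OpenedSuccessfully;

var expected = SyncedFileSystemDirectoryFactory.CreateResult.NotClean
    | SyncedFileSystemDirectoryFactory.CreateResult.Fixed;

// HasFlag succeeds only when every bit of the expected value is present in the result.
bool matches = result.HasFlag(expected); // true
```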
var mainPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + + try + { + CreateIndex(mainPath, corruptIndex, removeSegments); + + using var syncedDirFactory = new SyncedFileSystemDirectoryFactory( + new DirectoryInfo(tempPath), + new DirectoryInfo(mainPath), + new DefaultLockFactory(), + LoggerFactory, + Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions()), + fixIndex); + + using var index = new LuceneIndex( + LoggerFactory, + TestIndex.TestIndexName, + Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions + { + DirectoryFactory = syncedDirFactory + })); + + var result = syncedDirFactory.TryCreateDirectory(index, false, out var dir); + + Assert.IsTrue(result.HasFlag(expected), $"{result} does not have flag {expected}"); + } + finally + { + System.IO.Directory.Delete(mainPath, true); + System.IO.Directory.Delete(tempPath, true); + } + } + + [Test] + public void Given_CorruptMainIndex_And_HealthyLocalIndex_When_CreatingDirectory_Then_LocalIndexSyncedToMain() + { + var mainPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + + try + { + // create unhealthy index + CreateIndex(mainPath, true, false); + + // create healthy index + CreateIndex(tempPath, false, false); + + using (var syncedDirFactory = new SyncedFileSystemDirectoryFactory( + new DirectoryInfo(tempPath), + new DirectoryInfo(mainPath), + new DefaultLockFactory(), + LoggerFactory, + Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions()), + false)) + { + using var index = new LuceneIndex( + LoggerFactory, + TestIndex.TestIndexName, + Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions + { + DirectoryFactory = syncedDirFactory + })); + + var result = syncedDirFactory.TryCreateDirectory(index, false, out var dir); + + Assert.IsTrue(result.HasFlag(SyncedFileSystemDirectoryFactory.CreateResult.SyncedFromLocal)); + } + + // Ensure the docs are there in main + using var mainIndex = new LuceneIndex( + LoggerFactory, + TestIndex.TestIndexName, + Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions + { + DirectoryFactory = new GenericDirectoryFactory(_ => FSDirectory.Open(Path.Combine(mainPath, TestIndex.TestIndexName))), + })); + + var searchResults = mainIndex.Searcher.CreateQuery().All().Execute(); + Assert.AreEqual(ItemCount - 2, searchResults.TotalItemCount); + } + finally + { + System.IO.Directory.Delete(mainPath, true); + System.IO.Directory.Delete(tempPath, true); + } + } + + [Test] + public void Given_CorruptMainIndex_And_CorruptLocalIndex_When_CreatingDirectory_Then_NewIndexesCreatedAndUsable() + { + var mainPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + + try + { + // create unhealthy index + CreateIndex(mainPath, true, false); + + // create unhealthy index + CreateIndex(tempPath, true, false); + + using var syncedFactory = new SyncedFileSystemDirectoryFactory( + new DirectoryInfo(tempPath), + new DirectoryInfo(mainPath), + new DefaultLockFactory(), + LoggerFactory, + Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions()), + false); + + // Ensure the docs are there in main + using var mainIndex = new LuceneIndex( + LoggerFactory, + TestIndex.TestIndexName, + Mock.Of>(x => 
x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions + { + DirectoryFactory = syncedFactory, + })); + + var searchResults = mainIndex.Searcher.CreateQuery().All().Execute(); + Assert.AreEqual(0, searchResults.TotalItemCount); + } + finally + { + System.IO.Directory.Delete(mainPath, true); + System.IO.Directory.Delete(tempPath, true); + } + } + + private void CreateIndex(string rootPath, bool corruptIndex, bool removeSegments) + { + var logger = LoggerFactory.CreateLogger(); + + var indexPath = Path.Combine(rootPath, TestIndex.TestIndexName); + logger.LogInformation($"Creating index at {indexPath} with options: corruptIndex: {corruptIndex}, removeSegments: {removeSegments}"); + + using var luceneDir = FSDirectory.Open(indexPath); + + using (var writer = new IndexWriter(luceneDir, new IndexWriterConfig(LuceneInfo.CurrentVersion, new CultureInvariantStandardAnalyzer()))) + using (var indexer = GetTestIndex(writer)) + using (indexer.WithThreadingMode(IndexThreadingMode.Synchronous)) + { + var valueSets = new List(); + for (int i = 0; i < ItemCount; i++) + { + valueSets.Add( + new ValueSet(i.ToString(), "content", + new Dictionary> + { + {"item1", new List(new[] {"value1"})}, + {"item2", new List(new[] {"value2"})} + })); + } + + indexer.IndexItems(valueSets); + + // Now delete some items + indexer.DeleteFromIndex(new[] { "1", "2" }); + + // double ensure we commit here + indexer.IndexWriter.IndexWriter.Commit(); + indexer.IndexWriter.IndexWriter.WaitForMerges(); + } + + + logger.LogInformation("Created index at " + luceneDir.Directory); + Assert.IsTrue(DirectoryReader.IndexExists(luceneDir)); + + if (corruptIndex) + { + CorruptIndex(luceneDir.Directory, removeSegments, logger); + } + } + + private void CorruptIndex(DirectoryInfo dir, bool removeSegments, ILogger logger) + { + // index file extensions (no segments, no gen) + var indexFileExtensions = IndexFileNames.INDEX_EXTENSIONS + .Except(new[] { IndexFileNames.GEN_EXTENSION }) + .ToArray(); + + // Get an index (non segments file) and delete it (corrupt index) + var indexFile = dir.GetFiles() + .Where(x => removeSegments + ? 
x.Extension.Contains(Lucene46SegmentInfoFormat.SI_EXTENSION, StringComparison.OrdinalIgnoreCase) + : indexFileExtensions.Any(e => IndexFileNames.MatchesExtension(x.Extension, e))) + .First(); + + logger.LogInformation($"Deleting {indexFile.FullName}"); + File.Delete(indexFile.FullName); + } + } +} diff --git a/src/Examine.Test/Examine.Lucene/ExamineReplicatorTests.cs b/src/Examine.Test/Examine.Lucene/ExamineReplicatorTests.cs index 89c4eca7..2e5f4ef9 100644 --- a/src/Examine.Test/Examine.Lucene/ExamineReplicatorTests.cs +++ b/src/Examine.Test/Examine.Lucene/ExamineReplicatorTests.cs @@ -13,7 +13,7 @@ namespace Examine.Test.Examine.Lucene.Sync public class ExamineReplicatorTests : ExamineBaseTest { private ILoggerFactory GetLoggerFactory() - => LoggerFactory.Create(x => x.AddConsole().SetMinimumLevel(LogLevel.Debug)); + => Microsoft.Extensions.Logging.LoggerFactory.Create(x => x.AddConsole().SetMinimumLevel(LogLevel.Debug)); [Test] public void GivenAMainIndex_WhenReplicatedLocally_TheLocalIndexIsPopulated() @@ -28,7 +28,7 @@ public void GivenAMainIndex_WhenReplicatedLocally_TheLocalIndexIsPopulated() { mainIndex.CreateIndex(); - mainIndex.IndexItems(mainIndex.AllData()); + mainIndex.IndexItems(TestIndex.AllData()); var mainReader = mainIndex.IndexWriter.IndexWriter.GetReader(true); Assert.AreEqual(100, mainReader.NumDocs); @@ -72,7 +72,7 @@ public void GivenAnOpenedWriter_WhenReplicationAttempted_ThenAnExceptionIsThrown {"item2", new List(new[] {"value2"})} })); - mainIndex.IndexItems(mainIndex.AllData()); + mainIndex.IndexItems(TestIndex.AllData()); Assert.Throws(() => replicator.ReplicateIndex()); } @@ -91,7 +91,7 @@ public void GivenASyncedLocalIndex_WhenTriggered_ThenSyncedBackToMainIndex() using (var replicator = new ExamineReplicator(GetLoggerFactory(), mainIndex, localDir, tempStorage)) { mainIndex.CreateIndex(); - mainIndex.IndexItems(mainIndex.AllData()); + mainIndex.IndexItems(TestIndex.AllData()); replicator.ReplicateIndex(); } @@ -133,7 +133,7 @@ public void GivenASyncedLocalIndex_ThenSyncedBackToMainIndexOnSchedule() using (var replicator = new ExamineReplicator(GetLoggerFactory(), mainIndex, localDir, tempStorage)) { mainIndex.CreateIndex(); - mainIndex.IndexItems(mainIndex.AllData()); + mainIndex.IndexItems(TestIndex.AllData()); replicator.ReplicateIndex(); } diff --git a/src/Examine.Test/Examine.Lucene/ExamineTaxonomyReplicatorTests.cs b/src/Examine.Test/Examine.Lucene/ExamineTaxonomyReplicatorTests.cs index b840e94a..6fbf7297 100644 --- a/src/Examine.Test/Examine.Lucene/ExamineTaxonomyReplicatorTests.cs +++ b/src/Examine.Test/Examine.Lucene/ExamineTaxonomyReplicatorTests.cs @@ -13,7 +13,7 @@ namespace Examine.Test.Examine.Lucene.Sync public class ExamineTaxonomyReplicatorTests : ExamineBaseTest { private ILoggerFactory GetLoggerFactory() - => LoggerFactory.Create(x => x.AddConsole().SetMinimumLevel(LogLevel.Debug)); + => this.CreateLoggerFactory(); [Test] public void GivenAMainIndex_WhenReplicatedLocally_TheLocalIndexIsPopulated() @@ -30,7 +30,7 @@ public void GivenAMainIndex_WhenReplicatedLocally_TheLocalIndexIsPopulated() { mainIndex.CreateIndex(); - mainIndex.IndexItems(mainIndex.AllData()); + mainIndex.IndexItems(TestIndex.AllData()); DirectoryReader mainReader = mainIndex.IndexWriter.IndexWriter.GetReader(true); Assert.AreEqual(100, mainReader.NumDocs); @@ -76,7 +76,7 @@ public void GivenAnOpenedWriter_WhenReplicationAttempted_ThenAnExceptionIsThrown {"item2", new List(new[] {"value2"})} })); - mainIndex.IndexItems(mainIndex.AllData()); + 
mainIndex.IndexItems(TestIndex.AllData()); Assert.Throws(() => replicator.ReplicateIndex()); } @@ -97,7 +97,7 @@ public void GivenASyncedLocalIndex_WhenTriggered_ThenSyncedBackToMainIndex() using (var replicator = new ExamineTaxonomyReplicator(GetLoggerFactory(), mainIndex, localDir, localTaxonomyDir, tempStorage)) { mainIndex.CreateIndex(); - mainIndex.IndexItems(mainIndex.AllData()); + mainIndex.IndexItems(TestIndex.AllData()); replicator.ReplicateIndex(); } @@ -141,7 +141,7 @@ public void GivenASyncedLocalIndex_ThenSyncedBackToMainIndexOnSchedule() using (var replicator = new ExamineTaxonomyReplicator(GetLoggerFactory(), mainIndex, localDir, localTaxonomyDir, tempStorage)) { mainIndex.CreateIndex(); - mainIndex.IndexItems(mainIndex.AllData()); + mainIndex.IndexItems(TestIndex.AllData()); replicator.ReplicateIndex(); } diff --git a/src/Examine.Test/Examine.Lucene/Index/LuceneIndexTests.cs b/src/Examine.Test/Examine.Lucene/Index/LuceneIndexTests.cs index dc9e1830..22296e41 100644 --- a/src/Examine.Test/Examine.Lucene/Index/LuceneIndexTests.cs +++ b/src/Examine.Test/Examine.Lucene/Index/LuceneIndexTests.cs @@ -120,7 +120,7 @@ public void Index_Unlocks_When_Disposed() using (var indexer = GetTestIndex(luceneDir, new StandardAnalyzer(LuceneInfo.CurrentVersion))) { indexer.CreateIndex(); - indexer.IndexItems(indexer.AllData()); + indexer.IndexItems(TestIndex.AllData()); Assert.IsTrue(IndexWriter.IsLocked(luceneDir)); } @@ -137,7 +137,7 @@ public void Rebuild_Index() using (var indexer = GetTestIndex(d, new StandardAnalyzer(LuceneInfo.CurrentVersion))) { indexer.CreateIndex(); - indexer.IndexItems(indexer.AllData()); + indexer.IndexItems(TestIndex.AllData()); var indexWriter = indexer.IndexWriter; var reader = indexWriter.IndexWriter.GetReader(true); @@ -331,7 +331,7 @@ void AddData(object sender, IndexingItemEventArgs e, string key, string value) updatedValues[key] = new List() { value }; - e.SetValues(updatedValues.ToDictionary(x=>x.Key, x=>(IEnumerable) x.Value)); + e.SetValues(updatedValues.ToDictionary(x => x.Key, x => (IEnumerable)x.Value)); } void RemoveData(object sender, IndexingItemEventArgs e, string key) @@ -340,7 +340,7 @@ void RemoveData(object sender, IndexingItemEventArgs e, string key) updatedValues.Remove(key); - e.SetValues(updatedValues.ToDictionary(x=>x.Key, x=>(IEnumerable) x.Value)); + e.SetValues(updatedValues.ToDictionary(x => x.Key, x => (IEnumerable)x.Value)); } using (var luceneDir = new RandomIdRAMDirectory()) @@ -695,6 +695,18 @@ public void Index_Read_And_Write_Ensure_No_Errors_In_Async( // TODO: In this test can we ensure all readers are tracked and closed? 
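For context on what the `SyncedFileSystemDirectoryFactoryTests` above are exercising: the factory inspects an existing index and reports, via the `CreateResult` flags, whether it was missing segments, not clean, fixed, or recreated. A rough sketch of how such a health check can be done with Lucene.NET's `CheckIndex` is shown below; this is illustrative tooling under the assumption of an unlocked on-disk index, not the factory's actual implementation, and `IsIndexHealthy` is a made-up helper name.

```cs
// Hedged sketch: checking index health in the terms the tests' expectations use.
// IsIndexHealthy is a hypothetical helper, not part of SyncedFileSystemDirectoryFactory.
using Lucene.Net.Index;
using Lucene.Net.Store;

static bool IsIndexHealthy(string indexPath)
{
    using var dir = FSDirectory.Open(indexPath);

    // No segments file at all - roughly the "MissingSegments" case in the tests.
    if (!DirectoryReader.IndexExists(dir))
    {
        return false;
    }

    // CheckIndex walks every segment; a non-clean status roughly maps to "NotClean".
    var checker = new CheckIndex(dir);
    var status = checker.DoCheckIndex();
    return status.Clean;
}
```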
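The `AddData`/`RemoveData` helpers reformatted above follow the same general pattern for mutating a document during an indexing event: copy the incoming values, adjust the copy, then push it back with `SetValues`. A small, hedged sketch of a standalone handler in that style follows, assuming `ValueSet.Values` can be materialised into a dictionary as the tests do; the `title`/`searchableTitle` field names are invented for illustration.

```cs
// Illustrative event handler in the same style as AddData/RemoveData above.
// The "searchableTitle" field name is invented for this example.
using System.Collections.Generic;
using System.Linq;
using Examine;

void TransformingValues(object sender, IndexingItemEventArgs e)
{
    // Copy the current values so the copy can be modified.
    var updatedValues = e.ValueSet.Values.ToDictionary(
        x => x.Key,
        x => (IEnumerable<object>)x.Value.ToList());

    // Add a derived field based on an existing one, if present.
    if (updatedValues.TryGetValue("title", out var titles))
    {
        updatedValues["searchableTitle"] = titles
            .Select(t => t?.ToString()?.ToLowerInvariant())
            .Where(t => t != null)
            .Cast<object>()
            .ToList();
    }

    e.SetValues(updatedValues);
}
```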
// TODO: In the search part, we should be searching in various ways and also with skip + // capture the original console out + var consoleOut = TestContext.Out; + + void WriteLog(string msg) + { + // reset console out to the orig, this is required because we suppress + // ExecutionContext which is how this is flowed in Nunit so needed when logging + // in OperationComplete + Console.SetOut(consoleOut); + Console.WriteLine(msg); + } + DirectoryInfo temp = null; global::Lucene.Net.Store.Directory directory; if (inMemory) @@ -719,19 +731,28 @@ public void Index_Read_And_Write_Ensure_No_Errors_In_Async( var tempPath = Path.Combine(tempBasePath, Guid.NewGuid().ToString()); System.IO.Directory.CreateDirectory(tempPath); temp = new DirectoryInfo(tempPath); - directory = new SimpleFSDirectory(temp); + directory = FSDirectory.Open(temp); } try { using (var d = directory) using (var writer = new IndexWriter(d, new IndexWriterConfig(LuceneInfo.CurrentVersion, new CultureInvariantStandardAnalyzer()))) - using (var customIndexer = GetTestIndex(writer)) + using (var customIndexer = GetTestIndex(writer, nrtTargetMaxStaleSec: 1.0, nrtTargetMinStaleSec: 0.1)) using (var customSearcher = (LuceneSearcher)customIndexer.Searcher) using (customIndexer.WithThreadingMode(IndexThreadingMode.Asynchronous)) { + customIndexer.IndexCommitted += (sender, e) => + { + WriteLog("index committed!!!!!!!!!!!!!"); + }; + var waitHandle = new ManualResetEvent(false); + // TODO: This seems broken - we wan see many operations complete while we are indexing/searching + // but currently it seems like we are doing all indexing in a single Task which means we only end up + // committing once and then Boom, all searches are available, we want to be able to see search results + // more immediately. void OperationComplete(object sender, IndexOperationEventArgs e) { //signal that we are done @@ -782,14 +803,14 @@ void doSearch(ISearcher s) { idQueue.Enqueue(docId); var r = s.CreateQuery().Id(docId.ToString()).Execute(); - Console.WriteLine("searching thread: {0}, id: {1}, found: {2}", Thread.CurrentThread.ManagedThreadId, docId, r.Count()); + WriteLog(string.Format("searching thread: {0}, id: {1}, found: {2}", Thread.CurrentThread.ManagedThreadId, docId, r.Count())); Thread.Sleep(searchThreadWait); } } } catch (Exception ex) { - Console.WriteLine("Search ERROR!! {0}", ex); + WriteLog($"Search ERROR!! {ex}"); throw; } } @@ -808,7 +829,7 @@ void doIndex(IIndex ind) var node = getNode(docId - 1); node.Attribute("id").Value = docId.ToString(CultureInfo.InvariantCulture); - Console.WriteLine("Indexing {0}", docId); + WriteLog(string.Format("Indexing {0}", docId)); ind.IndexItems(new[] { node.ConvertToValueSet(IndexTypes.Content) }); Thread.Sleep(indexThreadWait); } @@ -816,7 +837,7 @@ void doIndex(IIndex ind) } catch (Exception ex) { - Console.WriteLine("Index ERROR!! {0}", ex); + WriteLog(string.Format("Index ERROR!! 
{0}", ex)); throw; } } @@ -856,7 +877,7 @@ void doIndex(IIndex ind) customIndexer.WaitForChanges(); var results = customSearcher.CreateQuery().All().Execute(); - Assert.AreEqual(20, results.Count()); + Assert.AreEqual(20, results.Count(), string.Join(", ", results.Select(x => x.Id))); //wait until we are done waitHandle.WaitOne(); diff --git a/src/Examine.Test/Examine.Lucene/Search/ConcurrentSearchBenchmarks.cs b/src/Examine.Test/Examine.Lucene/Search/ConcurrentSearchBenchmarks.cs new file mode 100644 index 00000000..e69de29b diff --git a/src/Examine.Test/Examine.Lucene/Search/FluentApiTests.cs b/src/Examine.Test/Examine.Lucene/Search/FluentApiTests.cs index a7e8746d..93ac2b2b 100644 --- a/src/Examine.Test/Examine.Lucene/Search/FluentApiTests.cs +++ b/src/Examine.Test/Examine.Lucene/Search/FluentApiTests.cs @@ -1,6 +1,8 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Threading; +using System.Threading.Tasks; using Examine.Lucene; using Examine.Lucene.Providers; using Examine.Lucene.Search; @@ -27,6 +29,32 @@ public enum FacetTestType SortedSetFacets } + [Test] + public void Multiple_Searches() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + + using (var luceneDir1 = new RandomIdRAMDirectory()) + using (var indexer1 = GetTestIndex(luceneDir1, analyzer, nrtEnabled: false)) + { + indexer1.IndexItem(ValueSet.FromObject("1", "content", new { item1 = "value1", item2 = "The agitated zebras gallop back and forth in short, panicky dashes, then skitter off into the total absolute darkness." })); + + var searcher = indexer1.Searcher; + + var result = searcher.Search("darkness"); + foreach (var r in result) + { + Console.WriteLine($"Id = {r.Id}, Score = {r.Score}"); + } + + result = searcher.Search("total darkness"); + foreach (var r in result) + { + Console.WriteLine($"Id = {r.Id}, Score = {r.Score}"); + } + } + } + private bool HasFacets(FacetTestType withFacets) => withFacets == FacetTestType.TaxonomyFacets || withFacets == FacetTestType.SortedSetFacets; @@ -4227,6 +4255,83 @@ public void Category() } } + [Test] + public void By_Id() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex(luceneDir, analyzer)) + { + indexer.IndexItems(new[] { + ValueSet.FromObject(1.ToString(), "content", + new { Content = "hello world", Type = "type1" }), + ValueSet.FromObject(2.ToString(), "content", + new { Content = "hello something or other", Type = "type1" }), + ValueSet.FromObject(3.ToString(), "content", + new { Content = "hello you guys", Type = "type1" }) + }); + + var searcher = indexer.Searcher; + + var query = searcher.CreateQuery().Id(2.ToString()); + Console.WriteLine(query); + + var results = query.Execute(); + + //Assert + Assert.AreEqual(1, results.TotalItemCount); + } + } + + [Ignore("This test needs to be updated to ensure that searching calls GetFieldInternalQuery with useQueryParser = false, see https://github.com/Shazwazza/Examine/issues/335#issuecomment-1834677581")] + [Test] + public void Query_With_Category_Multi_Threaded() + { + var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion); + using (var luceneDir = new RandomIdRAMDirectory()) + using (var indexer = GetTestIndex(luceneDir, analyzer)) + { + indexer.IndexItems(new[] { + ValueSet.FromObject(1.ToString(), "content", + new { Content = "hello world", Type = "type1" }), + ValueSet.FromObject(2.ToString(), "content", + new { Content = "hello something or other", Type = 
"type1" }), + ValueSet.FromObject(3.ToString(), "content", + new { Content = "hello you guys", Type = "type3" }), + ValueSet.FromObject(4.ToString(), "media", + new { Content = "hello you cruel world", Type = "type2" }), + ValueSet.FromObject(5.ToString(), "media", + new { Content = "hi there, hello world", Type = "type2" }) + }); + + var searcher = indexer.Searcher; + + var tasks = Enumerable.Range(0, 1) + .Select(x => new Task(() => + { + var criteria = searcher.CreateQuery("content", BooleanOperation.And); + IBooleanOperation examineQuery; + examineQuery = criteria + .GroupedOr(new string[] { "Type" }, "type1", "type2") + .And() + .Field("Content", "hel".MultipleCharacterWildcard()); + + var results = examineQuery.Execute(); + + //Assert + Console.WriteLine(results.TotalItemCount + ", Thread: " + Thread.CurrentThread.ManagedThreadId); + Assert.AreEqual(2, results.TotalItemCount); + })) + .ToArray(); + + Parallel.ForEach(tasks, x => x.Start()); + + Task.WaitAll(tasks); + + Assert.IsTrue(tasks.All(x => x.IsCompletedSuccessfully)); + } + } + //[Test] //public void Wildcard_Results_Sorted_By_Score() //{ diff --git a/src/Examine.Test/Examine.Test.csproj b/src/Examine.Test/Examine.Test.csproj index 9a5e64bc..7400c109 100644 --- a/src/Examine.Test/Examine.Test.csproj +++ b/src/Examine.Test/Examine.Test.csproj @@ -11,7 +11,7 @@ - net7.0;net6.0; + net6.0;net8.0; false false @@ -53,21 +53,21 @@ - + - 4.8.0-beta00016 + 4.8.0-beta00017 - - - - - + + + + + 3.13.3 - + - + diff --git a/src/Examine.Test/ExamineBaseTest.cs b/src/Examine.Test/ExamineBaseTest.cs index a75b6741..f5d6c446 100644 --- a/src/Examine.Test/ExamineBaseTest.cs +++ b/src/Examine.Test/ExamineBaseTest.cs @@ -14,18 +14,28 @@ namespace Examine.Test { public abstract class ExamineBaseTest { + protected ILoggerFactory LoggerFactory => CreateLoggerFactory(); + [SetUp] public virtual void Setup() { - var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Debug)); - loggerFactory.CreateLogger(typeof(ExamineBaseTest)).LogDebug("Initializing test"); } - public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCollection fieldDefinitions = null, IndexDeletionPolicy indexDeletionPolicy = null, IReadOnlyDictionary indexValueTypesFactory = null, FacetsConfig facetsConfig = null) - { - var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Debug)); - return new TestIndex( - loggerFactory, + [TearDown] + public virtual void TearDown() => LoggerFactory.Dispose(); + + public TestIndex GetTestIndex( + Directory d, + Analyzer analyzer, + FieldDefinitionCollection fieldDefinitions = null, + IndexDeletionPolicy indexDeletionPolicy = null, + IReadOnlyDictionary indexValueTypesFactory = null, + double nrtTargetMaxStaleSec = 60, + double nrtTargetMinStaleSec = 1, + bool nrtEnabled = true, + FacetsConfig facetsConfig = null) + => new TestIndex( + LoggerFactory, Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions { FieldDefinitions = fieldDefinitions, @@ -33,22 +43,32 @@ public TestIndex GetTestIndex(Directory d, Analyzer analyzer, FieldDefinitionCol Analyzer = analyzer, IndexDeletionPolicy = indexDeletionPolicy, IndexValueTypesFactory = indexValueTypesFactory, + NrtTargetMaxStaleSec = nrtTargetMaxStaleSec, + NrtTargetMinStaleSec = nrtTargetMinStaleSec, + NrtEnabled = nrtEnabled, FacetsConfig = facetsConfig ?? 
new FacetsConfig() })); - } - public TestIndex GetTestIndex(IndexWriter writer) - { - var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Debug)); - return new TestIndex( - loggerFactory, - Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneIndexOptions()), + public TestIndex GetTestIndex( + IndexWriter writer, + double nrtTargetMaxStaleSec = 60, + double nrtTargetMinStaleSec = 1) + => new TestIndex( + LoggerFactory, + Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneIndexOptions + { + NrtTargetMaxStaleSec = nrtTargetMaxStaleSec, + NrtTargetMinStaleSec = nrtTargetMinStaleSec + }), writer); - } + + protected virtual ILoggerFactory CreateLoggerFactory() + => Microsoft.Extensions.Logging.LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Debug)); + public TestIndex GetTaxonomyTestIndex(Directory d, Directory taxonomyDirectory, Analyzer analyzer, FieldDefinitionCollection fieldDefinitions = null, IndexDeletionPolicy indexDeletionPolicy = null, IReadOnlyDictionary indexValueTypesFactory = null, FacetsConfig facetsConfig = null) { - var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Debug)); + var loggerFactory = CreateLoggerFactory(); return new TestIndex( loggerFactory, Mock.Of>(x => x.Get(TestIndex.TestIndexName) == new LuceneDirectoryIndexOptions diff --git a/src/Examine.Test/TestIndex.cs b/src/Examine.Test/TestIndex.cs index a6dd1841..dc2df3bf 100644 --- a/src/Examine.Test/TestIndex.cs +++ b/src/Examine.Test/TestIndex.cs @@ -23,7 +23,7 @@ public TestIndex(ILoggerFactory loggerFactory, IOptionsMonitor AllData() + public static IEnumerable AllData() { var data = new List(); for (int i = 0; i < 100; i++) diff --git a/src/Examine.Web.Demo/Examine.Web.Demo.csproj b/src/Examine.Web.Demo/Examine.Web.Demo.csproj index 1b9b5229..18d4201c 100644 --- a/src/Examine.Web.Demo/Examine.Web.Demo.csproj +++ b/src/Examine.Web.Demo/Examine.Web.Demo.csproj @@ -7,6 +7,7 @@ + @@ -14,10 +15,6 @@ - - - - diff --git a/src/Examine.sln b/src/Examine.sln index 4841ded4..ebc1a49b 100644 --- a/src/Examine.sln +++ b/src/Examine.sln @@ -29,6 +29,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Examine", "Examine.Host\Exa EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Examine.Web.Demo", "Examine.Web.Demo\Examine.Web.Demo.csproj", "{99D0B284-AFDA-4A32-A88B-9B182DF8CE2F}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Examine.Benchmarks", "Examine.Benchmarks\Examine.Benchmarks.csproj", "{07D99A13-2B8B-4D13-90FE-0AB1F555C92D}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -55,6 +57,10 @@ Global {99D0B284-AFDA-4A32-A88B-9B182DF8CE2F}.Debug|Any CPU.Build.0 = Debug|Any CPU {99D0B284-AFDA-4A32-A88B-9B182DF8CE2F}.Release|Any CPU.ActiveCfg = Release|Any CPU {99D0B284-AFDA-4A32-A88B-9B182DF8CE2F}.Release|Any CPU.Build.0 = Release|Any CPU + {07D99A13-2B8B-4D13-90FE-0AB1F555C92D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {07D99A13-2B8B-4D13-90FE-0AB1F555C92D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {07D99A13-2B8B-4D13-90FE-0AB1F555C92D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {07D99A13-2B8B-4D13-90FE-0AB1F555C92D}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE
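Taken together, the `ExamineBaseTest.GetTestIndex` overloads above let individual tests choose their NRT behaviour rather than relying on defaults. A hedged sketch of a test exercising those knobs follows; the staleness values and the `title` field are arbitrary examples, and this is an illustration rather than a test in the suite.

```cs
// Illustrative test only - exercises the new nrt* parameters on GetTestIndex.
// The field name "title" and the staleness values are arbitrary examples.
[Test]
public void Nrt_Staleness_Can_Be_Tuned_Per_Test()
{
    var analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion);

    using (var luceneDir = new RandomIdRAMDirectory())
    using (var index = GetTestIndex(
        luceneDir,
        analyzer,
        nrtTargetMaxStaleSec: 1.0,   // aggressive staleness targets, similar to
        nrtTargetMinStaleSec: 0.1,   // Index_Read_And_Write_Ensure_No_Errors_In_Async
        nrtEnabled: true))
    {
        using (index.WithThreadingMode(IndexThreadingMode.Synchronous))
        {
            index.IndexItem(ValueSet.FromObject("1", "content", new { title = "hello world" }));
        }

        index.WaitForChanges();

        var results = index.Searcher.CreateQuery().Id("1").Execute();
        Assert.AreEqual(1, results.TotalItemCount);
    }
}
```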