-
-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ce04058
commit 46e87eb
Showing
25 changed files
with
475 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Migration Guide | ||
This document describes the changes that need to be made to migrate from one version of the blog to another. | ||
|
||
## 8.0 to 9.0 | ||
A new `SimilarBlogPosts` table is introduced to store, for each blog post, the ids of its similar blog posts. | ||
|
||
```sql | ||
-- Stores, per blog post (Id), the ids of the blog posts considered similar to it.
-- The column name matches the mapped entity property `SimilarBlogPostIds`.
CREATE TABLE SimilarBlogPosts
(
    Id NVARCHAR(450) NOT NULL,
    SimilarBlogPostIds NVARCHAR(1350) NOT NULL
);

ALTER TABLE SimilarBlogPosts
    ADD CONSTRAINT PK_SimilarBlogPosts PRIMARY KEY (Id);
``` | ||
|
||
Add the following to the `appsettings.json`: | ||
|
||
```json | ||
{ | ||
"SimilarBlogPosts": true | ||
} | ||
``` | ||
|
||
Or `false` if you don't want to use this feature. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
using System.Collections.Generic; | ||
|
||
namespace LinkDotNet.Blog.Domain; | ||
|
||
/// <summary>
/// Persisted record listing the ids of the blog posts that are similar to one blog post.
/// The record is looked up by the id of the blog post it belongs to.
/// </summary>
public class SimilarBlogPost : Entity
{
    /// <summary>
    /// Ids of the blog posts regarded as similar. Starts out as an empty list.
    /// </summary>
    public IList<string> SimilarBlogPostIds { get; set; } = new List<string>();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17 changes: 17 additions & 0 deletions
17
src/LinkDotNet.Blog.Infrastructure/Persistence/Sql/Mapping/SimilarBlogPostConfiguration.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
using LinkDotNet.Blog.Domain; | ||
using Microsoft.EntityFrameworkCore; | ||
using Microsoft.EntityFrameworkCore.Metadata.Builders; | ||
|
||
namespace LinkDotNet.Blog.Infrastructure.Persistence.Sql.Mapping; | ||
|
||
/// <summary>
/// Entity Framework mapping for <see cref="SimilarBlogPost"/>.
/// </summary>
internal sealed class SimilarBlogPostConfiguration : IEntityTypeConfiguration<SimilarBlogPost>
{
    // NOTE(review): presumably sized for three ids of up to 450 characters each - confirm.
    private const int SimilarBlogPostIdsMaxLength = 450 * 3;

    /// <summary>
    /// Declares the primary key, the id column settings and the constraints of the id list column.
    /// </summary>
    /// <param name="builder">Builder used to describe the <see cref="SimilarBlogPost"/> entity.</param>
    public void Configure(EntityTypeBuilder<SimilarBlogPost> builder)
    {
        builder.HasKey(entity => entity.Id);

        var idProperty = builder.Property(entity => entity.Id);
        idProperty.IsUnicode(false);
        idProperty.ValueGeneratedOnAdd();

        var similarIdsProperty = builder.Property(entity => entity.SimilarBlogPostIds);
        similarIdsProperty.HasMaxLength(SimilarBlogPostIdsMaxLength);
        similarIdsProperty.IsRequired();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
30 changes: 30 additions & 0 deletions
30
src/LinkDotNet.Blog.Web/Features/Services/Similiarity/SimiliarityCalculator.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
|
||
namespace LinkDotNet.Blog.Web.Features.Services.Similiarity; | ||
|
||
/// <summary>
/// Similarity measures for sparse term vectors stored as term-to-weight dictionaries.
/// </summary>
public static class SimilarityCalculator
{
    /// <summary>
    /// Computes the cosine similarity of two sparse vectors.
    /// Terms that are missing from a vector are treated as having weight zero.
    /// </summary>
    /// <param name="vectorA">First vector (term to weight).</param>
    /// <param name="vectorB">Second vector (term to weight).</param>
    /// <returns>
    /// The dot product divided by the product of both magnitudes, or <c>0</c> when either
    /// vector has zero magnitude (for example an empty vector), where the quotient is undefined.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either vector is <c>null</c>.</exception>
    public static double CosineSimilarity(Dictionary<string, double> vectorA, Dictionary<string, double> vectorB)
    {
        ArgumentNullException.ThrowIfNull(vectorA);
        ArgumentNullException.ThrowIfNull(vectorB);

        var dotProduct = 0d;
        var squaredMagnitudeA = 0d;

        foreach (var (term, weightA) in vectorA)
        {
            // Only terms present in both vectors contribute to the dot product.
            if (vectorB.TryGetValue(term, out var weightB))
            {
                dotProduct += weightA * weightB;
            }

            squaredMagnitudeA += weightA * weightA;
        }

        var squaredMagnitudeB = vectorB.Values.Sum(weight => weight * weight);
        var denominator = Math.Sqrt(squaredMagnitudeA) * Math.Sqrt(squaredMagnitudeB);

        // Without this guard a zero vector yields 0 / 0 = NaN, which poisons any later ordering.
        if (denominator == 0d)
        {
            return 0d;
        }

        return dotProduct / denominator;
    }
}
26 changes: 26 additions & 0 deletions
26
src/LinkDotNet.Blog.Web/Features/Services/Similiarity/TextProcessor.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text.RegularExpressions; | ||
|
||
namespace LinkDotNet.Blog.Web.Features.Services.Similiarity; | ||
|
||
/// <summary>
/// Turns raw text into normalized tokens for the similarity calculation.
/// </summary>
public static partial class TextProcessor
{
    private static readonly char[] Separator = [' '];

    /// <summary>
    /// Tokenizes every text and returns all tokens as one flat collection, in input order.
    /// </summary>
    /// <param name="texts">The texts to tokenize.</param>
    /// <returns>Upper-cased tokens consisting only of the characters A-Z and 0-9.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="texts"/> or one of its elements is <c>null</c>.</exception>
    public static IReadOnlyCollection<string> TokenizeAndNormalize(IEnumerable<string> texts)
    {
        ArgumentNullException.ThrowIfNull(texts);

        return texts.SelectMany(TokenizeAndNormalize).ToList();
    }

    /// <summary>
    /// Upper-cases the text, replaces every run of non-alphanumeric characters with a single
    /// space and splits the result into tokens.
    /// </summary>
    private static IReadOnlyCollection<string> TokenizeAndNormalize(string text)
    {
        ArgumentNullException.ThrowIfNull(text);

        text = text.ToUpperInvariant();
        text = TokenRegex().Replace(text, " ");
        return [..text.Split(Separator, StringSplitOptions.RemoveEmptyEntries)];
    }

    // The text is upper-cased before matching, so the class must list A-Z (not a-z);
    // otherwise every letter is stripped. Whitespace is matched as well so that tabs and
    // line breaks become the single separator character used by Split.
    [GeneratedRegex(@"[^A-Z0-9]+")]
    private static partial Regex TokenRegex();
}
57 changes: 57 additions & 0 deletions
57
src/LinkDotNet.Blog.Web/Features/Services/Similiarity/TfIdfVectorizer.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
|
||
namespace LinkDotNet.Blog.Web.Features.Services.Similiarity; | ||
|
||
/// <summary>
/// Computes TF-IDF vectors for tokenized documents against a fixed corpus.
/// The inverse document frequencies are calculated once, in the constructor.
/// </summary>
public class TfIdfVectorizer
{
    private readonly IReadOnlyCollection<IReadOnlyCollection<string>> documents;
    private readonly Dictionary<string, double> idfScores;

    /// <summary>
    /// Creates a vectorizer for the given corpus.
    /// </summary>
    /// <param name="documents">The corpus; each document is a collection of tokens.</param>
    /// <exception cref="ArgumentNullException"><paramref name="documents"/> is <c>null</c>.</exception>
    public TfIdfVectorizer(IReadOnlyCollection<IReadOnlyCollection<string>> documents)
    {
        // Fail here with a clear message instead of a NullReferenceException inside the helper.
        ArgumentNullException.ThrowIfNull(documents);

        this.documents = documents;
        idfScores = CalculateIdfScores();
    }

    /// <summary>
    /// Builds the TF-IDF vector of a tokenized document.
    /// </summary>
    /// <param name="targetDocument">Tokens of the document to vectorize.</param>
    /// <returns>
    /// A dictionary mapping each distinct token to (occurrences / token count) * idf.
    /// Tokens unknown to the corpus get an idf of 0. An empty document yields an empty vector.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="targetDocument"/> is <c>null</c>.</exception>
    public Dictionary<string, double> ComputeTfIdfVector(IReadOnlyCollection<string> targetDocument)
    {
        ArgumentNullException.ThrowIfNull(targetDocument);

        var tfidfVector = new Dictionary<string, double>();

        foreach (var occurrences in targetDocument.GroupBy(token => token))
        {
            var tf = occurrences.Count() / (double)targetDocument.Count;
            var idf = idfScores.GetValueOrDefault(occurrences.Key);
            tfidfVector[occurrences.Key] = tf * idf;
        }

        return tfidfVector;
    }

    /// <summary>
    /// Calculates log(document count / number of documents containing the term) for every corpus term.
    /// </summary>
    private Dictionary<string, double> CalculateIdfScores()
    {
        var termDocumentFrequency = new Dictionary<string, int>();

        // Distinct() per document so that a term counts once per document it appears in.
        foreach (var term in documents.SelectMany(document => document.Distinct()))
        {
            termDocumentFrequency[term] = termDocumentFrequency.GetValueOrDefault(term) + 1;
        }

        return termDocumentFrequency.ToDictionary(
            entry => entry.Key,
            entry => Math.Log(documents.Count / (double)entry.Value));
    }
}
49 changes: 49 additions & 0 deletions
49
src/LinkDotNet.Blog.Web/Features/ShowBlogPost/Components/SimilarBlogPostSection.razor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
@* Collapsible section listing the blog posts stored as similar to the given BlogPost.
   Nothing is rendered when no similar blog posts are found. *@
@using LinkDotNet.Blog.Domain
@using LinkDotNet.Blog.Infrastructure.Persistence
@inject IRepository<BlogPost> BlogPostRepository
@inject IRepository<SimilarBlogPost> SimilarBlogPostJobRepository

@if (similarBlogPosts.Count > 0)
{
    <div class="accordion my-5" id="archiveAccordion">
        <div class="accordion-item">
            <h2 class="accordion-header" id="headingOne">
                <button class="accordion-button collapsed" type="button" data-bs-toggle="collapse" data-bs-target="#collapseOne" aria-expanded="false" aria-controls="collapseOne">
                    Want to read more? Check out these related blog posts!
                </button>
            </h2>
            @* data-bs-parent must reference the id of the surrounding accordion container. *@
            <div id="collapseOne" class="accordion-collapse collapse" aria-labelledby="headingOne" data-bs-parent="#archiveAccordion">
                <div class="row p-4">
                    @foreach (var relatedBlogPost in similarBlogPosts)
                    {
                        <div class="col pt-2">
                            <div class="card h-100">
                                <div class="card-body">
                                    <h5 class="card-title fw-bold">@relatedBlogPost.Title</h5>
                                    <p class="card-text">@MarkdownConverter.ToMarkupString(relatedBlogPost.ShortDescription)</p>
                                </div>
                                <a href="blogPost/@relatedBlogPost.Id/@relatedBlogPost.Slug" class="stretched-link"></a>
                            </div>
                        </div>
                    }
                </div>
            </div>
        </div>
    </div>
}

@code {
    /// <summary>The blog post for which similar blog posts are shown.</summary>
    [Parameter] public BlogPost BlogPost { get; set; }

    private IReadOnlyCollection<BlogPost> similarBlogPosts = [];

    /// <summary>
    /// Loads the similarity entry stored under the id of <see cref="BlogPost"/> and then the
    /// blog posts it references.
    /// </summary>
    protected override async Task OnParametersSetAsync()
    {
        var similarBlogPostIds = await SimilarBlogPostJobRepository.GetByIdAsync(BlogPost.Id);
        if (similarBlogPostIds is null)
        {
            // Parameters can be set again on the same component instance; clear the list so the
            // posts loaded for a previous BlogPost are not shown for this one.
            similarBlogPosts = [];
            return;
        }

        similarBlogPosts = await BlogPostRepository.GetAllAsync(
            b => similarBlogPostIds.SimilarBlogPostIds.Contains(b.Id));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.