Skip to content

Commit

Permalink
#1-4 ルビ検出の正規表現を厳格に修正
Browse files Browse the repository at this point in the history
  • Loading branch information
miyaji255 committed Apr 5, 2024
1 parent acdfb43 commit ca0a6cf
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
5 changes: 2 additions & 3 deletions Epub/KoeBook.Epub/Services/AnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ public partial class AnalyzerService(IScraperSelectorService scrapingService, IE
{
private readonly IScraperSelectorService _scrapingService = scrapingService;
private readonly IEpubDocumentStoreService _epubDocumentStoreService = epubDocumentStoreService;
private readonly ILlmAnalyzerService _llmAnalyzerService = llmAnalyzerService;
private Dictionary<string, string> _rubyReplacements = new Dictionary<string, string>();
private readonly ILlmAnalyzerService _llmAnalyzerService = llmAnalyzerService;

public async ValueTask<BookScripts> AnalyzeAsync(BookProperties bookProperties, string tempDirectory, CancellationToken cancellationToken)
{
Expand Down Expand Up @@ -76,6 +75,6 @@ private static string ReplaceBaseTextWithRuby(string text)
return RubyRegex().Replace(text, m => m.Groups[2].Value);
}

[GeneratedRegex("<ruby><rb>(.*?)</rb><rp></rp><rt>(.*?)</rt><rp></rp></ruby>")]
[GeneratedRegex(@"<ruby>\s*<rb>(.*?)</rb>\s*<rp>\s*[(《\(]\s*</rp>\s*<rt>(.*?)</rt>\s*<rp>\s*[)》\)]\s*</rp>\s*</ruby>", RegexOptions.Multiline)]
private static partial Regex RubyRegex();
}
7 changes: 4 additions & 3 deletions KoeBook.Test/Epub/AnalyzerServiceTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ public class AnalyzerServiceTest
ああ<ruby><rb>漢字</rb><rp>(</rp><rt>かんじ</rt><rp>)</rp></ruby>あああ
ああ<ruby><rb>漢字1</rb><rp>(</rp><rt>かんじ1</rt><rp>)</rp></ruby>あああ
""", "ああかんじあああ\nああかんじあああ\nああかんじ1あああ")]
[InlineData("<ruby><rb>佐久平</rb><rp>《</rp><rt>さくだいら</rt><rp>》</rp></ruby> <ruby><rb>啓介</rb><rp>《</rp><rt>けいすけ</rt><rp>》</rp></ruby>",
"<ruby><rb>佐久平</rb><rp>《</rp><rt>さくだいら</rt><rp>》</rp></ruby> <ruby><rb>啓介</rb><rp>《</rp><rt>けいすけ</rt><rp>》</rp></ruby>")]
[InlineData("<ruby><rb>漢字</rb>\n<rp>(</rp><rt>かんじ</rt><rp>)</rp></ruby>", "<ruby><rb>漢字</rb>\n<rp>(</rp><rt>かんじ</rt><rp>)</rp></ruby>")]
[InlineData("<ruby> <rb>佐久平</rb> <rp>\n《 </rp> <rt>さくだいら</rt> <rp>》</rp> </ruby> <ruby><rb>啓介</rb><rp>《</rp><rt>けいすけ</rt><rp>》</rp></ruby>",
"さくだいら けいすけ")]
[InlineData("<ruby><rb>漢字</rb>\n<rp>(</rp><rt>かんじ</rt><rp>)</rp></ruby>", "かんじ")]
[InlineData("ああ<ruby><rb>漢字</rb><rp>(</rp><rt>かんじ</rt><rp>)</rp></ruby>あああ<ruby><rb>漢字</rb><rp>(</rp><rt>カンジ</rt><rp>)</rp></ruby>", "ああかんじあああカンジ")]
public void ReplaceBaseTextWithRuby(string input, string expected)
{
var result = AnalyzerServiceProxy.ReplaceBaseTextWithRuby(null, input);
Expand Down

0 comments on commit ca0a6cf

Please sign in to comment.