Review notes (patch tools ignore everything before the first file header).

The patch text had been collapsed onto a few lines; it is re-flowed here into
regular unified-diff form, and the new test file is amended as follows:
  * TestCases now forwards all three tuple items; the theory takes three parameters.
  * ProcessChildrenTest returns Task instead of being async void, and asserts
    that the .main_text element was found before using it.
  * HaveSameText (previously HaveSmaeText) returns false on the first mismatch.
    The old version overwrote its result on every assignment, so only the last
    comparison counted, and that comparison called Equals on the element
    collections themselves.
  * Each test case receives its own initial document instead of sharing one
    static instance that the scraper is free to mutate.
  * Comments and doc comments are written in English.

NOTE(review): blank context lines in the ScrapingAozoraService.cs hunks were
lost when the patch was collapsed. They are restored at assumed positions so
that the hunk line counts add up; confirm against the target file before applying.
NOTE(review): the "div" literal in ToMainText and the generic argument of
List<string> were missing from the collapsed text and are reconstructed from
the ".main_text" selector and the [""] argument used by the test.
NOTE(review): the index hashes below are carried over unchanged and no longer
match the amended content.

diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index f8df995..6a71e12 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -3,6 +3,7 @@
 using AngleSharp.Html.Dom;
 using AngleSharp.Io;
 using KoeBook.Core;
+using KoeBook.Core.Utilities;
 using KoeBook.Epub.Contracts.Services;
 using KoeBook.Epub.Models;
 using Microsoft.Extensions.DependencyInjection;
@@ -15,6 +16,8 @@ public partial class ScrapingAozoraService(ISplitBraceService splitBraceService,
     private readonly ISplitBraceService _splitBraceService = splitBraceService;
     private readonly IScrapingClientService _scrapingClientService = scrapingClientService;
 
+    private EpubDocument _document;
+
     public bool IsMatchSite(Uri uri)
     {
 
@@ -589,6 +592,17 @@ private static string TextReplace(string text)
         return returnText;
     }
 
+    private SplittedLineBuilder ParagraphLineBuilder = new SplittedLineBuilder();
+    private SplittedLineBuilder ScriptLineLineBuilder = new SplittedLineBuilder();
+
+    /// <summary>
+    /// Not implemented yet: the body is empty, so calling this leaves all state untouched.
+    /// </summary>
+    internal void ProcessChildren(IElement element, List<string> classes, string style)
+    {
+
+    }
+
     private static string GetCardUrl(string url)
     {
 
diff --git a/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
new file mode 100644
index 0000000..4b26bd5
--- /dev/null
+++ b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
@@ -0,0 +1,166 @@
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using AngleSharp;
+using KoeBook.Epub.Models;
+using KoeBook.Epub.Services;
+
+namespace KoeBook.Test.Epub
+{
+    /// <summary>
+    /// Tests for <c>ScrapingAozoraService.ProcessChildren</c>.
+    /// </summary>
+    public class ScrapingAozoraServiceTest
+    {
+        /// <summary>
+        /// Builds a new document with one chapter, one section and one empty paragraph.
+        /// A fresh instance per call keeps test cases from sharing mutable state.
+        /// </summary>
+        private static EpubDocument CreateEmptySingleParagraph()
+        {
+            return new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph()] }] }] };
+        }
+
+        /// <summary>
+        /// Supplies (html, initial document, expected document) rows for <see cref="ProcessChildrenTest"/>.
+        /// </summary>
+        public static object[][] TestCases()
+        {
+            (string Html, EpubDocument Initial, EpubDocument Expected)[] cases = [
+                // Layout
+                // 1.1 Page-break annotation (改丁)
+                (ToMainText(@"[#改丁]"), CreateEmptySingleParagraph(), new EpubDocument("", "", "", Guid.NewGuid()) { Chapters = [new Chapter() { Sections = [new Section("") { Elements = [new Paragraph() { Text = "[#改丁]", ScriptLine = new Core.Models.ScriptLine("", "", "") }] }] }] }),
+            ];
+
+            // Every tuple item is forwarded: the theory method takes three parameters.
+            return cases.Select(c => new object[] { c.Html, c.Initial, c.Expected }).ToArray();
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="text"/> in a <c>div</c> element whose class is <c>main_text</c>.
+        /// </summary>
+        /// <param name="text">HTML fragment to wrap.</param>
+        /// <returns><paramref name="text"/> enclosed in the div element.</returns>
+        private static string ToMainText(string text)
+        {
+            return $"<div class=\"main_text\">{text}</div>";
+        }
+
+        /// <summary>
+        /// Parses <paramref name="html"/>, runs <c>ProcessChildren</c> on its <c>.main_text</c> element
+        /// and compares the resulting document with <paramref name="expected"/>.
+        /// </summary>
+        [Theory]
+        [MemberData(nameof(TestCases))]
+        public async Task ProcessChildrenTest(string html, EpubDocument initial, EpubDocument expected)
+        {
+            var config = Configuration.Default.WithDefaultLoader();
+            using var context = BrowsingContext.New(config);
+            var doc = await context.OpenAsync(request => request.Content(html));
+            var mainText = doc.QuerySelector(".main_text");
+            Assert.NotNull(mainText);
+            var scraper = new ScrapingAozoraService(new SplitBraceService(), new ScrapingClientService(new httpClientFactory(), TimeProvider.System));
+            scraper._document() = initial;
+
+            scraper.ProcessChildren(mainText, [""], "");
+
+            Assert.True(HaveSameText(scraper._document(), expected));
+        }
+
+        /// <summary>
+        /// Determines whether two documents carry the same text; the Guid is ignored.
+        /// </summary>
+        /// <param name="document">Document to compare.</param>
+        /// <param name="comparison">Document to compare against.</param>
+        /// <returns>
+        /// <c>true</c> when the titles, authors, chapter and section titles, element types
+        /// and paragraph texts all match; otherwise <c>false</c>.
+        /// </returns>
+        private static bool HaveSameText(EpubDocument document, EpubDocument comparison)
+        {
+            // NOTE(review): CssClasses is not compared. Its type is not visible in this patch, and the
+            // earlier `==` check never influenced the result because it was overwritten.
+            if (document.Title != comparison.Title || document.Author != comparison.Author)
+            {
+                return false;
+            }
+
+            // Zip stops at the shorter sequence, so lengths are checked before pairing.
+            var chapters = document.Chapters.ToArray();
+            var otherChapters = comparison.Chapters.ToArray();
+            if (chapters.Length != otherChapters.Length)
+            {
+                return false;
+            }
+
+            foreach (var (chapter, otherChapter) in chapters.Zip(otherChapters))
+            {
+                if (chapter.Title != otherChapter.Title)
+                {
+                    return false;
+                }
+
+                var sections = chapter.Sections.ToArray();
+                var otherSections = otherChapter.Sections.ToArray();
+                if (sections.Length != otherSections.Length)
+                {
+                    return false;
+                }
+
+                foreach (var (section, otherSection) in sections.Zip(otherSections))
+                {
+                    if (section.Title != otherSection.Title)
+                    {
+                        return false;
+                    }
+
+                    var elements = section.Elements.ToArray();
+                    var otherElements = otherSection.Elements.ToArray();
+                    if (elements.Length != otherElements.Length)
+                    {
+                        return false;
+                    }
+
+                    foreach (var (element, otherElement) in elements.Zip(otherElements))
+                    {
+                        if (element.GetType() != otherElement.GetType())
+                        {
+                            return false;
+                        }
+
+                        // Compared item by item: Equals on the collections themselves would not
+                        // look at their contents (assuming a standard list type).
+                        if (element is Paragraph paragraph
+                            && otherElement is Paragraph otherParagraph
+                            && paragraph.Text != otherParagraph.Text)
+                        {
+                            return false;
+                        }
+                    }
+                }
+            }
+
+            return true;
+        }
+    }
+
+    /// <summary>
+    /// <see cref="IHttpClientFactory"/> for tests; every call returns the same <see cref="HttpClient"/>.
+    /// </summary>
+    internal class httpClientFactory : IHttpClientFactory
+    {
+        private static readonly HttpClient httpClient = new HttpClient();
+
+        public HttpClient CreateClient(string name)
+        {
+            return httpClient;
+        }
+    }
+}
+file static class Proxy
+{
+    /// <summary>
+    /// Exposes the private <c>_document</c> field of <see cref="ScrapingAozoraService"/> by reference.
+    /// </summary>
+    [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "_document")]
+    public static extern ref EpubDocument _document(this ScrapingAozoraService scraper);
+}