From a097ce8e6ccc49c74866bab54219412dff46eb49 Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Wed, 3 Apr 2024 11:02:47 +0900
Subject: [PATCH 1/6] =?UTF-8?q?#1-3=20=E9=9D=92=E7=A9=BA=E6=96=87=E5=BA=AB?=
 =?UTF-8?q?Service=E3=81=AE=E3=83=AA=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF?=
 =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Services/ScrapingAozoraService.cs         | 677 +++++++++---------
 .../Services/ScrapingNaroService.cs           |   4 +-
 Epub/KoeBook.Epub/TagNames.cs                 |  15 +-
 .../Epub/ScrapingAozoraServiceTest.cs         |  30 +
 4 files changed, 363 insertions(+), 363 deletions(-)
 create mode 100644 KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index f8df995..56b8d06 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -1,4 +1,7 @@
-﻿using AngleSharp;
+﻿using System.Reflection.Metadata;
+using System.Text;
+using System.Xml.Linq;
+using AngleSharp;
 using AngleSharp.Dom;
 using AngleSharp.Html.Dom;
 using AngleSharp.Io;
@@ -23,11 +26,6 @@ public bool IsMatchSite(Uri uri)
 
         public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
         {
-            var chapterNum = 0;
-            var sectionNum = 0;
-            var chapterExist = false;
-            var sectionExist = false;
-
             var config = Configuration.Default.WithDefaultLoader();
             using var context = BrowsingContext.New(config);
             var doc = await context.OpenAsync(url, ct).ConfigureAwait(false);
@@ -41,49 +39,9 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
                 ?? throw new EbookException(ExceptionType.WebScrapingFailed, $"著者の取得に失敗しました。\n以下のリンクから正しい小説のリンクを取得してください。\n{GetCardUrl(url)}");
 
             // EpubDocument の生成
-            var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), coverFilePath, id)
-            {
-                // EpubDocument.Chapters の生成
-                Chapters = new List<Chapter>()
-            };
+            var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), coverFilePath, id);
 
-            // 目次を取得
-            var contents = doc.QuerySelectorAll(".midashi_anchor");
-
-            // 目次からEpubDocumentを構成
-            List<int> contentsIds = new List<int>() { 0 };
-            // Chapter, Section が存在するとき、それぞれtrue
-            chapterExist = false;
-            sectionExist = false;
-            if (contents.Length != 0)
-            {
-                int previousMidashiId = 0;
-                foreach (var midashi in contents)
-                {
-                    if (midashi.Id != null)
-                    {
-                        var MidashiId = int.Parse(midashi.Id.Replace("midashi", ""));
-                        if ((MidashiId - previousMidashiId) == 100)
-                        {
-                            document.Chapters.Add(new Chapter() { Title = TextProcess(midashi) });
-                            chapterExist = true;
-                        }
-                        if ((MidashiId - previousMidashiId) == 10)
-                        {
-                            document.EnsureChapter();
-                            document.Chapters[^1].Sections.Add(new Section(TextProcess(midashi)));
-                            sectionExist = true;
-                        }
-                        contentsIds.Add(MidashiId);
-                        previousMidashiId = MidashiId;
-                    }
-                }
-            }
-            else
-            {
-                document.Chapters.Add(new Chapter() { Title = null });
-                document.Chapters[^1].Sections.Add(new Section(bookTitle.InnerHtml));
-            }
+            var (contentsIds, chapterExist, sectionExist) = LoadToc(doc, document);
 
             // 本文を取得
             var mainText = doc.QuerySelector(".main_text")!;
@@ -93,8 +51,8 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
             // 直前のNodeを確認した操作で、その内容をParagraphに追加した場合、true
             bool previous = false;
             // 各ChapterとSection のインデックス
-            chapterNum = -1;
-            sectionNum = -1;
+            var chapterNum = -1;
+            var sectionNum = -1;
 
             // 直前のimgタグにaltがなかったときtrueになる。
             bool skipCaption = false;
@@ -102,117 +60,119 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
             foreach (var element in mainText.Children)
             {
                 var nextNode = element.NextSibling;
-                if (element.TagName == "BR")
-                {
-                    if (previous == true)
-                    {
-                        document.EnsureSection(chapterNum);
-                        document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
-                    }
-                }
-                else if (element.TagName == "DIV")
+                switch (element.TagName)
                 {
-                    var midashi = element.QuerySelector(".midashi_anchor");
-                    if (midashi != null)
-                    {
-                        if (midashi.Id == null)
-                            throw new EbookException(ExceptionType.WebScrapingFailed, "予期しないHTMLの構造です。\nclass=\"midashi_anchor\"ではなくid=\"midashi___\"が存在します。");
-
-                        if (!int.TryParse(midashi.Id.Replace("midashi", ""), out var midashiId))
-                            throw new EbookException(ExceptionType.WebScrapingFailed, $"予期しないアンカータグが見つかりました。id = {midashi.Id}");
-
-                        if (contentsIds.Contains(midashiId))
+                    case TagNames.A: // NOTE(review): pre-refactor code tested element.TagName == "BR" for this branch - confirm TagNames.A is the intended constant
+                        if (previous) // the preceding node's content was appended to a paragraph, so open a fresh empty one
+                        {
+                            document.EnsureSection(chapterNum);
+                            document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
+                        }
+                        break;
+                    case TagNames.Div:
                         {
-                            var contentsId = contentsIds.IndexOf(midashiId);
-                            switch (contentsIds[contentsId] - contentsIds[contentsId - 1])
+                            var midashi = element.QuerySelector(".midashi_anchor");
+                            if (midashi != null)
                             {
-                                case 100:
-                                    if (chapterNum >= 0 && sectionNum >= 0)
+                                if (midashi.Id == null)
+                                    throw new EbookException(ExceptionType.WebScrapingFailed, "予期しないHTMLの構造です。\nclass=\"midashi_anchor\"ではなくid=\"midashi___\"が存在します。");
+
+                                if (!int.TryParse(midashi.Id.Replace("midashi", ""), out var midashiId))
+                                    throw new EbookException(ExceptionType.WebScrapingFailed, $"予期しないアンカータグが見つかりました。id = {midashi.Id}");
+
+                                if (contentsIds.Contains(midashiId))
+                                {
+                                    var contentsId = contentsIds.IndexOf(midashiId);
+                                    switch (contentsIds[contentsId] - contentsIds[contentsId - 1])
                                     {
-                                        document.Chapters[chapterNum].Sections[sectionNum].Elements.RemoveAt(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1);
+                                        case 100:
+                                            if (chapterNum >= 0 && sectionNum >= 0)
+                                            {
+                                                document.Chapters[chapterNum].Sections[sectionNum].Elements.RemoveAt(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1);
+                                            }
+                                            chapterNum++;
+                                            sectionNum = -1;
+                                            break;
+                                        case 10:
+                                            if (chapterNum == -1)
+                                            {
+                                                chapterNum++;
+                                                sectionNum = -1;
+                                            }
+                                            if (chapterNum >= 0 && sectionNum >= 0)
+                                            {
+                                                document.Chapters[chapterNum].Sections[sectionNum].Elements.RemoveAt(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1);
+                                            }
+                                            sectionNum++;
+                                            break;
+                                        default:
+                                            break;
                                     }
-                                    chapterNum++;
-                                    sectionNum = -1;
-                                    break;
-                                case 10:
+                                }
+                                else //小見出し、行中小見出しの処理
+                                {
                                     if (chapterNum == -1)
                                     {
+                                        if (chapterExist)
+                                        {
+                                            document.Chapters.Insert(0, new Chapter());
+                                        }
                                         chapterNum++;
                                         sectionNum = -1;
                                     }
-                                    if (chapterNum >= 0 && sectionNum >= 0)
+                                    if (sectionNum == -1)
                                     {
-                                        document.Chapters[chapterNum].Sections[sectionNum].Elements.RemoveAt(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1);
+                                        if (sectionExist)
+                                        {
+                                            document.EnsureChapter();
+                                            document.Chapters[^1].Sections.Insert(0, new Section("___"));
+                                        }
+                                        sectionNum++;
                                     }
-                                    sectionNum++;
-                                    break;
-                                default:
-                                    break;
-                            }
-                        }
-                        else //小見出し、行中小見出しの処理
-                        {
-                            if (chapterNum == -1)
-                            {
-                                if (chapterExist)
-                                {
-                                    document.Chapters.Insert(0, new Chapter());
+                                    document.EnsureParagraph(chapterNum, sectionNum);
+                                    AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, true);
                                 }
-                                chapterNum++;
-                                sectionNum = -1;
                             }
-                            if (sectionNum == -1)
+                            else
                             {
-                                if (sectionExist)
+                                if (element.ClassName == "caption")
                                 {
-                                    document.EnsureChapter();
-                                    document.Chapters[^1].Sections.Insert(0, new Section("___"));
+                                    // https://www.aozora.gr.jp/annotation/graphics.html#:~:text=%3Cdiv%20class%3D%22caption%22%3E を処理するための部分
+                                    document.EnsureParagraph(chapterNum, sectionNum);
+                                    AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, false);
                                 }
-                                sectionNum++;
-                            }
-                            document.EnsureParagraph(chapterNum, sectionNum);
-                            if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
-                            {
-                                paragraph.Text += TextProcess(midashi);
-                                document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
-
-                                foreach (var splitText in _splitBraceService.SplitBrace(TextProcess(midashi)))
+                                else
                                 {
-                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph1)
+                                    if (chapterNum == -1)
                                     {
-                                        paragraph1.Text += splitText;
+                                        if (chapterExist)
+                                        {
+                                            document.Chapters.Insert(0, new Chapter());
+                                        }
+                                        chapterNum++;
+                                        sectionNum = -1;
                                     }
-                                    document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
-                                }
-                            }
-                        }
-                    }
-                    else
-                    {
-                        if (element.ClassName == "caption")
-                        {
-                            // https://www.aozora.gr.jp/annotation/graphics.html#:~:text=%3Cdiv%20class%3D%22caption%22%3E を処理するための部分
-                            document.EnsureParagraph(chapterNum, sectionNum);
-                            var focusElements = document.Chapters[chapterNum].Sections[sectionNum].Elements;
-                            if (focusElements[^1] is Paragraph paragraph)
-                            {
-                                var splitted = _splitBraceService.SplitBrace(TextProcess(element));
-                                var first = true;
-
-                                foreach (var text in splitted)
-                                {
-                                    if (first)
+                                    if (sectionNum == -1)
                                     {
-                                        paragraph.Text += text;
-                                        first = false;
+                                        if (sectionExist)
+                                        {
+                                            document.EnsureChapter();
+                                            document.Chapters[^1].Sections.Insert(0, new Section("___"));
+                                        }
+                                        sectionNum++;
                                     }
-                                    else
-                                        focusElements.Add(new Paragraph() { Text = text });
+                                    document.EnsureParagraph(chapterNum, sectionNum);
+                                    AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, true);
                                 }
                             }
+
+                            break;
                         }
-                        else
+
+                    case TagNames.Img:
                         {
+                            var img = (IHtmlImageElement)element;
+
                             if (chapterNum == -1)
                             {
                                 if (chapterExist)
@@ -231,221 +191,115 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
                                 }
                                 sectionNum++;
                             }
-                            document.EnsureParagraph(chapterNum, sectionNum);
-                            if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
+
+                            if (element.ClassName != "gaiji")
                             {
-                                foreach (var splitText in _splitBraceService.SplitBrace(TextProcess(element)))
+                                if (img.Source != null)
                                 {
-                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph1)
+                                    // 画像のダウンロード 
+                                    var filePass = Path.Combine(imageDirectory, FileUrlToFileName().Replace(img.Source, "$1"));
+                                    await _scrapingClientService.DownloadToFileAsync(img.Source, filePass, ct).ConfigureAwait(false);
+                                    document.EnsureSection(chapterNum);
+                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count > 1)
                                     {
-                                        paragraph1.Text += splitText;
+                                        document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Picture(filePass));
                                     }
-                                    document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
+                                }
+                                if (img.AlternativeText != null)
+                                {
+                                    document.EnsureParagraph(chapterNum, sectionNum);
+                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)
+                                    {
+                                        paragraph.Text += TextReplace(img.AlternativeText);
+                                        document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
+                                    }
+                                    skipCaption = false;
+                                }
+                                else
+                                {
+                                    skipCaption = true;
                                 }
                             }
-                        }
-                    }
-                }
-                else if (element.TagName == "IMG")
-                {
-                    if (element is IHtmlImageElement img)
-                    {
-                        if (chapterNum == -1)
-                        {
-                            if (chapterExist)
-                            {
-                                document.Chapters.Insert(0, new Chapter());
-                            }
-                            chapterNum++;
-                            sectionNum = -1;
-                        }
-                        if (sectionNum == -1)
-                        {
-                            if (sectionExist)
-                            {
-                                document.EnsureChapter();
-                                document.Chapters[^1].Sections.Insert(0, new Section("___"));
-                            }
-                            sectionNum++;
+
+                            break;
                         }
 
-                        if (element.ClassName != "gaiji")
+                    case TagNames.Span:
                         {
-                            if (img.Source != null)
+                            if (element.ClassName == "caption")
                             {
-                                // 画像のダウンロード
-                                var loader = context.GetService<IDocumentLoader>();
-                                if (loader != null)
+                                if (skipCaption)
                                 {
-                                    var downloading = loader.FetchAsync(new DocumentRequest(new Url(img.Source)));
-                                    ct.Register(() => downloading.Cancel());
-                                    var response = await downloading.Task.ConfigureAwait(false);
-                                    using var ms = new MemoryStream();
-                                    await response.Content.CopyToAsync(ms, ct).ConfigureAwait(false);
-                                    var filePass = System.IO.Path.Combine(imageDirectory, FileUrlToFileName().Replace(img.Source, "$1"));
-                                    File.WriteAllBytes(filePass, ms.ToArray());
-                                    document.EnsureSection(chapterNum);
-                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count > 1)
+                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^2] is Paragraph paragraph)
+                                    {
+                                        paragraph.Text = TextProcess(element) + "の画像";
+                                    }
+                                }
+                                else
+                                {
+                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)
                                     {
-                                        document.Chapters[chapterNum].Sections[sectionNum].Elements.Insert(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1, new Picture(filePass));
+                                        paragraph.Text = TextProcess(element) + "の画像";
                                     }
                                 }
                             }
-                            if (img.AlternativeText != null)
+                            else if (element.ClassName == "notes")
                             {
-                                document.EnsureParagraph(chapterNum, sectionNum);
-                                if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
+                                switch (element.InnerHtml)
                                 {
-                                    paragraph.Text += TextReplace(img.AlternativeText);
-                                    document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
+                                    case "［＃改丁］":
+                                    case "［＃改ページ］":
+                                    case "［＃改見開き］":
+                                    case "［＃改段］":
+                                    case "［＃ページの左右中央］":
+                                        break;
+                                    default:
+                                        document.EnsureParagraph(chapterNum, sectionNum);
+                                        if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)
+                                        {
+                                            foreach (var splitText in _splitBraceService.SplitBrace(TextProcess(element)))
+                                            {
+                                                if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph1)
+                                                {
+                                                    paragraph1.Text += splitText;
+                                                }
+                                                document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
+                                            }
+                                        }
+                                        break;
                                 }
-                                skipCaption = false;
                             }
                             else
                             {
-                                skipCaption = true;
-                            }
-                        }
-                    }
-                }
-                else if (element.TagName == "SPAN")
-                {
-                    if (element.ClassName == "caption")
-                    {
-                        if (skipCaption)
-                        {
-                            if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^2] is Paragraph paragraph))
-                            {
-                                paragraph.Text = TextProcess(element) + "の画像";
-                            }
-                        }
-                        else
-                        {
-                            if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
-                            {
-                                paragraph.Text = TextProcess(element) + "の画像";
-                            }
-                        }
-                    }
-                    else if (element.ClassName == "notes")
-                    {
-                        switch (element.InnerHtml)
-                        {
-                            case "［＃改丁］":
-                                break;
-                            case "［＃改ページ］":
-                                break;
-                            case "［＃改見開き］":
-                                break;
-                            case "［＃改段］":
-                                break;
-                            case "［＃ページの左右中央］":
-                                break;
-                            default:
-                                document.EnsureParagraph(chapterNum, sectionNum);
-                                if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
+                                if (chapterNum == -1)
                                 {
-                                    foreach (var splitText in _splitBraceService.SplitBrace(TextProcess(element)))
+                                    if (chapterExist)
                                     {
-                                        if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph1)
-                                        {
-                                            paragraph1.Text += splitText;
-                                        }
-                                        document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
+                                        document.Chapters.Insert(0, new Chapter());
                                     }
+                                    chapterNum++;
+                                    sectionNum = -1;
                                 }
-                                break;
-                        }
-                    }
-                    else
-                    {
-                        if (chapterNum == -1)
-                        {
-                            if (chapterExist)
-                            {
-                                document.Chapters.Insert(0, new Chapter());
-                            }
-                            chapterNum++;
-                            sectionNum = -1;
-                        }
-                        if (sectionNum == -1)
-                        {
-                            if (sectionExist)
-                            {
-                                document.EnsureChapter();
-                                document.Chapters[^1].Sections.Insert(0, new Section("___"));
-                            }
-                            sectionNum++;
-                        }
-
-                        document.EnsureParagraph(chapterNum, sectionNum);
-                        var focusElements = document.Chapters[chapterNum].Sections[sectionNum].Elements;
-                        if (focusElements[^1] is Paragraph paragraph)
-                        {
-                            var splitted = _splitBraceService.SplitBrace(TextProcess(element));
-                            var first = true;
-                            foreach (var text in splitted)
-                            {
-                                if (first)
+                                if (sectionNum == -1)
                                 {
-                                    paragraph.Text += text;
-                                    first = false;
+                                    if (sectionExist)
+                                    {
+                                        document.EnsureChapter();
+                                        document.Chapters[^1].Sections.Insert(0, new Section("___"));
+                                    }
+                                    sectionNum++;
                                 }
-                                else
-                                    focusElements.Add(new Paragraph { Text = text });
-                            }
-                        }
-                        // 想定していない構造が見つかったことをログに出力した方が良い？
-                    }
-                }
-                else
-                {
-                    if (chapterNum == -1)
-                    {
-                        if (chapterExist)
-                        {
-                            document.Chapters.Insert(0, new Chapter());
-                        }
-                        chapterNum++;
-                        sectionNum = -1;
-                    }
-                    if (sectionNum == -1)
-                    {
-                        if (sectionExist)
-                        {
-                            document.EnsureChapter();
-                            document.Chapters[^1].Sections.Insert(0, new Section("___"));
-                        }
-                        sectionNum++;
-                    }
-                    document.EnsureParagraph(chapterNum, sectionNum);
-                    var focusElements = document.Chapters[chapterNum].Sections[sectionNum].Elements;
-                    if (focusElements[^1] is Paragraph paragraph)
-                    {
-                        var splitted = _splitBraceService.SplitBrace(TextProcess(element));
-                        var first = true;
-                        foreach (var text in splitted)
-                        {
-                            if (first)
-                            {
-                                paragraph.Text += text;
-                                first = false;
+
+                                document.EnsureParagraph(chapterNum, sectionNum);
+                                AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, false);
+                                // 想定していない構造が見つかったことをログに出力した方が良い？
                             }
-                            else
-                                focusElements.Add(new Paragraph { Text = text });
+
+                            break;
                         }
-                    }
-                    // 想定していない構造が見つかったことをログに出力した方が良い？
-                }
 
-                if (nextNode != null)
-                {
-                    if (nextNode.NodeType == NodeType.Text)
-                    {
-                        if (!string.IsNullOrWhiteSpace(nextNode.Text()))
+                    default:
                         {
-                            previous = true;
-
                             if (chapterNum == -1)
                             {
                                 if (chapterExist)
@@ -465,33 +319,53 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
                                 sectionNum++;
                             }
                             document.EnsureParagraph(chapterNum, sectionNum);
-                            var focusElements = document.Chapters[chapterNum].Sections[sectionNum].Elements;
-                            if (focusElements[^1] is Paragraph paragraph)
+
+                            AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, false);
+                            break;
+                            // 想定していない構造が見つかったことをログに出力した方が良い？
+                        }
+                }
+
+                if (nextNode is null)
+                    continue;
+
+                if (nextNode.NodeType == NodeType.Text)
+                {
+                    var text = nextNode.Text();
+                    if (!string.IsNullOrWhiteSpace(text))
+                    {
+                        previous = true;
+
+                        if (chapterNum == -1)
+                        {
+                            if (chapterExist)
                             {
-                                var splitted = _splitBraceService.SplitBrace(TextReplace(nextNode.Text()));
-                                var first = true;
-                                foreach (var text in splitted)
-                                {
-                                    if (first)
-                                    {
-                                        paragraph.Text += text;
-                                        first = false;
-                                    }
-                                    else
-                                        focusElements.Add(new Paragraph { Text = text });
-                                }
+                                document.Chapters.Insert(0, new Chapter());
                             }
+                            chapterNum++;
+                            sectionNum = -1;
                         }
-                        else
+                        if (sectionNum == -1)
                         {
-                            previous = false;
+                            if (sectionExist)
+                            {
+                                document.EnsureChapter();
+                                document.Chapters[^1].Sections.Insert(0, new Section("___"));
+                            }
+                            sectionNum++;
                         }
+                        document.EnsureParagraph(chapterNum, sectionNum);
+                        AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, text, false);
                     }
                     else
                     {
                         previous = false;
                     }
                 }
+                else
+                {
+                    previous = false;
+                }
             }
 
             // 末尾の空のparagraphを削除
@@ -500,85 +374,130 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
             return document;
         }
 
-
         private static string TextProcess(IElement element)
         {
-            string text = "";
             if (element.ChildElementCount == 0)
             {
-                text += TextReplace(element.InnerHtml);
+                return TextReplace(element.InnerHtml);
             }
             else
             {
                 var rubies = element.QuerySelectorAll("ruby");
                 if (rubies.Length > 0)
                 {
+                    var resultBuilder = new StringBuilder();
                     if (element.Children[0].PreviousSibling is INode node)
                     {
                         if (node.NodeType == NodeType.Text)
                         {
                             if (!string.IsNullOrWhiteSpace(node.Text()))
                             {
-                                text += TextReplace(node.Text());
+                                resultBuilder.Append(TextReplace(node.Text()));
                             }
                         }
                     }
+
                     foreach (var item in element.Children)
                     {
                         if (item.TagName == "RUBY")
                         {
                             if (item.QuerySelectorAll("img").Length > 0)
                             {
-                                if (item.QuerySelector("rt") != null)
+                                if (item.QuerySelector("rt") is { TextContent: var text })
                                 {
-                                    text += TextReplace(item.QuerySelector("rt")!.TextContent);
+                                    resultBuilder.Append(TextReplace(text));
                                 }
                             }
                             else
                             {
-                                text += TextReplace(item.OuterHtml);
+                                resultBuilder.Append(TextReplace(item.OuterHtml));
                             }
                         }
                         else
                         {
                             if (!string.IsNullOrWhiteSpace(item.TextContent) && (!string.IsNullOrEmpty(item.TextContent)))
                             {
-                                text += TextReplace(item.TextContent);
+                                resultBuilder.Append(TextReplace(item.TextContent));
                             }
                         }
                         if (item.NextSibling != null)
                         {
                             if (!string.IsNullOrWhiteSpace(item.NextSibling.TextContent) && (!string.IsNullOrEmpty(item.NextSibling.TextContent)))
                             {
-                                text += TextReplace(item.NextSibling.Text());
+                                resultBuilder.Append(TextReplace(item.NextSibling.Text()));
                             }
                         }
                     }
+                    return resultBuilder.ToString();
                 }
                 else if (element.TagName == "RUBY")
                 {
                     if (element.QuerySelectorAll("img").Length > 0)
                     {
-                        if (element.QuerySelector("rt") != null)
-                        {
-                            text += TextReplace(element.QuerySelector("rt")!.TextContent);
-                        }
+                        if (element.QuerySelector("rt") is { TextContent: var text })
+                            return TextReplace(text);
+                        else
+                            return "";
                     }
                     else
                     {
-                        text += TextReplace(element.OuterHtml);
+                        return TextReplace(element.OuterHtml);
                     }
                 }
                 else
                 {
-                    text += TextReplace(element.TextContent);
+                    return TextReplace(element.TextContent);
                 }
             }
-            return text;
         }
 
+        private void AddParagraphs(List<Models.Element> focusElements, IElement element, bool lastEmpty)
+        {
+            if (focusElements[^1] is Paragraph paragraph)
+            {
+                var splitted = _splitBraceService.SplitBrace(TextProcess(element));
+                var first = true;
+                foreach (var text in splitted)
+                {
+                    if (first)
+                    {
+                        paragraph.Text += text;
+                        first = false;
+                    }
+                    else
+                        focusElements.Add(new Paragraph { Text = text });
+                }
+
+                if (lastEmpty)
+                    focusElements.Add(new Paragraph());
+            }
+        }
 
-        // ローマ数字、改行の置換をまとめて行う。
+        private void AddParagraphs(List<Models.Element> focusElements, string input, bool lastEmpty)
+        {
+            if (focusElements[^1] is Paragraph paragraph)
+            {
+                var splitted = _splitBraceService.SplitBrace(TextReplace(input));
+                var first = true;
+                foreach (var text in splitted)
+                {
+                    if (first)
+                    {
+                        paragraph.Text += text;
+                        first = false;
+                    }
+                    else
+                        focusElements.Add(new Paragraph { Text = text });
+                }
+
+                if (lastEmpty)
+                    focusElements.Add(new Paragraph());
+            }
+        }
+
+        /// <summary>
+        /// ローマ数字、改行の置換をまとめて行う。
+        /// </summary>
         private static string TextReplace(string text)
         {
             string returnText = text;
@@ -589,6 +508,60 @@ private static string TextReplace(string text)
             return returnText;
         }
 
+        /// <summary>
+        /// Builds the chapter/section skeleton of the EpubDocument from the table of contents; throws EbookException when a heading id is not numeric.
+        /// </summary>
+        /// <returns>
+        /// <list type="bullet">
+        /// <item>contentsIds: numeric part of each heading id (note: these do not match the Chapter/Section indices of the EpubDocument)</item>
+        /// <item>hasChapter: true when at least one chapter heading was added</item>
+        /// <item>hasSection: true when at least one section heading was added</item>
+        /// </list>
+        /// </returns>
+        private static (List<int> contentsIds, bool hasChapter, bool hasSection) LoadToc(IDocument doc, EpubDocument epubDocument)
+        {
+            // Collect the heading anchors that make up the table of contents
+            var contents = doc.QuerySelectorAll(".midashi_anchor");
+
+            // Build the EpubDocument structure from the headings
+            var contentsIds = new List<int>() { 0 }; // leading 0: presumably the predecessor used when the caller diffs neighbouring ids - confirm
+            // Set to true once a chapter / section heading has been added
+            var hasChapter = false;
+            var hasSection = false;
+            if (contents.Length != 0)
+            {
+                int previousMidashiId = 0;
+                foreach (var midashi in contents)
+                {
+                    if (midashi.Id != null)
+                    {
+                        if (!int.TryParse(midashi.Id.Replace("midashi", ""), out var midashiId)) throw new EbookException(ExceptionType.WebScrapingFailed, $"予期しないアンカータグが見つかりました。id = {midashi.Id}"); // same check and message as ScrapingAsync, instead of a raw FormatException
+                        if ((midashiId - previousMidashiId) == 100)
+                        {
+                            epubDocument.Chapters.Add(new Chapter() { Title = TextProcess(midashi) });
+                            hasChapter = true;
+                        }
+                        else if ((midashiId - previousMidashiId) == 10)
+                        {
+                            epubDocument.EnsureChapter();
+                            epubDocument.Chapters[^1].Sections.Add(new Section(TextProcess(midashi)));
+                            hasSection = true;
+                        }
+                        contentsIds.Add(midashiId);
+                        previousMidashiId = midashiId;
+                    }
+                }
+            }
+            else
+            {
+                epubDocument.Chapters.Add(new Chapter()
+                {
+                    Title = null,
+                    Sections = [new Section(epubDocument.Title)]
+                });
+            }
+            return (contentsIds, hasChapter, hasSection);
+        }
 
         private static string GetCardUrl(string url)
         {
diff --git a/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
index 2fd47d0..6741549 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
@@ -131,7 +131,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
                 {
                     switch (child)
                     {
-                        case { TagName: TagNames.Anchor, Children: [IHtmlImageElement img] } when img.Source is not null:
+                        case { TagName: TagNames.A, Children: [IHtmlImageElement img] } when img.Source is not null:
                             {
                                 // 画像のダウンロード
                                 var filePath = Path.Combine(imageDirectory, new Uri(img.Source, Options.RawUri).Segments[^1].TrimEnd('/'));
@@ -143,7 +143,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
                             if (!string.IsNullOrWhiteSpace(item.InnerHtml))
                                 lineBuilder.Append(item.InnerHtml);
                             break;
-                        case { TagName: TagNames.BreakRow }:
+                        case { TagName: TagNames.Br }:
                             foreach (var split in _splitBraceService.SplitBrace(lineBuilder.ToLinesAndClear()))
                             {
                                 section.Elements.Add(new Paragraph() { Text = split });
diff --git a/Epub/KoeBook.Epub/TagNames.cs b/Epub/KoeBook.Epub/TagNames.cs
index e98e4c0..400d52c 100644
--- a/Epub/KoeBook.Epub/TagNames.cs
+++ b/Epub/KoeBook.Epub/TagNames.cs
@@ -1,15 +1,12 @@
-﻿using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace KoeBook.Epub
+﻿namespace KoeBook.Epub
 {
     internal static class TagNames
     {
-        public const string Anchor = "A";
+        public const string A = "A";
+        public const string Br = "BR";
+        public const string Div = "DIV"; // upper-case like its siblings: TagName is compared against upper-case names (e.g. "RUBY"), so "Div" could never match
+        public const string Img = "IMG";
         public const string Ruby = "RUBY";
-        public const string BreakRow = "BR";
+        public const string Span = "SPAN";
     }
 }
diff --git a/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
new file mode 100644
index 0000000..6cc1054
--- /dev/null
+++ b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
@@ -0,0 +1,30 @@
+﻿using System.Runtime.CompilerServices;
+using AngleSharp;
+using AngleSharp.Dom;
+using KoeBook.Epub.Services;
+
+namespace KoeBook.Test.Epub;
+
+public class ScrapingAozoraServiceTest
+{
+    [Theory]
+    [InlineData("", "")]
+    public async Task TextProcess(string input, string expected)
+    {
+        using var context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());
+        using var doc = await context.OpenAsync(req => req.Content(input)); // parse the raw input string as a document
+
+        Assert.NotNull(doc.Body); // a document node has no parent element (ParentElement is always null), so run against <body> of the parsed input
+        var result = ScrapingAozora.TextProcess(doc.Body!);
+
+        Assert.Equal(expected, result);
+    }
+}
+
+file static class ScrapingAozora
+{
+    [UnsafeAccessor(UnsafeAccessorKind.StaticMethod)]
+    private static extern string TextProcess(ScrapingAozoraService? _, IElement element);
+
+    public static string TextProcess(IElement element) => TextProcess(null, element);
+}

From 7d7702d2301794f3edc45b6413c5cb462305b341 Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Wed, 3 Apr 2024 21:12:48 +0900
Subject: [PATCH 2/6] =?UTF-8?q?#1-3=20=E9=9D=92=E7=A9=BA=E6=96=87=E5=BA=AB?=
 =?UTF-8?q?Service=E3=81=AE=E3=83=AA=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF?=
 =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Services/ScrapingAozoraService.cs         | 386 ++++++------------
 KoeBook.Core/Utilities/EnumerableEx.cs        |   5 +
 2 files changed, 139 insertions(+), 252 deletions(-)

diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index 56b8d06..6d9e8c9 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -6,6 +6,7 @@
 using AngleSharp.Html.Dom;
 using AngleSharp.Io;
 using KoeBook.Core;
+using KoeBook.Core.Utilities;
 using KoeBook.Epub.Contracts.Services;
 using KoeBook.Epub.Models;
 using Microsoft.Extensions.DependencyInjection;
@@ -41,7 +42,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
             // EpubDocument の生成
             var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), coverFilePath, id);
 
-            var (contentsIds, chapterExist, sectionExist) = LoadToc(doc, document);
+            var (contentsIds, hasChapter, hasSection) = LoadToc(doc, document);
 
             // 本文を取得
             var mainText = doc.QuerySelector(".main_text")!;
@@ -49,13 +50,13 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
 
             // 本文を分割しながらEpubDocumntに格納
             // 直前のNodeを確認した操作で、その内容をParagraphに追加した場合、true
-            bool previous = false;
+            var previous = false;
             // 各ChapterとSection のインデックス
             var chapterNum = -1;
             var sectionNum = -1;
 
             // 直前のimgタグにaltがなかったときtrueになる。
-            bool skipCaption = false;
+            var skipCaption = false;
 
             foreach (var element in mainText.Children)
             {
@@ -70,306 +71,161 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
                         }
                         break;
                     case TagNames.Div:
+                        var midashi = element.QuerySelector(".midashi_anchor");
+                        if (midashi != null)
                         {
-                            var midashi = element.QuerySelector(".midashi_anchor");
-                            if (midashi != null)
-                            {
-                                if (midashi.Id == null)
-                                    throw new EbookException(ExceptionType.WebScrapingFailed, "予期しないHTMLの構造です。\nclass=\"midashi_anchor\"ではなくid=\"midashi___\"が存在します。");
+                            if (midashi.Id == null)
+                                throw new EbookException(ExceptionType.WebScrapingFailed, "予期しないHTMLの構造です。\nclass=\"midashi_anchor\"ではなくid=\"midashi___\"が存在します。");
 
-                                if (!int.TryParse(midashi.Id.Replace("midashi", ""), out var midashiId))
-                                    throw new EbookException(ExceptionType.WebScrapingFailed, $"予期しないアンカータグが見つかりました。id = {midashi.Id}");
+                            if (!int.TryParse(midashi.Id.Replace("midashi", ""), out var midashiId))
+                                throw new EbookException(ExceptionType.WebScrapingFailed, $"予期しないアンカータグが見つかりました。id = {midashi.Id}");
 
-                                if (contentsIds.Contains(midashiId))
-                                {
-                                    var contentsId = contentsIds.IndexOf(midashiId);
-                                    switch (contentsIds[contentsId] - contentsIds[contentsId - 1])
-                                    {
-                                        case 100:
-                                            if (chapterNum >= 0 && sectionNum >= 0)
-                                            {
-                                                document.Chapters[chapterNum].Sections[sectionNum].Elements.RemoveAt(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1);
-                                            }
-                                            chapterNum++;
-                                            sectionNum = -1;
-                                            break;
-                                        case 10:
-                                            if (chapterNum == -1)
-                                            {
-                                                chapterNum++;
-                                                sectionNum = -1;
-                                            }
-                                            if (chapterNum >= 0 && sectionNum >= 0)
-                                            {
-                                                document.Chapters[chapterNum].Sections[sectionNum].Elements.RemoveAt(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1);
-                                            }
-                                            sectionNum++;
-                                            break;
-                                        default:
-                                            break;
-                                    }
-                                }
-                                else //小見出し、行中小見出しの処理
+                            if (contentsIds.Contains(midashiId))
+                            {
+                                var contentsId = contentsIds.IndexOf(midashiId);
+                                switch (contentsIds[contentsId] - contentsIds[contentsId - 1])
                                 {
-                                    if (chapterNum == -1)
-                                    {
-                                        if (chapterExist)
+                                    case 100:
+                                        if (chapterNum >= 0 && sectionNum >= 0)
                                         {
-                                            document.Chapters.Insert(0, new Chapter());
+                                            document.Chapters[chapterNum].Sections[sectionNum].Elements.RemoveAt(^1);
                                         }
                                         chapterNum++;
                                         sectionNum = -1;
-                                    }
-                                    if (sectionNum == -1)
-                                    {
-                                        if (sectionExist)
+                                        break;
+                                    case 10:
+                                        if (chapterNum == -1)
+                                        {
+                                            chapterNum++;
+                                            sectionNum = -1;
+                                        }
+                                        if (chapterNum >= 0 && sectionNum >= 0)
                                         {
-                                            document.EnsureChapter();
-                                            document.Chapters[^1].Sections.Insert(0, new Section("___"));
+                                            document.Chapters[chapterNum].Sections[sectionNum].Elements.RemoveAt(^1);
                                         }
                                         sectionNum++;
-                                    }
-                                    document.EnsureParagraph(chapterNum, sectionNum);
-                                    AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, true);
+                                        break;
+                                    default:
+                                        break;
                                 }
                             }
+                            else //小見出し、行中小見出しの処理
+                            {
+                                (chapterNum, sectionNum) = SetChapterAndSection(document, hasChapter, hasSection, chapterNum, sectionNum);
+                                document.EnsureParagraph(chapterNum, sectionNum);
+                                AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, true);
+                            }
+                        }
+                        else
+                        {
+                            if (element.ClassName == "caption")
+                            {
+                                // https://www.aozora.gr.jp/annotation/graphics.html#:~:text=%3Cdiv%20class%3D%22caption%22%3E を処理するための部分
+                                document.EnsureParagraph(chapterNum, sectionNum);
+                                AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, false);
+                            }
                             else
                             {
-                                if (element.ClassName == "caption")
-                                {
-                                    // https://www.aozora.gr.jp/annotation/graphics.html#:~:text=%3Cdiv%20class%3D%22caption%22%3E を処理するための部分
-                                    document.EnsureParagraph(chapterNum, sectionNum);
-                                    AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, false);
-                                }
-                                else
-                                {
-                                    if (chapterNum == -1)
-                                    {
-                                        if (chapterExist)
-                                        {
-                                            document.Chapters.Insert(0, new Chapter());
-                                        }
-                                        chapterNum++;
-                                        sectionNum = -1;
-                                    }
-                                    if (sectionNum == -1)
-                                    {
-                                        if (sectionExist)
-                                        {
-                                            document.EnsureChapter();
-                                            document.Chapters[^1].Sections.Insert(0, new Section("___"));
-                                        }
-                                        sectionNum++;
-                                    }
-                                    document.EnsureParagraph(chapterNum, sectionNum);
-                                    AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, true);
-                                }
+                                (chapterNum, sectionNum) = SetChapterAndSection(document, hasChapter, hasSection, chapterNum, sectionNum);
+                                document.EnsureParagraph(chapterNum, sectionNum);
+                                AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, true);
                             }
-
-                            break;
                         }
 
+                        break;
                     case TagNames.Img:
                         {
                             var img = (IHtmlImageElement)element;
 
-                            if (chapterNum == -1)
+                            (chapterNum, sectionNum) = SetChapterAndSection(document, hasChapter, hasSection, chapterNum, sectionNum);
+
+                            if (element.ClassName == "gaiji")
+                                break;
+
+                            if (img.Source != null)
                             {
-                                if (chapterExist)
+                                // 画像のダウンロード 
+                                var filePass = Path.Combine(imageDirectory, FileUrlToFileName().Replace(img.Source, "$1"));
+                                await _scrapingClientService.DownloadToFileAsync(img.Source, filePass, ct).ConfigureAwait(false);
+                                document.EnsureSection(chapterNum);
+                                if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count > 1)
                                 {
-                                    document.Chapters.Insert(0, new Chapter());
+                                    document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Picture(filePass));
                                 }
-                                chapterNum++;
-                                sectionNum = -1;
                             }
-                            if (sectionNum == -1)
+
+                            if (img.AlternativeText is null)
                             {
-                                if (sectionExist)
-                                {
-                                    document.EnsureChapter();
-                                    document.Chapters[^1].Sections.Insert(0, new Section("___"));
-                                }
-                                sectionNum++;
+                                skipCaption = true;
+                                continue;
                             }
 
-                            if (element.ClassName != "gaiji")
+                            document.EnsureParagraph(chapterNum, sectionNum);
+                            if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)
                             {
-                                if (img.Source != null)
-                                {
-                                    // 画像のダウンロード 
-                                    var filePass = Path.Combine(imageDirectory, FileUrlToFileName().Replace(img.Source, "$1"));
-                                    await _scrapingClientService.DownloadToFileAsync(img.Source, filePass, ct).ConfigureAwait(false);
-                                    document.EnsureSection(chapterNum);
-                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count > 1)
-                                    {
-                                        document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Picture(filePass));
-                                    }
-                                }
-                                if (img.AlternativeText != null)
-                                {
-                                    document.EnsureParagraph(chapterNum, sectionNum);
-                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)
-                                    {
-                                        paragraph.Text += TextReplace(img.AlternativeText);
-                                        document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
-                                    }
-                                    skipCaption = false;
-                                }
-                                else
-                                {
-                                    skipCaption = true;
-                                }
+                                paragraph.Text += TextReplace(img.AlternativeText);
+                                document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
                             }
-
+                            skipCaption = false;
                             break;
                         }
-
                     case TagNames.Span:
+                        if (element.ClassName == "caption")
                         {
-                            if (element.ClassName == "caption")
-                            {
-                                if (skipCaption)
-                                {
-                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^2] is Paragraph paragraph)
-                                    {
-                                        paragraph.Text = TextProcess(element) + "の画像";
-                                    }
-                                }
-                                else
-                                {
-                                    if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)
-                                    {
-                                        paragraph.Text = TextProcess(element) + "の画像";
-                                    }
-                                }
-                            }
-                            else if (element.ClassName == "notes")
-                            {
-                                switch (element.InnerHtml)
-                                {
-                                    case "［＃改丁］":
-                                    case "［＃改ページ］":
-                                    case "［＃改見開き］":
-                                    case "［＃改段］":
-                                    case "［＃ページの左右中央］":
-                                        break;
-                                    default:
-                                        document.EnsureParagraph(chapterNum, sectionNum);
-                                        if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)
-                                        {
-                                            foreach (var splitText in _splitBraceService.SplitBrace(TextProcess(element)))
-                                            {
-                                                if (document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph1)
-                                                {
-                                                    paragraph1.Text += splitText;
-                                                }
-                                                document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
-                                            }
-                                        }
-                                        break;
-                                }
-                            }
-                            else
-                            {
-                                if (chapterNum == -1)
-                                {
-                                    if (chapterExist)
-                                    {
-                                        document.Chapters.Insert(0, new Chapter());
-                                    }
-                                    chapterNum++;
-                                    sectionNum = -1;
-                                }
-                                if (sectionNum == -1)
-                                {
-                                    if (sectionExist)
-                                    {
-                                        document.EnsureChapter();
-                                        document.Chapters[^1].Sections.Insert(0, new Section("___"));
-                                    }
-                                    sectionNum++;
-                                }
-
-                                document.EnsureParagraph(chapterNum, sectionNum);
-                                AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, false);
-                                // 想定していない構造が見つかったことをログに出力した方が良い？
-                            }
-
-                            break;
+                            if (document.Chapters[chapterNum].Sections[sectionNum].Elements[skipCaption ? ^2 : ^1] is Paragraph paragraph)
+                                paragraph.Text = TextProcess(element) + "の画像";
                         }
-
-                    default:
+                        else if (element.ClassName == "notes")
                         {
-                            if (chapterNum == -1)
-                            {
-                                if (chapterExist)
-                                {
-                                    document.Chapters.Insert(0, new Chapter());
-                                }
-                                chapterNum++;
-                                sectionNum = -1;
-                            }
-                            if (sectionNum == -1)
+                            switch (element.InnerHtml)
                             {
-                                if (sectionExist)
-                                {
-                                    document.EnsureChapter();
-                                    document.Chapters[^1].Sections.Insert(0, new Section("___"));
-                                }
-                                sectionNum++;
+                                case "［＃改丁］":
+                                case "［＃改ページ］":
+                                case "［＃改見開き］":
+                                case "［＃改段］":
+                                case "［＃ページの左右中央］":
+                                    break;
+                                default:
+                                    document.EnsureParagraph(chapterNum, sectionNum);
+                                    AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, true);
+                                    break;
                             }
+                        }
+                        else
+                        {
+                            (chapterNum, sectionNum) = SetChapterAndSection(document, hasChapter, hasSection, chapterNum, sectionNum);
                             document.EnsureParagraph(chapterNum, sectionNum);
-
                             AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, false);
-                            break;
                             // 想定していない構造が見つかったことをログに出力した方が良い？
                         }
+
+                        break;
+                    default:
+                        (chapterNum, sectionNum) = SetChapterAndSection(document, hasChapter, hasSection, chapterNum, sectionNum);
+                        document.EnsureParagraph(chapterNum, sectionNum);
+                        AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, element, false);
+                        break;
+                        // 想定していない構造が見つかったことをログに出力した方が良い？
                 }
 
                 if (nextNode is null)
                     continue;
 
-                if (nextNode.NodeType == NodeType.Text)
-                {
-                    var text = nextNode.Text();
-                    if (!string.IsNullOrWhiteSpace(text))
-                    {
-                        previous = true;
-
-                        if (chapterNum == -1)
-                        {
-                            if (chapterExist)
-                            {
-                                document.Chapters.Insert(0, new Chapter());
-                            }
-                            chapterNum++;
-                            sectionNum = -1;
-                        }
-                        if (sectionNum == -1)
-                        {
-                            if (sectionExist)
-                            {
-                                document.EnsureChapter();
-                                document.Chapters[^1].Sections.Insert(0, new Section("___"));
-                            }
-                            sectionNum++;
-                        }
-                        document.EnsureParagraph(chapterNum, sectionNum);
-                        AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, text, false);
-                    }
-                    else
-                    {
-                        previous = false;
-                    }
-                }
-                else
+                if (nextNode.NodeType != NodeType.Text || string.IsNullOrWhiteSpace(nextNode.TextContent))
                 {
                     previous = false;
+                    continue;
                 }
+
+                previous = true;
+
+                (chapterNum, sectionNum) = SetChapterAndSection(document, hasChapter, hasSection, chapterNum, sectionNum);
+                document.EnsureParagraph(chapterNum, sectionNum);
+                AddParagraphs(document.Chapters[chapterNum].Sections[sectionNum].Elements, nextNode.TextContent, false);
             }
 
             // 末尾の空のparagraphを削除
-            document.Chapters[^1].Sections[^1].Elements.RemoveAt(document.Chapters[^1].Sections[^1].Elements.Count - 1);
+            document.Chapters[^1].Sections[^1].Elements.RemoveAt(^1);
 
             return document;
         }
@@ -382,7 +238,7 @@ private static string TextProcess(IElement element)
             }
             else
             {
-                var rubies = element.QuerySelectorAll("ruby");
+                var rubies = element.QuerySelectorAll(TagNames.Ruby);
                 if (rubies.Length > 0)
                 {
                     var resultBuilder = new StringBuilder();
@@ -399,7 +255,7 @@ private static string TextProcess(IElement element)
 
                     foreach (var item in element.Children)
                     {
-                        if (item.TagName == "RUBY")
+                        if (item.TagName == TagNames.Ruby)
                         {
                             if (item.QuerySelectorAll("img").Length > 0)
                             {
@@ -430,7 +286,7 @@ private static string TextProcess(IElement element)
                     }
                     return resultBuilder.ToString();
                 }
-                else if (element.TagName == "RUBY")
+                else if (element.TagName == TagNames.Ruby)
                 {
                     if (element.QuerySelectorAll("img").Length > 0)
                     {
@@ -563,6 +419,32 @@ private static (List<int> contentsIds, bool hasChapter, bool hasSection) LoadToc
             return (contentsIds, hasChapter, hasSection);
         }
 
+        /// <summary>Performs first-time setup of the chapter/section cursor: an index of -1 is treated as "not started yet" and is advanced to 0.</summary>
+        /// <remarks>When <paramref name="hasChapter"/> is set a new chapter is inserted at the front of the chapter list; when <paramref name="hasSection"/> is set a section titled "___" is inserted at the front of the last chapter's sections. If neither index is -1 the inputs are returned unchanged and the document is not touched.</remarks>
+        /// <returns>The (possibly advanced) chapter and section indices.</returns>
+        private static (int focusChapterIdx, int focusSectionIdx) SetChapterAndSection(EpubDocument document, bool hasChapter, bool hasSection, int chapterNum, int sectionNum)
+        {
+            if (chapterNum == -1)
+            {
+                if (hasChapter)
+                {
+                    document.Chapters.Insert(0, new Chapter()); // placeholder chapter goes to the front of the chapter list
+                }
+                chapterNum++;
+                sectionNum = -1; // force the section setup below to run as well, whatever sectionNum was passed in
+            }
+            if (sectionNum == -1)
+            {
+                if (hasSection)
+                {
+                    document.EnsureChapter(); // presumably guarantees at least one chapter exists so [^1] below is valid — TODO confirm
+                    document.Chapters[^1].Sections.Insert(0, new Section("___")); // NOTE(review): targets the LAST chapter, not Chapters[chapterNum] — confirm this is intended
+                }
+                sectionNum++;
+            }
+            return (chapterNum, sectionNum);
+        }
+
         private static string GetCardUrl(string url)
         {
             return UrlBookToCard().Replace(url, "$1card$2$3");
diff --git a/KoeBook.Core/Utilities/EnumerableEx.cs b/KoeBook.Core/Utilities/EnumerableEx.cs
index 4b1ce37..eab16f2 100644
--- a/KoeBook.Core/Utilities/EnumerableEx.cs
+++ b/KoeBook.Core/Utilities/EnumerableEx.cs
@@ -20,4 +20,9 @@ public static class EnumerableEx
             yield return (current, false, !hasNext);
         }
     }
+
+    public static void RemoveAt<T>(this List<T> list, Index index) // Index-accepting overload so callers can write e.g. list.RemoveAt(^1)
+    {
+        list.RemoveAt(index.GetOffset(list.Count)); // resolve the Index against the current Count, then delegate to List<T>.RemoveAt(int)
+    }
 }

From 9a57021815ddd0ea211df34694760db88fb06d35 Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Wed, 3 Apr 2024 21:27:42 +0900
Subject: [PATCH 3/6] =?UTF-8?q?#1-3=20=E3=83=86=E3=82=B9=E3=83=88=E3=82=92?=
 =?UTF-8?q?=E8=BF=BD=E5=8A=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Epub/ScrapingAozoraServiceTest.cs         | 42 ++++++++++++++++---
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
index 6cc1054..f455cdd 100644
--- a/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
+++ b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
@@ -1,6 +1,7 @@
 ﻿using System.Runtime.CompilerServices;
 using AngleSharp;
 using AngleSharp.Dom;
+using KoeBook.Epub.Models;
 using KoeBook.Epub.Services;
 
 namespace KoeBook.Test.Epub;
@@ -11,20 +12,51 @@ public class ScrapingAozoraServiceTest
     [InlineData("", "")]
     public async Task TextProcess(string input, string expected)
     {
-        using var context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());
+        using var context = BrowsingContext.New(Configuration.Default);
         using var doc = await context.OpenAsync(req => req.Content(input));
-
         Assert.NotNull(doc.ParentElement);
-        var result = ScrapingAozora.TextProcess(doc.ParentElement!);
+
+        var result = ScrapingAozora.TextProcess(null, doc.ParentElement!);
 
         Assert.Equal(expected, result);
     }
+
+    [Theory]
+    [InlineData("", new[] { "" })]
+    public async Task AddParagraphs1(string input, string[] expected)
+    {
+        using var context = BrowsingContext.New(Configuration.Default);
+        using var doc = await context.OpenAsync(req => req.Content(input)); // build a document from the raw input string
+        Assert.NotNull(doc.ParentElement);
+        var epubDocument = new EpubDocument("title", "author", "", default) // seed document: one chapter, one section, one paragraph with text "test"
+        {
+            Chapters = [new() { Sections = [new("section title") { Elements = [new Paragraph() { Text = "test" }] }] }]
+        };
+        // NOTE(review): AddParagraphs is never invoked here, so the assertions below only inspect the seeded document; with the inline data above (expected "") the seeded text "test" cannot match — presumably a call to ScrapingAozora.AddParagraphs is still to be added, confirm.
+        Assert.Equal(expected.Length, epubDocument.Chapters[0].Sections[0].Elements.Count); // element count must equal the number of expected texts
+        Assert.All(epubDocument.Chapters[0].Sections[0].Elements.Zip(expected), v => // compare each element pairwise with its expected text
+        {
+            var (element, expected) = v;
+            var paragraph = Assert.IsType<Paragraph>(element); // every element must be a Paragraph
+            Assert.Equal(expected, paragraph.Text);
+        });
+    }
 }
 
 file static class ScrapingAozora
 {
     [UnsafeAccessor(UnsafeAccessorKind.StaticMethod)]
-    private static extern string TextProcess(ScrapingAozoraService? _, IElement element);
+    public static extern string TextProcess(ScrapingAozoraService? _, IElement element);
+
+    [UnsafeAccessor(UnsafeAccessorKind.Method)]
+    public static extern void AddParagraphs(ScrapingAozoraService service, List<KoeBook.Epub.Models.Element> focusElements, IElement element, bool lastEmpty);
+
+    [UnsafeAccessor(UnsafeAccessorKind.Method)]
+    public static extern void AddParagraphs(ScrapingAozoraService service, List<KoeBook.Epub.Models.Element> focusElements, string input, bool lastEmpty);
 
-    public static string TextProcess(IElement element) => TextProcess(null, element);
+    [UnsafeAccessor(UnsafeAccessorKind.StaticMethod)]
+    public static extern string TextReplace(ScrapingAozoraService? _, string text);
+
+    [UnsafeAccessor(UnsafeAccessorKind.StaticMethod)]
+    public static extern (List<int> contentsIds, bool hasChapter, bool hasSection) LoadToc(ScrapingAozoraService? _, IDocument doc, EpubDocument epubDocument);
 }

From 88d66f5dcf47dfea50e7d1ba04ce429a136e91e1 Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Tue, 23 Apr 2024 22:22:45 +0900
Subject: [PATCH 4/6] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=92?=
 =?UTF-8?q?=E9=99=A4=E5=8E=BB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
index f455cdd..7a46dc5 100644
--- a/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
+++ b/KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
@@ -8,8 +8,8 @@ namespace KoeBook.Test.Epub;
 
 public class ScrapingAozoraServiceTest
 {
-    [Theory]
-    [InlineData("", "")]
+    //[Theory]
+    //[InlineData("", "")]
     public async Task TextProcess(string input, string expected)
     {
         using var context = BrowsingContext.New(Configuration.Default);
@@ -21,8 +21,8 @@ public async Task TextProcess(string input, string expected)
         Assert.Equal(expected, result);
     }
 
-    [Theory]
-    [InlineData("", new[] { "" })]
+    //[Theory]
+    //[InlineData("", new[] { "" })]
     public async Task AddParagraphs1(string input, string[] expected)
     {
         using var context = BrowsingContext.New(Configuration.Default);

From 1a1b3fcf97c978b122f9e9985fd15cc924216e3f Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Tue, 23 Apr 2024 22:52:50 +0900
Subject: [PATCH 5/6] =?UTF-8?q?#1-3=20=E4=B8=8D=E8=A6=81=E3=81=AAusing?=
 =?UTF-8?q?=E3=82=92=E5=89=8A=E9=99=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index 6d9e8c9..d3a9b59 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -1,10 +1,7 @@
-﻿using System.Reflection.Metadata;
-using System.Text;
-using System.Xml.Linq;
+﻿using System.Text;
 using AngleSharp;
 using AngleSharp.Dom;
 using AngleSharp.Html.Dom;
-using AngleSharp.Io;
 using KoeBook.Core;
 using KoeBook.Core.Utilities;
 using KoeBook.Epub.Contracts.Services;

From b301c03fb6a772f2f178fe064a65be534531da1a Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Wed, 24 Apr 2024 00:01:17 +0900
Subject: [PATCH 6/6] =?UTF-8?q?=E9=9D=92=E7=A9=BA=E6=96=87=E5=BA=AB?=
 =?UTF-8?q?=E5=87=A6=E7=90=86=E3=81=AE=E5=88=86=E5=B2=90=E3=82=92=E4=BF=AE?=
 =?UTF-8?q?=E6=AD=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: TaPet <134124527+TakenPt@users.noreply.github.com>
---
 Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index d3a9b59..297373d 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -60,7 +60,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
                 var nextNode = element.NextSibling;
                 switch (element.TagName)
                 {
-                    case TagNames.A:
+                    case TagNames.Br:
                         if (previous)
                         {
                             document.EnsureSection(chapterNum);