Skip to content

Commit

Permalink
#17-1 EpubDocumentExceptionの削除
Browse files Browse the repository at this point in the history
  • Loading branch information
aiueo-1234 committed Mar 15, 2024
1 parent fa3f28c commit 29394aa
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 30 deletions.
13 changes: 0 additions & 13 deletions Epub/KoeBook.Epub/EpubDocumentException.cs

This file was deleted.

9 changes: 5 additions & 4 deletions Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Io;
using KoeBook.Core;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
using static KoeBook.Epub.Utility.ScrapingHelper;
Expand Down Expand Up @@ -29,11 +30,11 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP

// title の取得
var bookTitle = doc.QuerySelector(".title")
?? throw new EpubDocumentException($"Failed to get title properly.\nYou may be able to get proper URL at {GetCardUrl(url)}");
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"Failed to get title properly.\nYou may be able to get proper URL at {GetCardUrl(url)}");

// author の取得
var bookAuther = doc.QuerySelector(".author")
?? throw new EpubDocumentException($"Failed to get auther properly.\nYou may be able to get proper URL at {GetCardUrl(url)}");
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"Failed to get auther properly.\nYou may be able to get proper URL at {GetCardUrl(url)}");

// EpubDocument の生成
var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), coverFilePath, id)
Expand Down Expand Up @@ -111,10 +112,10 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
if (midashi != null)
{
if (midashi.Id == null)
throw new EpubDocumentException("Unecpected structure of HTML File: div tag with class=\"midashi_anchor\", but id=\"midashi___\" exist");
throw new EbookException(ExceptionType.WebScrapingFailed, "Unecpected structure of HTML File: div tag with class=\"midashi_anchor\", but id=\"midashi___\" exist");

if (!int.TryParse(midashi.Id.Replace("midashi", ""), out var midashiId))
throw new EpubDocumentException($"Unexpected id of Anchor tag was found: id = {midashi.Id}");
throw new EbookException(ExceptionType.WebScrapingFailed, $"Unexpected id of Anchor tag was found: id = {midashi.Id}");

if (contentsIds.Contains(midashiId))
{
Expand Down
27 changes: 14 additions & 13 deletions Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Io;
using KoeBook.Core;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
using static KoeBook.Epub.Utility.ScrapingHelper;
Expand All @@ -26,12 +27,12 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP

// title の取得
var bookTitleElement = doc.QuerySelector(".novel_title")
?? throw new EpubDocumentException($"Failed to get title properly.\nUrl may be not collect");
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"Failed to get title properly.\nUrl may be not collect");
var bookTitle = bookTitleElement.InnerHtml;

// author の取得
var bookAutherElement = doc.QuerySelector(".novel_writername")
?? throw new EpubDocumentException($"Failed to get auther properly.\nUrl may be not collect");
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"Failed to get auther properly.\nUrl may be not collect");
var bookAuther = string.Empty;
if (bookAutherElement.QuerySelector("a") is IHtmlAnchorElement bookAutherAnchorElement)
{
Expand All @@ -54,13 +55,13 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
var result = await client.SendAsync(message, ct).ConfigureAwait(false);
var test = await result.Content.ReadAsStringAsync(ct).ConfigureAwait(false);
if (!result.IsSuccessStatusCode)
throw new EpubDocumentException("Url may be not Correct");
throw new EbookException(ExceptionType.WebScrapingFailed, "Url may be not Correct");

var content = await result.Content.ReadFromJsonAsync<BookInfo[]>(ct).ConfigureAwait(false);
if (content != null)
{
if (content[1].noveltype == null)
throw new EpubDocumentException("faild to get data by Narou API");
throw new EbookException(ExceptionType.WebScrapingFailed, "faild to get data by Narou API");

if (content[1].noveltype == 2)
{
Expand All @@ -73,7 +74,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
}

if (allNum == 0)
throw new EpubDocumentException("faild to get data by Narou API");
throw new EbookException(ExceptionType.WebScrapingFailed, "faild to get data by Narou API");
}

var document = new EpubDocument(bookTitle, bookAuther, coverFilePath, id);
Expand All @@ -91,7 +92,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
foreach (var sectionWithChapterTitle in SectionWithChapterTitleList)
{
if (sectionWithChapterTitle == null)
throw new EpubDocumentException("failed to get page");
throw new EbookException(ExceptionType.WebScrapingFailed, "failed to get page");

if (sectionWithChapterTitle.title != null)
{
Expand Down Expand Up @@ -162,20 +163,20 @@ private static async Task<SectionWithChapterTitle> ReadPageAsync(string url, boo
}

if (sectionTitleElement == null)
throw new EpubDocumentException("Can not find title of page");
throw new EbookException(ExceptionType.WebScrapingFailed, "Can not find title of page");

var sectionTitle = sectionTitleElement.InnerHtml;

var section = new Section(sectionTitleElement.InnerHtml);


var main_text = doc.QuerySelector("#novel_honbun")
?? throw new EpubDocumentException("There is no honbun.");
?? throw new EbookException(ExceptionType.WebScrapingFailed, "There is no honbun.");

foreach (var item in main_text.Children)
{
if (item is not IHtmlParagraphElement)
throw new EpubDocumentException("Unexpected structure");
throw new EbookException(ExceptionType.WebScrapingFailed, "Unexpected structure");

if (item.ChildElementCount == 0)
{
Expand All @@ -192,12 +193,12 @@ private static async Task<SectionWithChapterTitle> ReadPageAsync(string url, boo
if (item.Children[0] is IHtmlAnchorElement aElement)
{
if (aElement.ChildElementCount != 1)
throw new EpubDocumentException("Unexpected structure");
throw new EbookException(ExceptionType.WebScrapingFailed, "Unexpected structure");

if (aElement.Children[0] is IHtmlImageElement img)
{
if (img.Source == null)
throw new EpubDocumentException("Unexpected structure");
throw new EbookException(ExceptionType.WebScrapingFailed, "Unexpected structure");

// 画像のダウンロード
var loader = context.GetService<IDocumentLoader>();
Expand Down Expand Up @@ -225,7 +226,7 @@ private static async Task<SectionWithChapterTitle> ReadPageAsync(string url, boo
}
}
else if (item.Children[0] is not IHtmlBreakRowElement)
throw new EpubDocumentException("Unexpected structure");
throw new EbookException(ExceptionType.WebScrapingFailed, "Unexpected structure");
}
else
{
Expand All @@ -240,7 +241,7 @@ private static async Task<SectionWithChapterTitle> ReadPageAsync(string url, boo
}

if (!isAllRuby)
throw new EpubDocumentException("Unexpected structure");
throw new EbookException(ExceptionType.WebScrapingFailed, "Unexpected structure");

if (!string.IsNullOrWhiteSpace(item.InnerHtml))
{
Expand Down

0 comments on commit 29394aa

Please sign in to comment.