Skip to content

Commit

Permalink
Merge branch 'main' into feat/#8
Browse files Browse the repository at this point in the history
  • Loading branch information
aiueo-1234 committed Mar 10, 2024
2 parents 26f5f82 + 8ad8c91 commit aaf47f2
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 22 deletions.
43 changes: 27 additions & 16 deletions Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,23 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
var doc = await context.OpenAsync(url, ct).ConfigureAwait(false);

// title の取得
var bookTitle = doc.QuerySelector(".novel_title")
var bookTitleElement = doc.QuerySelector(".novel_title")
?? throw new EpubDocumentException($"Failed to get title properly.\nUrl may be not collect");
var bookTitle = bookTitleElement.InnerHtml;

// auther の取得
var bookAuther = doc.QuerySelector(".novel_writername a")
var bookAutherElement = doc.QuerySelector(".novel_writername")
?? throw new EpubDocumentException($"Failed to get auther properly.\nUrl may be not collect");
var bookAuther = string.Empty;
if (bookAutherElement.QuerySelector("a") is IHtmlAnchorElement bookAutherAnchorElement)
{
bookAuther = bookAutherAnchorElement.InnerHtml;
}
else
{
bookAuther = bookAutherElement.InnerHtml.Replace("作者:", "");
}

bool isRensai = true;
int allNum = 0;

Expand Down Expand Up @@ -65,14 +76,13 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
throw new EpubDocumentException("faild to get data by Narou API");
}

var document = new EpubDocument(bookTitle.InnerHtml, bookAuther.InnerHtml, coverFilePath, id);
if (isRensai)
var document = new EpubDocument(bookTitle, bookAuther, coverFilePath, id);
if (isRensai) // 連載の時
{
List<SectionWithChapterTitle> SectionWithChapterTitleList = new List<SectionWithChapterTitle>();
for (int i = 1; i <= allNum; i++)
{
Console.WriteLine(i);
await Task.Delay(500, ct);
await Task.Delay(1500, ct);
var pageUrl = Path.Combine(url, i.ToString());
var load = await ReadPageAsync(pageUrl, isRensai, imageDirectory, ct).ConfigureAwait(false);
SectionWithChapterTitleList.Add(load);
Expand Down Expand Up @@ -106,7 +116,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
}
}
}
else
else // 短編の時
{
var load = await ReadPageAsync(url, isRensai, imageDirectory, ct).ConfigureAwait(false);
if (load != null)
Expand All @@ -122,19 +132,22 @@ public record BookInfo(int? allcount, int? noveltype, int? general_all_no);

private record SectionWithChapterTitle(string? title, Section section);

private async Task<SectionWithChapterTitle> ReadPageAsync(string url, bool isRensai, string imageDirectory, CancellationToken ct)
private static async Task<SectionWithChapterTitle> ReadPageAsync(string url, bool isRensai, string imageDirectory, CancellationToken ct)
{
var config = Configuration.Default.WithDefaultLoader();
using var context = BrowsingContext.New(config);
var doc = await context.OpenAsync(url, ct).ConfigureAwait(false);

var chapterTitleElement = doc.QuerySelector(".chapter_title");
string? chapterTitle = null;
if (chapterTitleElement != null)
if (!isRensai)
{
if (chapterTitleElement.InnerHtml != null)
var chapterTitleElement = doc.QuerySelector(".chapter_title");
if (chapterTitleElement != null)
{
chapterTitle = chapterTitleElement.InnerHtml;
if (chapterTitleElement.InnerHtml != null)
{
chapterTitle = chapterTitleElement.InnerHtml;
}
}
}

Expand All @@ -148,11 +161,10 @@ private async Task<SectionWithChapterTitle> ReadPageAsync(string url, bool isRen
sectionTitleElement = doc.QuerySelector(".novel_title");
}

string sectionTitle = "";
if (sectionTitleElement == null)
throw new EpubDocumentException("Can not find title of page");

sectionTitle = sectionTitleElement.InnerHtml;
var sectionTitle = sectionTitleElement.InnerHtml;

var section = new Section(sectionTitleElement.InnerHtml);

Expand Down Expand Up @@ -223,6 +235,7 @@ private async Task<SectionWithChapterTitle> ReadPageAsync(string url, bool isRen
if (tags.TagName != "RUBY")
{
isAllRuby = false;
break;
}
}

Expand All @@ -241,8 +254,6 @@ private async Task<SectionWithChapterTitle> ReadPageAsync(string url, bool isRen
return new SectionWithChapterTitle(chapterTitle, section);
}



[System.Text.RegularExpressions.GeneratedRegex(@"https://.{5,7}.syosetu.com/(.{7}).?")]
private static partial System.Text.RegularExpressions.Regex UrlToNcode();

Expand Down
10 changes: 5 additions & 5 deletions Epub/KoeBook.Epub/Utility/ScrapingHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ public static class ScrapingHelper
{
public static List<string> SplitBrace(string text)
{
if (text.Length == 1 && (text == "「" || text == "」"))
if (text.Length == 1 && (text == "「" || text == "」" || text == "『" || text == "』"))
return [text];

var bracket = 0;
var brackets = new int[text.Length];
for (var i = 0; i < text.Length; i++)
{
var c = text[i];
if (c == '「') bracket++;
else if (c == '」') bracket--;
if (c == '「' || c == '『') bracket++;
else if (c == '」' || c == '』') bracket--;
brackets[i] = bracket;
}

Expand All @@ -23,12 +23,12 @@ public static List<string> SplitBrace(string text)
for (var i = 0; i < brackets.Length; i++)
{
brackets[i] -= mn;
if (text[i] == '「' && brackets[i] == 1 && i != 0 && startIdx != i)
if ((text[i] == '「' || text[i] == '『') && brackets[i] == 1 && i != 0 && startIdx != i)
{
result.Add(text[startIdx..i]);
startIdx = i;
}
if (text[i] == '」' && brackets[i] == 0)
if ((text[i] == '」' || text[i] == '』') && brackets[i] == 0)
{
result.Add(text[startIdx..(i + 1)]);
startIdx = i + 1;
Expand Down
47 changes: 46 additions & 1 deletion KoeBook.Test/Epub/ScrapingHelperTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ public class ScrapingHelperTest
public static object[][] TestCases()
{
(string, List<string>)[] cases = [
// '「''」'のみの場合のケース
("「", ["「"]),
("」", ["」"]),
("a", ["a"]),
Expand All @@ -29,7 +30,51 @@ public static object[][] TestCases()
("abc「abc「abc」abc", ["abc", "「abc「abc」abc"]),
("abc「abc」abc」abc", ["abc「abc」abc」", "abc"]),
("abc「abc「abc", ["abc", "「abc「abc"]),
("abc」abc」abc", ["abc」abc」", "abc"])
("abc」abc」abc", ["abc」abc」", "abc"]),
// '『''』'のみの場合のケース
("『", ["『"]),
("』", ["』"]),
("a", ["a"]),
("abc『abc』abc", ["abc", "『abc』", "abc"]),
("abc『abc』", ["abc", "『abc』"]),
("『abc』abc", ["『abc』", "abc",]),
("abc『abc』", ["abc", "『abc』"]),
("『abc』", ["『abc』",]),
("abc『abc", ["abc", "『abc"]),
("abc『", ["abc", "『"]),
("『abc", ["『abc"]),
("abc』abc", ["abc』", "abc"]),
("abc』", ["abc』"]),
("』abc", ["』", "abc"]),
("abc『abc』abc『abc』abc", ["abc", "『abc』", "abc", "『abc』", "abc"]),
("『abc』abc『abc』abc", ["『abc』", "abc", "『abc』", "abc"]),
("abc『abc』『abc』abc", ["abc", "『abc』", "『abc』", "abc"]),
("abc『abc』abc『abc』", ["abc", "『abc』", "abc", "『abc』"]),
("abc『abc『abc』abc』abc", ["abc", "『abc『abc』abc』", "abc"]),
("abc『abc『abc』abc", ["abc", "『abc『abc』abc"]),
("abc『abc』abc』abc", ["abc『abc』abc』", "abc"]),
("abc『abc『abc", ["abc", "『abc『abc"]),
("abc』abc』abc", ["abc』abc』", "abc"]),
// '「''」''『''』'が混在するパターン
("abc「abc」abc『abc』abc", ["abc", "「abc」", "abc", "『abc』", "abc"]),
("abc『abc』abc「abc」abc", ["abc", "『abc』", "abc", "「abc」", "abc"]),
("「abc」abc『abc』abc", ["「abc」", "abc", "『abc』", "abc"]),
("『abc』abc「abc」abc", ["『abc』", "abc", "「abc」", "abc"]),
("abc「abc」『abc』abc", ["abc", "「abc」", "『abc』", "abc"]),
("abc『abc』「abc」abc", ["abc", "『abc』", "「abc」", "abc"]),
("abc「abc」abc『abc』", ["abc", "「abc」", "abc", "『abc』"]),
("abc『abc』abc「abc」", ["abc", "『abc』", "abc", "「abc」"]),
("abc「abc『abc』abc」abc", ["abc", "「abc『abc』abc」", "abc"]),
("abc『abc「abc」abc』abc", ["abc", "『abc「abc」abc』", "abc"]),
("abc「abc『abc』abc", ["abc", "「abc『abc』abc"]),
("abc『abc「abc」abc", ["abc", "『abc「abc」abc"]),
("abc「abc」abc』abc", ["abc「abc」abc』", "abc"]),
("abc『abc』abc」abc", ["abc『abc』abc」", "abc"]),
("abc「abc『abc", ["abc", "「abc『abc"]),
("abc『abc「abc", ["abc", "『abc「abc"]),
("abc」abc』abc", ["abc」abc』", "abc"]),
("abc』abc」abc", ["abc』abc」", "abc"]),
("abc』abc』abc", ["abc』abc』", "abc"])
];
return cases.Select(c => new object[] { c.Item1, c.Item2 }).ToArray();
}
Expand Down

0 comments on commit aaf47f2

Please sign in to comment.