Skip to content

Commit

Permalink
Merge branch 'main' into herring/claudia
Browse files Browse the repository at this point in the history
  • Loading branch information
aiueo-1234 committed Mar 22, 2024
2 parents 2b65adc + f84e751 commit 6124b4f
Show file tree
Hide file tree
Showing 9 changed files with 232 additions and 38 deletions.
51 changes: 50 additions & 1 deletion Epub/KoeBook.Epub.sln
Original file line number Diff line number Diff line change
@@ -1,25 +1,74 @@
Microsoft Visual Studio Solution File, Format Version 12.00

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.10.34607.79
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "KoeBook.Epub", "KoeBook.Epub\KoeBook.Epub.csproj", "{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "EpubConsoleApp", "EpubConsoleApp\EpubConsoleApp.csproj", "{DE1F3CE9-4F3B-4428-9293-D18446F333D8}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "KoeBook.Core", "..\KoeBook.Core\KoeBook.Core.csproj", "{9F11B1EF-8330-400E-A60C-CFC98E827AA7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|arm64 = Debug|arm64
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|Any CPU = Release|Any CPU
Release|arm64 = Release|arm64
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Debug|arm64.ActiveCfg = Debug|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Debug|arm64.Build.0 = Debug|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Debug|x64.ActiveCfg = Debug|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Debug|x64.Build.0 = Debug|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Debug|x86.ActiveCfg = Debug|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Debug|x86.Build.0 = Debug|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Release|Any CPU.Build.0 = Release|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Release|arm64.ActiveCfg = Release|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Release|arm64.Build.0 = Release|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Release|x64.ActiveCfg = Release|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Release|x64.Build.0 = Release|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Release|x86.ActiveCfg = Release|Any CPU
{E8E224F2-D1E9-437E-8222-FF2F33DB3FCD}.Release|x86.Build.0 = Release|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Debug|arm64.ActiveCfg = Debug|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Debug|arm64.Build.0 = Debug|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Debug|x64.ActiveCfg = Debug|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Debug|x64.Build.0 = Debug|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Debug|x86.ActiveCfg = Debug|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Debug|x86.Build.0 = Debug|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Release|Any CPU.Build.0 = Release|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Release|arm64.ActiveCfg = Release|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Release|arm64.Build.0 = Release|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Release|x64.ActiveCfg = Release|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Release|x64.Build.0 = Release|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Release|x86.ActiveCfg = Release|Any CPU
{DE1F3CE9-4F3B-4428-9293-D18446F333D8}.Release|x86.Build.0 = Release|Any CPU
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Debug|arm64.ActiveCfg = Debug|arm64
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Debug|arm64.Build.0 = Debug|arm64
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Debug|x64.ActiveCfg = Debug|x64
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Debug|x64.Build.0 = Debug|x64
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Debug|x86.ActiveCfg = Debug|x86
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Debug|x86.Build.0 = Debug|x86
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Release|Any CPU.Build.0 = Release|Any CPU
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Release|arm64.ActiveCfg = Release|arm64
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Release|arm64.Build.0 = Release|arm64
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Release|x64.ActiveCfg = Release|x64
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Release|x64.Build.0 = Release|x64
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Release|x86.ActiveCfg = Release|x86
{9F11B1EF-8330-400E-A60C-CFC98E827AA7}.Release|x86.Build.0 = Release|x86
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
18 changes: 18 additions & 0 deletions Epub/KoeBook.Epub/Contracts/Services/IScrapingClientService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using System.Net.Http.Headers;

namespace KoeBook.Epub.Contracts.Services;

public interface IScrapingClientService
{
/// <summary>
/// スクレイピングでGETする用
/// APIを叩く際は不要
/// </summary>
Task<string> GetAsStringAsync(string url, CancellationToken ct);

/// <summary>
/// スクレイピングでGETする用
/// APIを叩く際は不要
/// </summary>
Task<ContentDispositionHeaderValue?> GetAsStreamAsync(string url, Stream destination, CancellationToken ct);
}
13 changes: 0 additions & 13 deletions Epub/KoeBook.Epub/EpubDocumentException.cs

This file was deleted.

6 changes: 3 additions & 3 deletions Epub/KoeBook.Epub/Services/EpubGenerateService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ namespace KoeBook.Epub.Services;

public class EpubGenerateService(ISoundGenerationService soundGenerationService, IEpubDocumentStoreService epubDocumentStoreService, IEpubCreateService epubCreateService) : IEpubGenerateService
{
private ISoundGenerationService _soundGenerationService = soundGenerationService;
private IEpubDocumentStoreService _documentStoreService = epubDocumentStoreService;
private IEpubCreateService _createService = epubCreateService;
private readonly ISoundGenerationService _soundGenerationService = soundGenerationService;
private readonly IEpubDocumentStoreService _documentStoreService = epubDocumentStoreService;
private readonly IEpubCreateService _createService = epubCreateService;

public async ValueTask<string> GenerateEpubAsync(BookScripts bookScripts, string tempDirectory, CancellationToken cancellationToken)
{
Expand Down
14 changes: 9 additions & 5 deletions Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Io;
using KoeBook.Core;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
using Microsoft.Extensions.DependencyInjection;
using static KoeBook.Epub.Utility.ScrapingHelper;


namespace KoeBook.Epub.Services
{
public partial class ScrapingAozoraService : IScrapingService
public partial class ScrapingAozoraService([FromKeyedServices(nameof(ScrapingAozoraService))] IScrapingClientService scrapingClientService) : IScrapingService
{
private readonly IScrapingClientService _scrapingClientService = scrapingClientService;

public bool IsMatchSite(Uri uri)
{
return uri.Host == "www.aozora.gr.jp";
Expand All @@ -29,11 +33,11 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP

// title の取得
var bookTitle = doc.QuerySelector(".title")
?? throw new EpubDocumentException($"Failed to get title properly.\nYou may be able to get proper URL at {GetCardUrl(url)}");
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"タイトルの取得に失敗しました。\n以下のリンクから正しい小説のリンクを取得してください。\n{GetCardUrl(url)}");

// auther の取得
var bookAuther = doc.QuerySelector(".author")
?? throw new EpubDocumentException($"Failed to get auther properly.\nYou may be able to get proper URL at {GetCardUrl(url)}");
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"著者の取得に失敗しました。\n以下のリンクから正しい小説のリンクを取得してください。\n{GetCardUrl(url)}");

// EpubDocument の生成
var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), coverFilePath, id)
Expand Down Expand Up @@ -111,10 +115,10 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
if (midashi != null)
{
if (midashi.Id == null)
throw new EpubDocumentException("Unecpected structure of HTML File: div tag with class=\"midashi_anchor\", but id=\"midashi___\" exist");
throw new EbookException(ExceptionType.WebScrapingFailed, "予期しないHTMLの構造です。\nclass=\"midashi_anchor\"ではなくid=\"midashi___\"が存在します。");

if (!int.TryParse(midashi.Id.Replace("midashi", ""), out var midashiId))
throw new EpubDocumentException($"Unexpected id of Anchor tag was found: id = {midashi.Id}");
throw new EbookException(ExceptionType.WebScrapingFailed, $"予期しないアンカータグが見つかりました。id = {midashi.Id}");

if (contentsIds.Contains(midashiId))
{
Expand Down
116 changes: 116 additions & 0 deletions Epub/KoeBook.Epub/Services/ScrapingClientService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
using System.Net.Http.Headers;
using KoeBook.Epub.Contracts.Services;

namespace KoeBook.Epub.Services;

public sealed class ScrapingClientService : IScrapingClientService, IDisposable
{
private readonly IHttpClientFactory _httpClientFactory;
private readonly PeriodicTimer _periodicTimer;
private readonly Queue<Func<HttpClient, Task>> _actionQueue = [];
private bool _workerActivated;

public ScrapingClientService(IHttpClientFactory httpClientFactory, TimeProvider timeProvider)
{
_httpClientFactory = httpClientFactory;
_periodicTimer = new(TimeSpan.FromSeconds(10), timeProvider);
}

public Task<string> GetAsStringAsync(string url, CancellationToken ct)
{
var taskCompletion = new TaskCompletionSource<string>();

lock (_actionQueue)
_actionQueue.Enqueue(async httpClient =>
{
if (ct.IsCancellationRequested)
taskCompletion.SetCanceled(ct);
try
{
var response = await httpClient.GetAsync(url, ct).ConfigureAwait(false);
taskCompletion.SetResult(await response.Content.ReadAsStringAsync(ct).ConfigureAwait(false));
}
catch (Exception ex)
{
taskCompletion.SetException(ex);
}
});

EnsureWorkerActivated();

return taskCompletion.Task;
}

public Task<ContentDispositionHeaderValue?> GetAsStreamAsync(string url, Stream destination, CancellationToken ct)
{
var taskCompletion = new TaskCompletionSource<ContentDispositionHeaderValue?>();

lock (_actionQueue)
_actionQueue.Enqueue(async httpClient =>
{
if (ct.IsCancellationRequested)
taskCompletion.SetCanceled(ct);
try
{
var response = await httpClient.GetAsync(url, ct).ConfigureAwait(false);
await response.Content.CopyToAsync(destination, ct).ConfigureAwait(false);
taskCompletion.SetResult(response.Content.Headers.ContentDisposition);
}
catch (Exception ex)
{
taskCompletion.SetException(ex);
}
});

EnsureWorkerActivated();

return taskCompletion.Task;
}

/// <summary>
/// <see cref="Worker"/>が起動していない場合は起動します
/// </summary>
private void EnsureWorkerActivated()
{
bool activateWorker;
lock (_actionQueue) activateWorker = !_workerActivated;

if (activateWorker)
Worker();
}

/// <summary>
/// <see cref="_actionQueue"/>のConsumer
/// 別スレッドでループさせるためにvoid
/// </summary>
private async void Worker()
{
lock (_actionQueue)
_workerActivated = true;

try
{
while (await _periodicTimer.WaitForNextTickAsync().ConfigureAwait(false) && _actionQueue.Count > 0)
{
Func<HttpClient, Task>? action;
lock (_actionQueue)
if (!_actionQueue.TryDequeue(out action))
continue;

await action(_httpClientFactory.CreateClient()).ConfigureAwait(ConfigureAwaitOptions.SuppressThrowing);
}
}
finally
{
lock (_actionQueue)
_workerActivated = false;
}
}

public void Dispose()
{
_periodicTimer.Dispose();
}
}
Loading

0 comments on commit 6124b4f

Please sign in to comment.