Skip to content

Commit

Permalink
Merge pull request #20 from OUCC/feat/#19
Browse files Browse the repository at this point in the history
#19 スクレイピング用クライアントを作成
  • Loading branch information
miyaji255 authored Mar 16, 2024
2 parents 059eaf5 + e93ac8e commit f84e751
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 3 deletions.
18 changes: 18 additions & 0 deletions Epub/KoeBook.Epub/Contracts/Services/IScrapingClientService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
using System.Net.Http.Headers;

namespace KoeBook.Epub.Contracts.Services;

/// <summary>
/// HTTP GET client intended for scraping web pages.
/// </summary>
public interface IScrapingClientService
{
/// <summary>
/// Issues a GET request for scraping and returns the response as a string.
/// Not needed when calling an API.
/// </summary>
/// <param name="url">URL to request.</param>
/// <param name="ct">Token used to cancel the request.</param>
/// <returns>A task producing the string result of the request.</returns>
Task<string> GetAsStringAsync(string url, CancellationToken ct);

/// <summary>
/// Issues a GET request for scraping, using <paramref name="destination"/> as the stream for the response data.
/// Not needed when calling an API.
/// </summary>
/// <param name="url">URL to request.</param>
/// <param name="destination">Stream associated with the response data (presumably the body is written into it; confirm against the implementation).</param>
/// <param name="ct">Token used to cancel the request.</param>
/// <returns>A task producing a <see cref="ContentDispositionHeaderValue"/>, or <c>null</c> when none is available.</returns>
Task<ContentDispositionHeaderValue?> GetAsStreamAsync(string url, Stream destination, CancellationToken ct);
}
5 changes: 4 additions & 1 deletion Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
using KoeBook.Core;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
using Microsoft.Extensions.DependencyInjection;
using static KoeBook.Epub.Utility.ScrapingHelper;


namespace KoeBook.Epub.Services
{
public partial class ScrapingAozoraService : IScrapingService
public partial class ScrapingAozoraService([FromKeyedServices(nameof(ScrapingAozoraService))] IScrapingClientService scrapingClientService) : IScrapingService
{
private readonly IScrapingClientService _scrapingClientService = scrapingClientService;

public bool IsMatchSite(Uri uri)
{
return uri.Host == "www.aozora.gr.jp";
Expand Down
116 changes: 116 additions & 0 deletions Epub/KoeBook.Epub/Services/ScrapingClientService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
using System.Net.Http.Headers;
using KoeBook.Epub.Contracts.Services;

namespace KoeBook.Epub.Services;

/// <summary>
/// Rate-limited HTTP GET client for scraping.
/// Requests are queued and a single background worker executes at most one
/// request per tick of a 10-second <see cref="PeriodicTimer"/>.
/// </summary>
/// <remarks>
/// Requests that are still queued when the service is disposed are never executed,
/// so their tasks do not complete.
/// </remarks>
public sealed class ScrapingClientService : IScrapingClientService, IDisposable
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly PeriodicTimer _periodicTimer;

    // Pending requests. This object also serves as the lock guarding the queue and _workerActivated.
    private readonly Queue<Func<Task>> _actionQueue = [];

    // True while a worker loop is running. Only read or written while holding the _actionQueue lock.
    private bool _workerActivated;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="httpClientFactory">Factory used to create an <see cref="HttpClient"/> for each request.</param>
    /// <param name="timeProvider">Time source driving the 10-second interval between requests.</param>
    public ScrapingClientService(IHttpClientFactory httpClientFactory, TimeProvider timeProvider)
    {
        _httpClientFactory = httpClientFactory;
        _periodicTimer = new(TimeSpan.FromSeconds(10), timeProvider);
    }

    /// <summary>
    /// Queues a GET request and returns the response body as a string once the worker has executed it.
    /// </summary>
    /// <param name="url">URL to request.</param>
    /// <param name="ct">Token used to cancel the request.</param>
    /// <returns>A task producing the response body.</returns>
    public Task<string> GetAsStringAsync(string url, CancellationToken ct)
        => EnqueueRequest<string>(async httpClient =>
        {
            using var response = await httpClient.GetAsync(url, ct).ConfigureAwait(false);
            return await response.Content.ReadAsStringAsync(ct).ConfigureAwait(false);
        }, ct);

    /// <summary>
    /// Queues a GET request and copies the response body into <paramref name="destination"/>
    /// once the worker has executed it.
    /// </summary>
    /// <param name="url">URL to request.</param>
    /// <param name="destination">Stream that receives the response body.</param>
    /// <param name="ct">Token used to cancel the request.</param>
    /// <returns>A task producing the response's Content-Disposition header, or <c>null</c> when absent.</returns>
    public Task<ContentDispositionHeaderValue?> GetAsStreamAsync(string url, Stream destination, CancellationToken ct)
        => EnqueueRequest<ContentDispositionHeaderValue?>(async httpClient =>
        {
            using var response = await httpClient.GetAsync(url, ct).ConfigureAwait(false);
            await response.Content.CopyToAsync(destination, ct).ConfigureAwait(false);
            // Read the header before the response is disposed at the end of this scope.
            return response.Content.Headers.ContentDisposition;
        }, ct);

    /// <summary>
    /// Queues <paramref name="request"/> for the worker and starts the worker when none is running.
    /// </summary>
    /// <param name="request">Performs the HTTP call and produces the result.</param>
    /// <param name="ct">Token checked when the request is dequeued and used to report cancellation.</param>
    /// <returns>A task completed with the result, the failure, or the cancellation of the request.</returns>
    private Task<T> EnqueueRequest<T>(Func<HttpClient, Task<T>> request, CancellationToken ct)
    {
        // Run caller continuations asynchronously so they cannot execute inline on the worker loop.
        var taskCompletion = new TaskCompletionSource<T>(TaskCreationOptions.RunContinuationsAsynchronously);
        bool startWorker;

        lock (_actionQueue)
        {
            _actionQueue.Enqueue(async () =>
            {
                if (ct.IsCancellationRequested)
                {
                    // Stop here: running the request anyway would try to complete the task a second time.
                    taskCompletion.TrySetCanceled(ct);
                    return;
                }

                try
                {
                    // The client is created inside the try so that a factory failure reaches the caller.
                    var httpClient = _httpClientFactory.CreateClient();
                    taskCompletion.TrySetResult(await request(httpClient).ConfigureAwait(false));
                }
                catch (OperationCanceledException) when (ct.IsCancellationRequested)
                {
                    taskCompletion.TrySetCanceled(ct);
                }
                catch (Exception ex)
                {
                    taskCompletion.TrySetException(ex);
                }
            });

            // Check and set the flag in the same critical section as the enqueue so that
            // exactly one worker runs and a stopping worker cannot miss this request.
            startWorker = !_workerActivated;
            _workerActivated = true;
        }

        if (startWorker)
            _ = WorkerAsync();

        return taskCompletion.Task;
    }

    /// <summary>
    /// Consumer of <see cref="_actionQueue"/>: runs one queued request per timer tick and
    /// stops when a tick finds the queue empty or the timer has been disposed.
    /// Started fire-and-forget so the loop continues in the background.
    /// </summary>
    private async Task WorkerAsync()
    {
        var flagCleared = false;
        try
        {
            while (await _periodicTimer.WaitForNextTickAsync().ConfigureAwait(false))
            {
                Func<Task>? action;
                lock (_actionQueue)
                {
                    if (!_actionQueue.TryDequeue(out action))
                    {
                        // Clear the flag under the same lock as the emptiness check so a
                        // concurrent enqueue either sees it cleared or gets dequeued here.
                        _workerActivated = false;
                        flagCleared = true;
                        return;
                    }
                }

                // Each action reports its own outcome through its TaskCompletionSource.
                await action().ConfigureAwait(ConfigureAwaitOptions.SuppressThrowing);
            }
        }
        finally
        {
            // Reached without the flag cleared only when the timer was disposed or the wait failed.
            // It must not run after the normal exit above, because a new worker may already own the flag.
            if (!flagCleared)
            {
                lock (_actionQueue)
                    _workerActivated = false;
            }
        }
    }

    /// <summary>
    /// Disposes the timer, which makes the worker loop exit at its next wait.
    /// </summary>
    public void Dispose()
    {
        _periodicTimer.Dispose();
    }
}
4 changes: 3 additions & 1 deletion Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
using KoeBook.Core;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
using Microsoft.Extensions.DependencyInjection;
using static KoeBook.Epub.Utility.ScrapingHelper;

namespace KoeBook.Epub.Services
{
public partial class ScrapingNaroService(IHttpClientFactory httpClientFactory) : IScrapingService
public partial class ScrapingNaroService(IHttpClientFactory httpClientFactory, [FromKeyedServices(nameof(ScrapingNaroService))] IScrapingClientService scrapingClientService) : IScrapingService
{
private readonly IHttpClientFactory _httpCliantFactory = httpClientFactory;
private readonly IScrapingClientService _scrapingClientService = scrapingClientService;

public bool IsMatchSite(Uri uri)
{
Expand Down
9 changes: 8 additions & 1 deletion KoeBook/App.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ public App()
.UseContentRoot(AppContext.BaseDirectory)
.ConfigureServices((context, services) =>
{
// System
services.AddSingleton(TimeProvider.System);
// Default Activation Handler
services.AddTransient<ActivationHandler<LaunchActivatedEventArgs>, DefaultActivationHandler>();
Expand Down Expand Up @@ -99,7 +102,11 @@ public App()
services.AddSingleton<ILlmAnalyzerService, ChatGptAnalyzerService>();
services.AddSingleton<OpenAI.Interfaces.IOpenAIService, MyOpenAiService>();
services.AddSingleton<IScraperSelectorService, ScraperSelectorService>()
// Epub Services
services
.AddKeyedSingleton<IScrapingClientService, ScrapingClientService>(nameof(ScrapingAozoraService))
.AddKeyedSingleton<IScrapingClientService, ScrapingClientService>(nameof(ScrapingNaroService))
.AddSingleton<IScraperSelectorService, ScraperSelectorService>()
.AddSingleton<IScrapingService, ScrapingAozoraService>()
.AddSingleton<IScrapingService, ScrapingNaroService>();
services.AddSingleton<IEpubCreateService, EpubCreateService>();
Expand Down

0 comments on commit f84e751

Please sign in to comment.