Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#41 S3へのアップロードを実装 #48

Merged
merged 21 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
59a2420
#41 S3へのアップロードを実装
miyaji255 May 2, 2024
8b2008b
#49 style bert vit2周りの細かい修正
aiueo-1234 May 2, 2024
2a6c2fd
#49 フォーマット
aiueo-1234 May 2, 2024
372cf8c
Merge pull request #50 from OUCC/feat/#49
aiueo-1234 May 2, 2024
53ca423
#41 State更新の条件を追加
miyaji255 May 2, 2024
cbbbb66
#51 音声合成にかける文字数を制限する&wavからmp3へ変換
TakenPt May 2, 2024
acccb50
#51 生成ファイルの再利用
TakenPt May 2, 2024
0b333f2
#51 カバーファイルのパス代入忘れの修正
aiueo-1234 May 2, 2024
a984402
#51 coverfilepathの削除
aiueo-1234 May 3, 2024
cab481f
#41 生成後のファイルを出力するように変更
miyaji255 May 3, 2024
8a6b5d3
#51 ストリーム周りの修正
aiueo-1234 May 3, 2024
5c96b1e
#51 テストの修正
aiueo-1234 May 3, 2024
dd4a5d2
Merge pull request #52 from OUCC/feat/#51
miyaji255 May 3, 2024
2c95e98
CIの対象にreleaseブランチを追加
miyaji255 May 3, 2024
274fc3d
Merge pull request #53 from OUCC/feat/add-ci-target
miyaji255 May 3, 2024
3377342
Merge branch 'main' into feat/#41
miyaji255 May 3, 2024
8c993e4
fmt
miyaji255 May 3, 2024
749863e
#54 mp3とwavでtotaltimeが変わることへの対処
aiueo-1234 May 3, 2024
04f04ec
#54 Claudeが生成するプロンプトへの対処を追加
aiueo-1234 May 3, 2024
fb35843
Merge pull request #55 from OUCC/feat/#54
aiueo-1234 May 3, 2024
d9130ba
Merge branch 'main' into feat/#41
miyaji255 May 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: CI

on:
push:
branches: [main]
branches: [main, release/*]
pull_request:
branches: [main]
branches: [main, release/*]

permissions:
contents: read
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ public interface IScraperSelectorService
/// </summary>
public bool IsMatchSites(string url);

public ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct);
public ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct);
}
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ public interface IScrapingService
{
public bool IsMatchSite(Uri url);

public ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct);
public ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct);
}
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Models/EpubDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace KoeBook.Epub.Models;

public class EpubDocument(string title, string author, string coverFilePath, Guid id)
public class EpubDocument(string title, string author, Guid id, string coverFilePath = "")
{
public string Title { get; set; } = title;
public string Author { get; set; } = author;
Expand Down
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Services/AiStoryAnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public partial class AiStoryAnalyzerService(ISplitBraceService splitBraceService

public EpubDocument CreateEpubDocument(AiStory aiStory, Guid id)
{
return new EpubDocument(aiStory.Title, "AI", "", id)
return new EpubDocument(aiStory.Title, "AI", id)
{
Chapters = [new Chapter()
{
Expand Down
3 changes: 2 additions & 1 deletion Epub/KoeBook.Epub/Services/AnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public async ValueTask<BookScripts> AnalyzeAsync(BookProperties bookProperties,
switch (bookProperties)
{
case { SourceType: SourceType.Url or SourceType.FilePath, Source: string uri }:
document = await _scrapingService.ScrapingAsync(uri, coverFilePath, tempDirectory, bookProperties.Id, cancellationToken);
document = await _scrapingService.ScrapingAsync(uri, tempDirectory, bookProperties.Id, cancellationToken);
break;
case { SourceType: SourceType.AiStory, Source: AiStory aiStory }:
document = _aiStoryAnalyzerService.CreateEpubDocument(aiStory, bookProperties.Id);
Expand All @@ -45,6 +45,7 @@ public async ValueTask<BookScripts> AnalyzeAsync(BookProperties bookProperties,
}

_createCoverFileService.Create(document.Title, document.Author, coverFilePath);
document.CoverFilePath = coverFilePath;
}
catch (EbookException) { throw; }
catch (Exception ex)
Expand Down
12 changes: 10 additions & 2 deletions Epub/KoeBook.Epub/Services/EpubGenerateService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using KoeBook.Core.Models;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
using NAudio.Wave;

namespace KoeBook.Epub.Services;

Expand All @@ -18,9 +19,16 @@ public async ValueTask<string> GenerateEpubAsync(BookScripts bookScripts, string

var document = _documentStoreService.Documents.Single(d => d.Id == bookScripts.BookProperties.Id);

foreach (var scriptLine in bookScripts.ScriptLines)
for (var i = 0; i < bookScripts.ScriptLines.Length; i++)
{
scriptLine.Audio = new Audio(await _soundGenerationService.GenerateLineSoundAsync(scriptLine, bookScripts.Options, cancellationToken).ConfigureAwait(false));
var scriptLine = bookScripts.ScriptLines[i];
var wavData = await _soundGenerationService.GenerateLineSoundAsync(scriptLine, bookScripts.Options, cancellationToken).ConfigureAwait(false);
using var ms = new MemoryStream(wavData);
using var reader = new WaveFileReader(ms);
var tmpMp3Path = Path.Combine(tempDirectory, $"{document.Title}{i}.mp3");
MediaFoundationEncoder.EncodeToMp3(reader, tmpMp3Path);
using var mp3Stream = new Mp3FileReader(tmpMp3Path);
scriptLine.Audio = new Audio(mp3Stream.TotalTime, tmpMp3Path);
}

if (await _createService.TryCreateEpubAsync(document, tempDirectory, cancellationToken).ConfigureAwait(false))
Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ public bool IsMatchSites(string url)
}
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct)
{
var uri = new Uri(url);

foreach (var service in _scrapingServices)
{
if (service.IsMatchSite(uri))
return await service.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct);
return await service.ScrapingAsync(url, tempDirectory, id, ct);
}

throw new ArgumentException("対応するURLではありません");
Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public bool IsMatchSite(Uri uri)
return uri.Host == "www.aozora.gr.jp";
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string imageDirectory, Guid id, CancellationToken ct)
{
var config = Configuration.Default.WithDefaultLoader();
using var context = BrowsingContext.New(config);
Expand All @@ -37,7 +37,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"著者の取得に失敗しました。\n以下のリンクから正しい小説のリンクを取得してください。\n{GetCardUrl(url)}");

// EpubDocument の生成
var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), coverFilePath, id);
var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), id);

var (contentsIds, hasChapter, hasSection) = LoadToc(doc, document);

Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public bool IsMatchSite(Uri uri)
return uri.Host == "ncode.syosetu.com";
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string imageDirectory, Guid id, CancellationToken ct)
{
var ncode = GetNcode(url);
var novelInfo = await GetNovelInfoAsync(ncode, ct).ConfigureAwait(false);
Expand Down Expand Up @@ -53,7 +53,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
? bookAuthorTag.InnerHtml
: bookAuthorElement.InnerHtml.Replace("作者:", "");

var document = new EpubDocument(bookTitle, bookAuthor, coverFilePath, id);
var document = new EpubDocument(bookTitle, bookAuthor, id);
if (novelInfo.IsSerial) // 連載の時
{
async IAsyncEnumerable<(string? title, Section section)> LoadDetailsAsync(IBrowsingContext context, NovelInfo novelInfo, string imageDirectory, [EnumeratorCancellation] CancellationToken ct)
Expand Down
6 changes: 6 additions & 0 deletions KoeBook.Core/Contracts/Services/IS3UploadService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace KoeBook.Core.Contracts.Services;

/// <summary>
/// Abstraction over uploading a generated file to remote storage.
/// </summary>
public interface IS3UploadService
{
    /// <summary>
    /// Uploads the file located at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the local file to upload.</param>
    /// <param name="title">Title associated with the file; presumably used to build the remote object name — confirm against the implementation.</param>
    /// <param name="cancellationToken">Token used to cancel the upload.</param>
    /// <returns>A string identifying the uploaded file; presumably the URL it can be downloaded from — confirm against the implementation.</returns>
    ValueTask<string> UploadFileAsync(string filePath, string title, CancellationToken cancellationToken);
}
3 changes: 3 additions & 0 deletions KoeBook.Core/EbookException.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,7 @@ public enum ExceptionType

[EnumMember(Value = "表紙の画像の生成に失敗しました")]
CreateCoverFileFailed,

[EnumMember(Value = "ファイルのアップロードに失敗しました")]
S3UploadFailed,
}
2 changes: 2 additions & 0 deletions KoeBook.Core/KoeBook.Core.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="AWSSDK.Extensions.NETCore.Setup" Version="3.7.300" />
<PackageReference Include="AWSSDK.S3" Version="3.7.307.25" />
<PackageReference Include="Betalgo.OpenAI" Version="8.1.1" />
<PackageReference Include="Claudia" Version="1.2.0" />
<PackageReference Include="FastEnum" Version="1.8.0" />
Expand Down
24 changes: 7 additions & 17 deletions KoeBook.Core/Models/Audio.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
using NAudio.Wave;
using System.IO;
using NAudio.Wave;

namespace KoeBook.Epub.Models;

public sealed class Audio
public sealed class Audio(TimeSpan totalTIme, string tempFilePath)
{
public TimeSpan TotalTime { get; }
private readonly byte[] _mp3Data;
public TimeSpan TotalTime { get; } = totalTIme;
public string TempFilePath { get; } = tempFilePath;

public Audio(byte[] mp3Data)
public FileStream GetStream()
{
_mp3Data = mp3Data;
using var ms = new MemoryStream();
ms.Write(_mp3Data.AsSpan());
ms.Flush();
ms.Position = 0;
using var reader = new Mp3FileReader(ms);
TotalTime = reader.TotalTime;
}

public MemoryStream GetStream()
{
return new MemoryStream(_mp3Data);
return new FileStream(TempFilePath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, true);
}
}
7 changes: 6 additions & 1 deletion KoeBook.Core/Services/ClaudeAnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ private Dictionary<string, string> ExtractCharacterVoiceMapping(string response,
.Select(l =>
{
var characterId = l[1..l.IndexOf('.')];
var voiceTypeSpan = l.AsSpan()[(l.IndexOf(':') + 2)..];
var voiceTypeSpan = l.AsSpan()[(l.IndexOf(':') + 2)..].Trim();
// ボイス割り当てが複数あたったときに先頭のものを使う(例:群衆 AdultMan, AdultWoman)
var separatorIndex = voiceTypeSpan.IndexOfAny(_searchValues);
if (separatorIndex > 0)
Expand Down Expand Up @@ -174,10 +174,15 @@ private static (Character[], Dictionary<string, string>) ExtractCharacterList(st
var voiceIdLine = zippedLine.First.AsSpan();
voiceIdLine = voiceIdLine[(voiceIdLine.IndexOf(' ') + 2)..];//cまで無視
voiceIdLine = voiceIdLine[..voiceIdLine.IndexOf(' ')];// 二人以上話す時には先頭のものを使う
if (voiceIdLine[^1] == '.')// idに"."がつくことがあるので削除する
{
voiceIdLine = voiceIdLine[..^1];
}
if (characterId2Name.TryGetValue(voiceIdLine.ToString(), out var characterName))
{
zippedLine.Second.Character = characterName;
}
else { throw new EbookException(ExceptionType.ClaudeTalkerAndStyleSettingFailed); }
return 0;
}).Count();
if (voiceIdLinesCount != scriptLines.Length)
Expand Down
28 changes: 28 additions & 0 deletions KoeBook.Core/Services/S3UploadService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using Amazon.S3;
using Amazon.S3.Transfer;
using KoeBook.Core.Contracts.Services;

namespace KoeBook.Core.Services;

/// <summary>
/// <see cref="IS3UploadService"/> implementation that uploads a file to a fixed S3 bucket
/// through <see cref="TransferUtility"/>.
/// </summary>
public class S3UploadService(IAmazonS3 s3Client) : IS3UploadService
{
    private readonly IAmazonS3 _s3Client = s3Client;

    /// <summary>
    /// Uploads the file at <paramref name="filePath"/> under the key <c>{guid}/{title}.epub</c>,
    /// where <c>guid</c> is freshly generated for every call.
    /// </summary>
    /// <param name="filePath">Path of the local file to upload.</param>
    /// <param name="title">Title used as the object name; it is URL-escaped in the returned address only.</param>
    /// <param name="cancellationToken">Token used to cancel the upload.</param>
    /// <returns>The address built from the storage host, the generated GUID and the escaped title.</returns>
    /// <exception cref="EbookException">
    /// Thrown with <see cref="ExceptionType.S3UploadFailed"/> when the SDK raises an <see cref="AmazonS3Exception"/>.
    /// </exception>
    public async ValueTask<string> UploadFileAsync(string filePath, string title, CancellationToken cancellationToken)
    {
        // Hard-coded because moving it into configuration was not worth the effort.
        const string S3BucketName = "koebook-gakusai-storage";

        try
        {
            var guid = Guid.NewGuid();

            // TransferUtility is IDisposable; it does not own a client handed to its constructor,
            // so disposing the utility here leaves _s3Client usable for later calls.
            using var fileTransferUtility = new TransferUtility(_s3Client);
            await fileTransferUtility
                .UploadAsync(filePath, S3BucketName, $"{guid}/{title}.epub", cancellationToken)
                .ConfigureAwait(false);

            // The object key keeps the raw title; only the URL form needs escaping.
            return $"http://storage.koebook.oucc.org/{guid}/{Uri.EscapeDataString(title)}.epub";
        }
        catch (AmazonS3Exception e)
        {
            throw new EbookException(ExceptionType.S3UploadFailed, innerException: e);
        }
    }
}
2 changes: 1 addition & 1 deletion KoeBook.Core/Services/SoundGenerationSelectorService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public async ValueTask InitializeAsync(CancellationToken cancellationToken)
.GetFromJsonAsync<Dictionary<string, ModelInfo>>("/models/info", ExceptionType.InitializeFailed, cancellationToken)
.ConfigureAwait(false);

Models = models.Select(kvp => new SoundModel(kvp.Key, kvp.Value.FirstSpk, kvp.Value.Styles)).ToArray();
Models = models.Select(kvp => new SoundModel(kvp.Key, kvp.Value.FirstSpk.Replace(" ", ""), kvp.Value.Styles)).ToArray();
}
catch (EbookException e) when (e.ExceptionType == ExceptionType.UnknownStyleBertVitsRoot) { }
}
Expand Down
92 changes: 85 additions & 7 deletions KoeBook.Core/Services/SoundGenerationService.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
using System.Web;
using System.Buffers;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Web;
using KoeBook.Core.Contracts.Services;
using KoeBook.Core.Models;
using NAudio.Wave;

namespace KoeBook.Core.Services;

Expand All @@ -17,11 +21,85 @@ public async ValueTask<byte[]> GenerateLineSoundAsync(ScriptLine scriptLine, Boo
var soundModel = _soundGenerationSelectorService.Models.FirstOrDefault(m => m.Name == model)
?? throw new EbookException(ExceptionType.SoundGenerationFailed);
var style = soundModel.Styles.Contains(scriptLine.Style) ? scriptLine.Style : soundModel.Styles[0];
var queryCollection = HttpUtility.ParseQueryString(string.Empty);
queryCollection.Add("text", scriptLine.Text);
queryCollection.Add("model_id", soundModel.Id);
queryCollection.Add("style", scriptLine.Style);
return await _styleBertVitsClientService
.GetAsByteArrayAsync($"/voice/{queryCollection}", ExceptionType.SoundGenerationFailed, cancellationToken).ConfigureAwait(false);
using var msWriter = new MemoryStream();
WaveFileWriter? writer = null;
byte[] dataBuffer = ArrayPool<byte>.Shared.Rent(1024);
try
{
await foreach (var voice in GenerateSoundAsync(scriptLine.Text, style, soundModel.Id, cancellationToken))
{
if (voice.Length > dataBuffer.Length)
{
ArrayPool<byte>.Shared.Return(dataBuffer);
dataBuffer = ArrayPool<byte>.Shared.Rent(voice.Length);
}
using var msReader = new MemoryStream(voice);
using var reader = new WaveFileReader(msReader);
var read = await reader.ReadAsync(dataBuffer, cancellationToken);
if (writer is null)
{
writer = new WaveFileWriter(msWriter, reader.WaveFormat);
}
await writer.WriteAsync(dataBuffer.AsMemory()[..read], cancellationToken);
}
if (writer is null)
{
throw new EbookException(ExceptionType.SoundGenerationFailed);
}
await writer.FlushAsync(cancellationToken);
return msWriter.ToArray();
}
catch { throw; }
finally
{
ArrayPool<byte>.Shared?.Return(dataBuffer);
writer?.Dispose();
}
}

/// <summary>
/// Requests synthesized audio for <paramref name="text"/> piece by piece.
/// The text is first cut by <c>SplitPeriod</c> with a limit of 300, and one <c>/voice</c> request
/// is issued per piece, in order.
/// </summary>
/// <param name="text">Text to synthesize.</param>
/// <param name="style">Value sent as the <c>style</c> query parameter.</param>
/// <param name="modelId">Value sent as the <c>model_id</c> query parameter.</param>
/// <param name="cancellationToken">Token forwarded to every request.</param>
/// <returns>The response bytes of each request, lazily, one element per piece of text.</returns>
private async IAsyncEnumerable<byte[]> GenerateSoundAsync(string text, string style, string modelId, [EnumeratorCancellation] CancellationToken cancellationToken)
{
    foreach (var segment in SplitPeriod(text, 300))
    {
        // ParseQueryString(string.Empty) yields an empty collection whose ToString() is URL-encoded.
        var query = HttpUtility.ParseQueryString(string.Empty);
        query.Add("text", segment);
        query.Add("model_id", modelId);
        query.Add("style", style);

        var requestPath = $"/voice?{query}";
        var voice = await _styleBertVitsClientService
            .GetAsByteArrayAsync(requestPath, ExceptionType.SoundGenerationFailed, cancellationToken)
            .ConfigureAwait(false);

        yield return voice;
    }
}

/// <summary>
/// Splits <paramref name="text"/> into consecutive pieces of at most <paramref name="limit"/> characters.
/// Each piece ends just after the last '。' found inside the current window of <paramref name="limit"/>
/// characters; when the window contains no '。' the piece is cut at the window end so that the scan
/// always advances.
/// </summary>
/// <param name="text">Text to split. Text that already fits in <paramref name="limit"/> is returned as a single piece.</param>
/// <param name="limit">Maximum number of characters per piece; must be positive.</param>
/// <returns>The pieces in order; concatenating them reproduces <paramref name="text"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="limit"/> is zero or negative (raised when enumeration starts).
/// </exception>
private static IEnumerable<string> SplitPeriod(string text, int limit)
{
    if (limit <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(limit));
    }

    var start = 0;

    // Keep cutting while what is left does not fit into a single piece.
    while (text.Length - start > limit)
    {
        var windowEnd = start + limit;

        // Backward search restricted to [start, windowEnd); the result is an absolute index or -1.
        var periodIndex = text.LastIndexOf('。', windowEnd - 1, limit);

        // No sentence end inside the window: fall back to a hard cut instead of stalling.
        var end = periodIndex < 0 ? windowEnd : periodIndex + 1;

        yield return text[start..end];
        start = end;
    }

    // The loop only cuts while more than `limit` characters remain, so this tail is never empty
    // unless the input itself was empty (in which case the empty text is returned as-is).
    yield return text[start..];
}
}
6 changes: 3 additions & 3 deletions KoeBook.Test/Epub/EpubDocumentTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public class EpubDocumentTest
[Fact]
public void EnsureChapter()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand All @@ -29,7 +29,7 @@ public void EnsureChapter()
[Fact]
public void EnsureSection()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand Down Expand Up @@ -77,7 +77,7 @@ public void EnsureSection()
[Fact]
public void EnsureParagraph()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand Down
Loading
Loading