Skip to content

Commit

Permalink
Merge pull request #52 from OUCC/feat/#51
Browse files Browse the repository at this point in the history
音声合成の生成時に長文を分割するように変更
  • Loading branch information
miyaji255 authored May 3, 2024
2 parents 372cf8c + 5c96b1e commit dd4a5d2
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ public interface IScraperSelectorService
/// </summary>
public bool IsMatchSites(string url);

public ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct);
public ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct);
}
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ public interface IScrapingService
{
public bool IsMatchSite(Uri url);

public ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct);
public ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct);
}
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Models/EpubDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace KoeBook.Epub.Models;

public class EpubDocument(string title, string author, string coverFilePath, Guid id)
public class EpubDocument(string title, string author, Guid id, string coverFilePath = "")
{
public string Title { get; set; } = title;
public string Author { get; set; } = author;
Expand Down
2 changes: 1 addition & 1 deletion Epub/KoeBook.Epub/Services/AiStoryAnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public partial class AiStoryAnalyzerService(ISplitBraceService splitBraceService

public EpubDocument CreateEpubDocument(AiStory aiStory, Guid id)
{
return new EpubDocument(aiStory.Title, "AI", "", id)
return new EpubDocument(aiStory.Title, "AI", id)
{
Chapters = [new Chapter()
{
Expand Down
3 changes: 2 additions & 1 deletion Epub/KoeBook.Epub/Services/AnalyzerService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public async ValueTask<BookScripts> AnalyzeAsync(BookProperties bookProperties,
switch (bookProperties)
{
case { SourceType: SourceType.Url or SourceType.FilePath, Source: string uri }:
document = await _scrapingService.ScrapingAsync(uri, coverFilePath, tempDirectory, bookProperties.Id, cancellationToken);
document = await _scrapingService.ScrapingAsync(uri, tempDirectory, bookProperties.Id, cancellationToken);
break;
case { SourceType: SourceType.AiStory, Source: AiStory aiStory }:
document = _aiStoryAnalyzerService.CreateEpubDocument(aiStory, bookProperties.Id);
Expand All @@ -45,6 +45,7 @@ public async ValueTask<BookScripts> AnalyzeAsync(BookProperties bookProperties,
}

_createCoverFileService.Create(document.Title, document.Author, coverFilePath);
document.CoverFilePath = coverFilePath;
}
catch (EbookException) { throw; }
catch (Exception ex)
Expand Down
11 changes: 9 additions & 2 deletions Epub/KoeBook.Epub/Services/EpubGenerateService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using KoeBook.Core.Models;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
using NAudio.Wave;

namespace KoeBook.Epub.Services;

Expand All @@ -18,9 +19,15 @@ public async ValueTask<string> GenerateEpubAsync(BookScripts bookScripts, string

var document = _documentStoreService.Documents.Single(d => d.Id == bookScripts.BookProperties.Id);

foreach (var scriptLine in bookScripts.ScriptLines)
for (var i = 0; i < bookScripts.ScriptLines.Length; i++)
{
scriptLine.Audio = new Audio(await _soundGenerationService.GenerateLineSoundAsync(scriptLine, bookScripts.Options, cancellationToken).ConfigureAwait(false));
var scriptLine = bookScripts.ScriptLines[i];
var wavData = await _soundGenerationService.GenerateLineSoundAsync(scriptLine, bookScripts.Options, cancellationToken).ConfigureAwait(false);
using var ms = new MemoryStream(wavData);
using var reader = new WaveFileReader(ms);
var tmpMp3Path = Path.Combine(tempDirectory, $"{document.Title}{i}.mp3");
MediaFoundationEncoder.EncodeToMp3(reader, tmpMp3Path);
scriptLine.Audio = new Audio(reader.TotalTime, tmpMp3Path);
}

if (await _createService.TryCreateEpubAsync(document, tempDirectory, cancellationToken).ConfigureAwait(false))
Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ public bool IsMatchSites(string url)
}
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string tempDirectory, Guid id, CancellationToken ct)
{
var uri = new Uri(url);

foreach (var service in _scrapingServices)
{
if (service.IsMatchSite(uri))
return await service.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct);
return await service.ScrapingAsync(url, tempDirectory, id, ct);
}

throw new ArgumentException("対応するURLではありません");
Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public bool IsMatchSite(Uri uri)
return uri.Host == "www.aozora.gr.jp";
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string imageDirectory, Guid id, CancellationToken ct)
{
var config = Configuration.Default.WithDefaultLoader();
using var context = BrowsingContext.New(config);
Expand All @@ -37,7 +37,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
?? throw new EbookException(ExceptionType.WebScrapingFailed, $"著者の取得に失敗しました。\n以下のリンクから正しい小説のリンクを取得してください。\n{GetCardUrl(url)}");

// EpubDocument の生成
var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), coverFilePath, id);
var document = new EpubDocument(TextReplace(bookTitle.InnerHtml), TextReplace(bookAuther.InnerHtml), id);

var (contentsIds, hasChapter, hasSection) = LoadToc(doc, document);

Expand Down
4 changes: 2 additions & 2 deletions Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public bool IsMatchSite(Uri uri)
return uri.Host == "ncode.syosetu.com";
}

public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
public async ValueTask<EpubDocument> ScrapingAsync(string url, string imageDirectory, Guid id, CancellationToken ct)
{
var ncode = GetNcode(url);
var novelInfo = await GetNovelInfoAsync(ncode, ct).ConfigureAwait(false);
Expand Down Expand Up @@ -53,7 +53,7 @@ public async ValueTask<EpubDocument> ScrapingAsync(string url, string coverFileP
? bookAuthorTag.InnerHtml
: bookAuthorElement.InnerHtml.Replace("作者:", "");

var document = new EpubDocument(bookTitle, bookAuthor, coverFilePath, id);
var document = new EpubDocument(bookTitle, bookAuthor, id);
if (novelInfo.IsSerial) // 連載の時
{
async IAsyncEnumerable<(string? title, Section section)> LoadDetailsAsync(IBrowsingContext context, NovelInfo novelInfo, string imageDirectory, [EnumeratorCancellation] CancellationToken ct)
Expand Down
24 changes: 7 additions & 17 deletions KoeBook.Core/Models/Audio.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
using NAudio.Wave;
using System.IO;
using NAudio.Wave;

namespace KoeBook.Epub.Models;

public sealed class Audio
public sealed class Audio(TimeSpan totalTIme, string tempFilePath)
{
public TimeSpan TotalTime { get; }
private readonly byte[] _mp3Data;
public TimeSpan TotalTime { get; } = totalTIme;
public string TempFilePath { get; } = tempFilePath;

public Audio(byte[] mp3Data)
public FileStream GetStream()
{
_mp3Data = mp3Data;
using var ms = new MemoryStream();
ms.Write(_mp3Data.AsSpan());
ms.Flush();
ms.Position = 0;
using var reader = new Mp3FileReader(ms);
TotalTime = reader.TotalTime;
}

public MemoryStream GetStream()
{
return new MemoryStream(_mp3Data);
return new FileStream(TempFilePath, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, true);
}
}
92 changes: 85 additions & 7 deletions KoeBook.Core/Services/SoundGenerationService.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
using System.Web;
using System.Buffers;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Web;
using KoeBook.Core.Contracts.Services;
using KoeBook.Core.Models;
using NAudio.Wave;

namespace KoeBook.Core.Services;

Expand All @@ -17,11 +21,85 @@ public async ValueTask<byte[]> GenerateLineSoundAsync(ScriptLine scriptLine, Boo
var soundModel = _soundGenerationSelectorService.Models.FirstOrDefault(m => m.Name == model)
?? throw new EbookException(ExceptionType.SoundGenerationFailed);
var style = soundModel.Styles.Contains(scriptLine.Style) ? scriptLine.Style : soundModel.Styles[0];
var queryCollection = HttpUtility.ParseQueryString(string.Empty);
queryCollection.Add("text", scriptLine.Text);
queryCollection.Add("model_id", soundModel.Id);
queryCollection.Add("style", style);
return await _styleBertVitsClientService
.GetAsByteArrayAsync($"/voice?{queryCollection}", ExceptionType.SoundGenerationFailed, cancellationToken).ConfigureAwait(false);
using var msWriter = new MemoryStream();
WaveFileWriter? writer = null;
byte[] dataBuffer = ArrayPool<byte>.Shared.Rent(1024);
try
{
await foreach (var voice in GenerateSoundAsync(scriptLine.Text, style, soundModel.Id, cancellationToken))
{
if (voice.Length > dataBuffer.Length)
{
ArrayPool<byte>.Shared.Return(dataBuffer);
dataBuffer = ArrayPool<byte>.Shared.Rent(voice.Length);
}
using var msReader = new MemoryStream(voice);
using var reader = new WaveFileReader(msReader);
var read = await reader.ReadAsync(dataBuffer, cancellationToken);
if (writer is null)
{
writer = new WaveFileWriter(msWriter, reader.WaveFormat);
}
await writer.WriteAsync(dataBuffer.AsMemory()[..read], cancellationToken);
}
if (writer is null)
{
throw new EbookException(ExceptionType.SoundGenerationFailed);
}
await writer.FlushAsync(cancellationToken);
return msWriter.ToArray();
}
catch { throw; }
finally
{
ArrayPool<byte>.Shared?.Return(dataBuffer);
writer?.Dispose();
}
}

/// <summary>
/// Requests synthesized audio for <paramref name="text"/> piece by piece.
/// The text is first cut into segments by <c>SplitPeriod</c>, and one
/// <c>/voice</c> request is issued per segment, in order.
/// </summary>
/// <param name="text">The text to synthesize.</param>
/// <param name="style">Value sent as the <c>style</c> query parameter.</param>
/// <param name="modelId">Value sent as the <c>model_id</c> query parameter.</param>
/// <param name="cancellationToken">Token forwarded to every request.</param>
/// <returns>
/// One byte array per segment, as returned by the client service
/// (the caller in this class reads each array as WAV data).
/// </returns>
private async IAsyncEnumerable<byte[]> GenerateSoundAsync(string text, string style, string modelId, [EnumeratorCancellation] CancellationToken cancellationToken)
{
    // Upper bound on the number of characters sent in a single request.
    const int maxSegmentLength = 300;

    foreach (var segment in SplitPeriod(text, maxSegmentLength))
    {
        // ParseQueryString(string.Empty) gives a collection whose ToString()
        // produces a URL-encoded query string.
        var query = HttpUtility.ParseQueryString(string.Empty);
        query.Add("text", segment);
        query.Add("model_id", modelId);
        query.Add("style", style);

        var requestPath = $"/voice?{query}";
        var voice = await _styleBertVitsClientService
            .GetAsByteArrayAsync(requestPath, ExceptionType.SoundGenerationFailed, cancellationToken)
            .ConfigureAwait(false);

        yield return voice;
    }
}

/// <summary>
/// Splits <paramref name="text"/> into consecutive segments of at most
/// <paramref name="limit"/> characters, preferring to cut right after the
/// last Japanese full stop ('。') inside each <paramref name="limit"/>-sized window.
/// Concatenating the returned segments reproduces <paramref name="text"/> exactly.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="limit">Maximum number of characters per segment; must be positive.</param>
/// <returns>
/// The segments in order. A text that already fits within <paramref name="limit"/>
/// (including an empty text) is returned as a single segment; otherwise no
/// segment is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="limit"/> is zero or negative.</exception>
private static IEnumerable<string> SplitPeriod(string text, int limit)
{
    if (text is null)
    {
        throw new ArgumentNullException(nameof(text));
    }
    if (limit <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(limit), limit, "limit must be positive.");
    }

    var start = 0;

    // Keep cutting while what remains does not fit into a single segment.
    while (text.Length - start > limit)
    {
        // Search backwards through the window text[start .. start + limit) for '。'.
        // string.LastIndexOf is used (rather than span locals) so this iterator
        // does not rely on ref-struct locals being permitted in iterator blocks.
        var periodIndex = text.LastIndexOf('。', start + limit - 1, limit);

        int end;
        if (periodIndex >= 0)
        {
            // Cut right after the full stop so the sentence stays intact.
            end = periodIndex + 1;
        }
        else
        {
            // No sentence boundary in this window: fall back to a hard cut.
            // Without this fallback the boundary would never advance and the
            // loop would never terminate.
            end = start + limit;

            // Do not separate a surrogate pair (keep at least one char to guarantee progress).
            if (end - start > 1 && char.IsHighSurrogate(text[end - 1]))
            {
                end--;
            }
        }

        yield return text[start..end];
        start = end;
    }

    // The remainder fits within the limit. It is empty only when the whole
    // input was empty, in which case the input is passed through unchanged.
    yield return text[start..];
}
}
6 changes: 3 additions & 3 deletions KoeBook.Test/Epub/EpubDocumentTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public class EpubDocumentTest
[Fact]
public void EnsureChapter()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand All @@ -29,7 +29,7 @@ public void EnsureChapter()
[Fact]
public void EnsureSection()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand Down Expand Up @@ -77,7 +77,7 @@ public void EnsureSection()
[Fact]
public void EnsureParagraph()
{
var document = new EpubDocument("title", "author", "cover", default);
var document = new EpubDocument("title", "author", default);

Assert.Empty(document.Chapters);

Expand Down
2 changes: 1 addition & 1 deletion KoeBook.Test/Epub/ScrapingAozoraServiceTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public async Task AddParagraphs1(string input, string[] expected)
using var context = BrowsingContext.New(Configuration.Default);
using var doc = await context.OpenAsync(req => req.Content(input));
Assert.NotNull(doc.ParentElement);
var epubDocument = new EpubDocument("title", "author", "", default)
var epubDocument = new EpubDocument("title", "author", default)
{
Chapters = [new() { Sections = [new("section title") { Elements = [new Paragraph() { Text = "test" }] }] }]
};
Expand Down

0 comments on commit dd4a5d2

Please sign in to comment.