diff --git a/src/MarkItDown/Converters/DocxConverter.cs b/src/MarkItDown/Converters/DocxConverter.cs index 038c6b726..a5d95fc04 100644 --- a/src/MarkItDown/Converters/DocxConverter.cs +++ b/src/MarkItDown/Converters/DocxConverter.cs @@ -89,22 +89,19 @@ public async Task ConvertAsync(Stream stream, StreamInf } } - private static async Task ExtractTextFromDocxAsync(Stream stream, CancellationToken cancellationToken) + private static Task ExtractTextFromDocxAsync(Stream stream, CancellationToken cancellationToken) { var result = new StringBuilder(); - await Task.Run(() => - { - using var wordDocument = WordprocessingDocument.Open(stream, false); - var body = wordDocument.MainDocumentPart?.Document?.Body; + using var wordDocument = WordprocessingDocument.Open(stream, false); + var body = wordDocument.MainDocumentPart?.Document?.Body; - if (body != null) - { - ProcessBodyElements(body, result, cancellationToken); - } - }, cancellationToken); + if (body != null) + { + ProcessBodyElements(body, result, cancellationToken); + } - return result.ToString().Trim(); + return Task.FromResult(result.ToString().Trim()); } private static void ProcessBodyElements(Body body, StringBuilder result, CancellationToken cancellationToken) diff --git a/src/MarkItDown/Converters/PdfConverter.cs b/src/MarkItDown/Converters/PdfConverter.cs index c53c90af9..ec641df12 100644 --- a/src/MarkItDown/Converters/PdfConverter.cs +++ b/src/MarkItDown/Converters/PdfConverter.cs @@ -271,33 +271,30 @@ private sealed class PdfPigTextExtractor : IPdfTextExtractor { public Task ExtractTextAsync(byte[] pdfBytes, CancellationToken cancellationToken) { - return Task.Run(() => - { - var builder = new StringBuilder(); - - using var pdfDocument = PdfDocument.Open(pdfBytes); + var builder = new StringBuilder(); - for (var pageNumber = 1; pageNumber <= pdfDocument.NumberOfPages; pageNumber++) - { - cancellationToken.ThrowIfCancellationRequested(); - var page = pdfDocument.GetPage(pageNumber); - var pageText = page.Text; + using var pdfDocument = PdfDocument.Open(pdfBytes); - if (string.IsNullOrWhiteSpace(pageText)) - { - continue; - } + for (var pageNumber = 1; pageNumber <= pdfDocument.NumberOfPages; pageNumber++) + { + cancellationToken.ThrowIfCancellationRequested(); + var page = pdfDocument.GetPage(pageNumber); + var pageText = page.Text; - if (builder.Length > 0) - { - builder.AppendLine("\n---\n"); - } + if (string.IsNullOrWhiteSpace(pageText)) + { + continue; + } - builder.AppendLine(pageText.Trim()); + if (builder.Length > 0) + { + builder.AppendLine("\n---\n"); } - return builder.ToString(); - }, cancellationToken); + builder.AppendLine(pageText.Trim()); + } + + return Task.FromResult(builder.ToString()); } } @@ -322,34 +319,31 @@ public Task> RenderImagesAsync(byte[] pdfBytes, Cancellati [SupportedOSPlatform("ios")] private static Task> RenderOnSupportedPlatformsAsync(byte[] pdfBytes, CancellationToken cancellationToken) { - return Task.Run(() => + var images = new List(); + var options = new RenderOptions { - var images = new List(); - var options = new RenderOptions - { - Dpi = 144, - WithAnnotations = true, - WithAspectRatio = true, - AntiAliasing = PdfAntiAliasing.All, - }; + Dpi = 144, + WithAnnotations = true, + WithAspectRatio = true, + AntiAliasing = PdfAntiAliasing.All, + }; #pragma warning disable CA1416 - foreach (var bitmap in Conversion.ToImages(pdfBytes, password: null, options)) + foreach (var bitmap in Conversion.ToImages(pdfBytes, password: null, options)) + { + cancellationToken.ThrowIfCancellationRequested(); + using var bmp = bitmap; + using var data = bmp.Encode(SKEncodedImageFormat.Png, quality: 90); + if (data is null) { - cancellationToken.ThrowIfCancellationRequested(); - using var bmp = bitmap; - using var data = bmp.Encode(SKEncodedImageFormat.Png, quality: 90); - if (data is null) - { - continue; - } - - images.Add(Convert.ToBase64String(data.Span)); + continue; } + + images.Add(Convert.ToBase64String(data.Span)); + } #pragma warning restore CA1416 - return (IReadOnlyList)images; - }, cancellationToken); + return Task.FromResult>(images); } } } diff --git a/src/MarkItDown/Converters/PptxConverter.cs b/src/MarkItDown/Converters/PptxConverter.cs index e2463155d..baacc072e 100644 --- a/src/MarkItDown/Converters/PptxConverter.cs +++ b/src/MarkItDown/Converters/PptxConverter.cs @@ -91,31 +91,28 @@ public async Task ConvertAsync(Stream stream, StreamInf } } - private static async Task ExtractContentFromPptxAsync(Stream stream, CancellationToken cancellationToken) + private static Task ExtractContentFromPptxAsync(Stream stream, CancellationToken cancellationToken) { var result = new StringBuilder(); - await Task.Run(() => + using var presentationDocument = PresentationDocument.Open(stream, false); + var presentationPart = presentationDocument.PresentationPart; + + if (presentationPart?.Presentation?.SlideIdList != null) { - using var presentationDocument = PresentationDocument.Open(stream, false); - var presentationPart = presentationDocument.PresentationPart; + var slideCount = 0; - if (presentationPart?.Presentation?.SlideIdList != null) + foreach (var slideId in presentationPart.Presentation.SlideIdList.Elements()) { - var slideCount = 0; + cancellationToken.ThrowIfCancellationRequested(); - foreach (var slideId in presentationPart.Presentation.SlideIdList.Elements()) - { - cancellationToken.ThrowIfCancellationRequested(); - - slideCount++; - var slidePart = (SlidePart)presentationPart.GetPartById(slideId.RelationshipId!); - ProcessSlide(slidePart, slideCount, result); - } + slideCount++; + var slidePart = (SlidePart)presentationPart.GetPartById(slideId.RelationshipId!); + ProcessSlide(slidePart, slideCount, result); } - }, cancellationToken); + } - return result.ToString().Trim(); + return Task.FromResult(result.ToString().Trim()); } private static void ProcessSlide(SlidePart slidePart, int slideNumber, StringBuilder result) diff --git a/src/MarkItDown/Converters/XlsxConverter.cs b/src/MarkItDown/Converters/XlsxConverter.cs index 1825fc97e..e6c85b411 100644 --- a/src/MarkItDown/Converters/XlsxConverter.cs +++ b/src/MarkItDown/Converters/XlsxConverter.cs @@ -89,28 +89,25 @@ public async Task ConvertAsync(Stream stream, StreamInf } } - private static async Task ExtractDataFromXlsxAsync(Stream stream, CancellationToken cancellationToken) + private static Task ExtractDataFromXlsxAsync(Stream stream, CancellationToken cancellationToken) { var result = new StringBuilder(); - await Task.Run(() => + using var spreadsheetDocument = SpreadsheetDocument.Open(stream, false); + var workbookPart = spreadsheetDocument.WorkbookPart; + + if (workbookPart?.Workbook?.Sheets != null) { - using var spreadsheetDocument = SpreadsheetDocument.Open(stream, false); - var workbookPart = spreadsheetDocument.WorkbookPart; - - if (workbookPart?.Workbook?.Sheets != null) + foreach (var sheet in workbookPart.Workbook.Sheets.Elements()) { - foreach (var sheet in workbookPart.Workbook.Sheets.Elements()) - { - cancellationToken.ThrowIfCancellationRequested(); - - var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id!); - ProcessWorksheet(worksheetPart, sheet.Name?.Value ?? "Sheet", result, workbookPart); - } + cancellationToken.ThrowIfCancellationRequested(); + + var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id!); + ProcessWorksheet(worksheetPart, sheet.Name?.Value ?? "Sheet", result, workbookPart); } - }, cancellationToken); + } - return result.ToString().Trim(); + return Task.FromResult(result.ToString().Trim()); } private static void ProcessWorksheet(WorksheetPart worksheetPart, string sheetName, StringBuilder result, WorkbookPart workbookPart)