Azure-Samples · TakaValley · May 22, 2024 · May 23, 2024
diff --git a/.NET(v4.0)/ConvertHtmlToPdf/ConvertHtmlToPdf.csproj b/.NET(v4.0)/ConvertHtmlToPdf/ConvertHtmlToPdf.csproj
@@ -0,0 +1,19 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="itext7.bouncy-castle-adapter" Version="8.0.4" />
+    <PackageReference Include="itext7.pdfhtml" Version="5.0.4" />
+  </ItemGroup>
+
+  <!-- Copies the sample HTML page into the HtmlFile folder of the build output so Program.cs can read it at run time. -->
+  <Target Name="PostBuild" AfterTargets="PostBuildEvent">
+    <!-- The built-in Copy task runs on every platform supported by the .NET SDK (xcopy exists only on Windows) and creates the destination folder when it is missing. -->
+    <Copy SourceFiles="$(ProjectDir)..\..\Data\other-doc-type\web-page.html" DestinationFolder="$(TargetDir)HtmlFile" />
+  </Target>
+
+</Project>
diff --git a/.NET(v4.0)/ConvertHtmlToPdf/Program.cs b/.NET(v4.0)/ConvertHtmlToPdf/Program.cs
@@ -0,0 +1,32 @@
+// coding: utf - 8
+// --------------------------------------------------------------------------
+// Copyright(c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. See License.txt in the project root for
+// license information.
+// --------------------------------------------------------------------------
+using System.Diagnostics;
+using System.Text;
+using iText.Html2pdf;
+
+// Converts an HTML string to a PDF document.
+//   htmlString     - the HTML markup to convert; it is encoded as UTF-8 before conversion.
+//   outputFilePath - path of the PDF file to write; an existing file is replaced.
+Action<string, string> ConvertHtmlToPdf = (string htmlString, string outputFilePath) =>
+{
+    using var htmlStream = new MemoryStream(Encoding.UTF8.GetBytes(htmlString));
+    // FileMode.Create truncates an existing file. OpenOrCreate keeps the old content, so a
+    // previous, larger PDF would leave stale bytes after the end of the new document.
+    using var pdfFileStream = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write);
+    HtmlConverter.ConvertToPdf(htmlStream, pdfFileStream);
+};
+
+// Resolve paths against the application's base directory (the build output folder, where the
+// post-build step places HtmlFile\web-page.html) rather than the current working directory,
+// which differs when the sample is started with "dotnet run" or from another folder.
+var baseDir = AppContext.BaseDirectory;
+var htmlContent = File.ReadAllText(Path.Combine(baseDir, "HtmlFile", "web-page.html"));
+
+// Directory.CreateDirectory does nothing when the folder already exists, so no check is needed.
+var pdfOutputFolder = Path.Combine(baseDir, "Output");
+Directory.CreateDirectory(pdfOutputFolder);
+
+var pdfOutputPath = Path.Combine(pdfOutputFolder, "converted.pdf");
+ConvertHtmlToPdf(htmlContent, pdfOutputPath);
+
+Console.WriteLine($"Pdf convert successfully in {pdfOutputPath}");
+// UseShellExecute = true asks the operating system to open the PDF with its default viewer.
+Process.Start(new ProcessStartInfo(pdfOutputPath) { UseShellExecute = true });
+
diff --git a/.NET(v4.0)/sdk-samples.sln b/.NET(v4.0)/sdk-samples.sln
@@ -9,7 +9,11 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Sample Code Snippet For Doc
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CodeSnippetForApiVer_2024-02-29_Preview", "CodeSnippetForApiVer_2024-02-29_Preview\CodeSnippetForApiVer_2024-02-29_Preview.csproj", "{EFCE60E7-6544-443C-8C68-53BFC9447B51}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CodeSnippetForApiVer_2023-10-31_Preview", "CodeSnippetForApiVer_2023-10-31_Preview\CodeSnippetForApiVer_2023-10-31_Preview.csproj", "{BC2D5C4C-C70E-4FB1-AD1E-1505F97FF231}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CodeSnippetForApiVer_2023-10-31_Preview", "CodeSnippetForApiVer_2023-10-31_Preview\CodeSnippetForApiVer_2023-10-31_Preview.csproj", "{BC2D5C4C-C70E-4FB1-AD1E-1505F97FF231}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ConvertHtmlToPdf", "ConvertHtmlToPdf\ConvertHtmlToPdf.csproj", "{BD96BDFF-643C-446C-815B-A5E51FC48166}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FileConversion", "FileConversion", "{EC23A1E8-D9A3-44E2-B63C-567C9145C165}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -29,13 +33,18 @@ Global
 		{BC2D5C4C-C70E-4FB1-AD1E-1505F97FF231}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{BC2D5C4C-C70E-4FB1-AD1E-1505F97FF231}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{BC2D5C4C-C70E-4FB1-AD1E-1505F97FF231}.Release|Any CPU.Build.0 = Release|Any CPU
+		{BD96BDFF-643C-446C-815B-A5E51FC48166}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{BD96BDFF-643C-446C-815B-A5E51FC48166}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{BD96BDFF-643C-446C-815B-A5E51FC48166}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{BD96BDFF-643C-446C-815B-A5E51FC48166}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
 	GlobalSection(NestedProjects) = preSolution
 		{EFCE60E7-6544-443C-8C68-53BFC9447B51} = {E9CBB1EB-6D19-4981-A5F2-EFEA3CF0F0A7}
 		{BC2D5C4C-C70E-4FB1-AD1E-1505F97FF231} = {E9CBB1EB-6D19-4981-A5F2-EFEA3CF0F0A7}
+		{BD96BDFF-643C-446C-815B-A5E51FC48166} = {EC23A1E8-D9A3-44E2-B63C-567C9145C165}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {5CBC567B-C775-4E10-BCB4-E019111881C4}

diff --git a/Data/other-doc-type/web-page.html b/Data/other-doc-type/web-page.html
diff --git a/Doc/Convert Office File To Pdf.md b/Doc/Convert Office File To Pdf.md
@@ -0,0 +1,83 @@
+# Convert Office File To Pdf Format By Microsoft Graph API
+
+## Overview
+The Microsoft Graph API provides a good way to convert Office files to PDF format. Let’s take a quick look at the core functionality. The [Download a file in another format](https://learn.microsoft.com/en-us/graph/api/driveitem-get-content-format?view=graph-rest-1.0&tabs=http) API supports many file types, summarized below:
+
+| Format value | Description | Supported source extensions |
+| ------------ | ----------- | --------------------------- |
+| pdf | Converts the item into PDF format. | csv, doc, docx, odp, ods, odt, pot, potm, potx, pps, ppsx, ppsxm, ppt, pptm, pptx, rtf, xls, xlsx |
+| html | Converts the item into HTML format. | loop, fluid, wbtx |
+
+It supports converting almost all Microsoft Office document formats to PDF with good quality. Before using this API, you need to understand the authentication and authorization concepts in the Microsoft identity platform.
+
+## Prerequisites
+- A Microsoft Entra ID tenant. If you don't have a tenant, create a [free Azure account to get free subscription](https://azure.microsoft.com/free/?WT.mc_id=A261C142F).
+- An account that has at least the [Cloud Application Administrator](https://learn.microsoft.com/en-us/entra/identity/role-based-access-control/permissions-reference?toc=%2Fgraph%2Ftoc.json#cloud-application-administrator) role.
+- A <a name="drive">drive</a> resource to store files. It can be [OneDrive](https://www.microsoft.com/en-us/microsoft-365/onedrive/online-cloud-storage/), [OneDrive for Business](https://www.microsoft.com/en-us/microsoft-365/onedrive/onedrive-for-business), or [SharePoint](https://www.microsoft.com/en-us/microsoft-365/sharepoint/collaboration). For enterprise scenarios, [SharePoint](https://www.microsoft.com/en-us/microsoft-365/sharepoint/collaboration) is recommended.
+
+## Process
+There is comprehensive documentation for the [Microsoft Graph API](https://learn.microsoft.com/en-us/graph/overview). Because the Microsoft Graph API covers a wide range of functionality, this article only summarizes the topics related to Microsoft Office file conversion.
+
+### 1. Register the app in Microsoft Identity Platform
+To call Microsoft Graph, an app must obtain an access token from the Microsoft identity platform. To register the app in the Microsoft identity platform, follow the steps in https://learn.microsoft.com/en-us/graph/auth-register-app-v2.
+
+### 2. Authentication and authorization basics
+Once the app is registered as described in the previous step, it can obtain an access token. The access token includes information about whether the app is authorized to access Microsoft Graph on behalf of a signed-in user or with its own identity.
+- Introduction to access scenarios: https://learn.microsoft.com/en-us/graph/auth/auth-concepts#access-scenarios. For enterprise scenarios, **Get access without a user** is recommended.
+- Get access without a user : https://learn.microsoft.com/en-us/graph/auth-v2-service?tabs=http
+    + Extension - OAuth 2.0 client credentials flow : https://learn.microsoft.com/en-us/entra/identity-platform/v2-oauth2-client-creds-grant-flow
+    + Extension - OAuth 2.0 client credentials flow by SDK : https://learn.microsoft.com/en-us/graph/sdks/choose-authentication-providers?view=graph-rest-1.0#client-credentials-provider
+
+### 3. Create temporary folder by Microsoft Graph API
+After getting the access token, the app can call the Microsoft Graph API.
+It is recommended to create a temporary folder to store the Microsoft Office file by using *[API: Create a new folder in a drive](https://learn.microsoft.com/en-us/graph/api/driveitem-post-children?view=graph-rest-1.0&tabs=http)*. The temporary folder acts as a file transfer station: once the conversion is complete, the folder and the files in it can be deleted easily.
+
+
+### 4. Upload the Microsoft Office file to drive by Microsoft Graph API
+When the temporary folder is ready, upload the Microsoft Office file to the temporary folder in the <a href="#drive">drive</a> by using *[API: Upload or replace the contents of a driveItem](https://learn.microsoft.com/en-us/graph/api/driveitem-put-content?view=graph-rest-1.0&tabs=http)*. At this point, all the preparations for the Microsoft Office file conversion are complete.
+
+### 5. Convert the Microsoft Office file to pdf format by Microsoft Graph API
+This is the key step: call *[API: Download a file in another format](https://learn.microsoft.com/en-us/graph/api/driveitem-get-content-format?view=graph-rest-1.0&tabs=http)* to convert the Microsoft Office file to PDF format and save it locally. <br>
+Note: the ***format*** parameter must be set to ***pdf***.
+
+- #### Sample code for Python:
+    ~~~
+    query_params = ContentRequestBuilder.ContentRequestBuilderGetQueryParameters(
+        format="pdf",
+    )
+    request_config = ContentRequestBuilder.ContentRequestBuilderGetRequestConfiguration(
+        query_parameters=query_params
+    )
+    pdf_bytes = (
+        await graph_client.drives.by_drive_id(user_drive_id)
+        .items.by_drive_item_id(file_item.id)
+        .content.get(request_config)
+    )
+
+    pdf_abspath = os.path.abspath(
+        os.path.join(
+            save_dir_path,
+            f"./{file_name}.pdf",
+        )
+    )
+    ~~~
+
+- #### Sample code for C#:
+    ~~~
+    using var pdfStream = await graphClient.Drives[driveId].Items[fileItem.Id].Content.GetAsync((requestConfiguration) =>
+    {
+        requestConfiguration.QueryParameters.Format = "pdf";
+    });
+
+    if (pdfStream != null)
+    {
+        var savePdfFileName = $"{saveDirPath}\\{fileName}.pdf";
+        using FileStream saveStream = File.Create(savePdfFileName);
+        pdfStream.CopyTo(saveStream);
+    }
+    ~~~
+
+### 6. Delete temporary folder by Microsoft Graph API
+After the Microsoft Office file has been converted to PDF format successfully, it is better to delete the temporary folder that was used to store the Microsoft Office file, to keep the drive clean. You can do this with ***[API:Delete a DriveItem](https://learn.microsoft.com/en-us/graph/api/driveitem-delete?view=graph-rest-1.0&tabs=http)***.
+
+This article introduced the complete process of converting a Microsoft Office file to PDF format. You can integrate the APIs above into your existing system to implement this feature.
diff --git a/Python(v4.0)/Others/sample_convert_html_to_pdf.py b/Python(v4.0)/Others/sample_convert_html_to_pdf.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+"""
+FILE: sample_convert_html_to_pdf.py
+
+DESCRIPTION:
+    This sample demonstrates how to convert an HTML file to a PDF file.
+PREREQUISITES:
+    Before running this sample, install the following required components:
+        1).Install python-pdfkit:
+            "$ pip install pdfkit"
+        2).Install wkhtmltopdf:
+            -Debian/Ubuntu:
+                "$ sudo apt-get install wkhtmltopdf"
+            -macOS:
+                "$ brew install homebrew/cask/wkhtmltopdf"
+            -Windows and other options: check https://wkhtmltopdf.org/downloads.html for wkhtmltopdf binary installers
+    More information about pdfkit, reference from https://pypi.org/project/pdfkit/.
+USAGE:
+    python sample_convert_html_to_pdf.py
+"""
+
+import os
+import pdfkit
+
+
+def convert_html_file_to_pdf(html_path, save_pdf_path):
+    with open(html_path, "r", encoding="utf-8") as f:
+        htmlStr = f.read()
+
+        directory_to_save_pdf = os.path.dirname(save_pdf_path)
+        if not os.path.isdir(directory_to_save_pdf):
+            os.makedirs(directory_to_save_pdf)
+
+        return pdfkit.from_string(htmlStr, save_pdf_path)
+
+
+if __name__ == "__main__":
+    current_file_path = os.path.abspath(__file__)
+    current_folder_path = os.path.dirname(current_file_path)
+
+    path_of_sample_html = os.path.abspath(
+        os.path.join(
+            current_file_path,
+            "..",
+            "..",
+            "..",
+            "./Data/other-doc-type/web-page.html",
+        )
+    )
+
+    path_to_save_pdf = os.path.abspath(
+        os.path.join(
+            current_folder_path,
+            "./result/converted.pdf",
+        )
+    )
+
+    isSuccessful = convert_html_file_to_pdf(path_of_sample_html, path_to_save_pdf)
+    if isSuccessful:
+        print(f"Convert pdf successfully in {path_to_save_pdf}")
+    else:
+        print("Something wrong. Check the html file please.")