diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/OCR with multiple languages.csproj b/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/OCR with multiple languages.csproj deleted file mode 100644 index f9b56dd4..00000000 --- a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/OCR with multiple languages.csproj +++ /dev/null @@ -1,48 +0,0 @@ - - - - Exe - net8.0 - OCR_with_multiple_langauages - enable - enable - - - - - - - - - Always - - - Always - - - Always - - - Always - - - Always - - - Always - - - Always - - - Always - - - Always - - - Always - - - - diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Program.cs b/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Program.cs deleted file mode 100644 index 8884bfa3..00000000 --- a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Program.cs +++ /dev/null @@ -1,34 +0,0 @@ -using Syncfusion.OCRProcessor; -using Syncfusion.Pdf.Parsing; - -// Initialize the OCR processor within a using block to ensure resources are properly disposed -using (OCRProcessor ocrProcessor = new OCRProcessor()) -{ - // Set the Unicode font for the OCR processor using a TrueType font file - ocrProcessor.UnicodeFont = new Syncfusion.Pdf.Graphics.PdfTrueTypeFont( - new FileStream(Path.GetFullPath(@"Data/arialuni.ttf"), FileMode.Open),12 ); - - // Load the PDF document - PdfLoadedDocument loadedDocument = new PdfLoadedDocument(Path.GetFullPath(@"Data/Input.pdf")); - - // Configure OCR settings - OCRSettings ocrSettings = new OCRSettings(); - - // Specify the languages to be used for OCR - ocrSettings.Language = "eng+deu+ara+ell+fra"; - - // Apply the OCR settings to the OCR processor - ocrProcessor.Settings = ocrSettings; - - // Perform OCR on the loaded PDF document, providing the path to the tessdata directory - ocrProcessor.PerformOCR(loadedDocument, "tessdata"); - - // Save the OCR-processed document - 
loadedDocument.Save(Path.GetFullPath(@"Output/Output.pdf")); - - // Close the loaded document and commit changes - loadedDocument.Close(true); -} - - - diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/deu.traineddata b/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/deu.traineddata deleted file mode 100644 index 36f623a0..00000000 Binary files a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/deu.traineddata and /dev/null differ diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/eng.traineddata b/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/eng.traineddata deleted file mode 100644 index f4744c20..00000000 Binary files a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/eng.traineddata and /dev/null differ diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages.sln b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages.sln similarity index 51% rename from OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages.sln rename to OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages.sln index 106cd236..0a5a5dbd 100644 --- a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages.sln +++ b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages.sln @@ -1,9 +1,9 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 -VisualStudioVersion = 17.11.35222.181 +VisualStudioVersion = 17.14.36616.10 d17.14 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OCR with multiple langauages", "OCR with multiple languages\OCR with multiple languages.csproj", "{31453861-AEBA-4C3D-89DD-BE4CF523F717}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = 
"Perform-OCR-on-PDF-with-multiple-languages", "Perform-OCR-on-PDF-with-multiple-languages\Perform-OCR-on-PDF-with-multiple-languages.csproj", "{0DB5D151-C60A-434B-B709-DC9111D1CC8F}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -11,15 +11,15 @@ Global Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {31453861-AEBA-4C3D-89DD-BE4CF523F717}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {31453861-AEBA-4C3D-89DD-BE4CF523F717}.Debug|Any CPU.Build.0 = Debug|Any CPU - {31453861-AEBA-4C3D-89DD-BE4CF523F717}.Release|Any CPU.ActiveCfg = Release|Any CPU - {31453861-AEBA-4C3D-89DD-BE4CF523F717}.Release|Any CPU.Build.0 = Release|Any CPU + {0DB5D151-C60A-434B-B709-DC9111D1CC8F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0DB5D151-C60A-434B-B709-DC9111D1CC8F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0DB5D151-C60A-434B-B709-DC9111D1CC8F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0DB5D151-C60A-434B-B709-DC9111D1CC8F}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {E1E0ADD2-5280-4B74-8CF7-D831048BE8DA} + SolutionGuid = {6F527749-5F41-444D-BBAE-1676D3E57CF2} EndGlobalSection EndGlobal diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Data/arialuni.ttf b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Data/ARIALUNI.ttf similarity index 100% rename from OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Data/arialuni.ttf rename to OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Data/ARIALUNI.ttf diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Data/Input.pdf 
b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Data/Input.pdf
similarity index 100%
rename from OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Data/Input.pdf
rename to OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Data/Input.pdf
diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Output/.gitkeep b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Output/gitkeep.txt
similarity index 100%
rename from OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Output/.gitkeep
rename to OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Output/gitkeep.txt
diff --git a/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages.csproj b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages.csproj
new file mode 100644
index 00000000..c2c79df1
--- /dev/null
+++ b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net8.0
+ Perform_OCR_on_PDF_with_multiple_languages
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Program.cs b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Program.cs
new file mode 100644
index 00000000..6608d460
--- /dev/null
+++ b/OCR/.NET/Perform-OCR-on-PDF-with-multiple-languages/Perform-OCR-on-PDF-with-multiple-languages/Program.cs
@@ -0,0 +1,21 @@
+using Syncfusion.OCRProcessor;
+using Syncfusion.Pdf.Graphics;
+using Syncfusion.Pdf.Parsing;
+
+// Relative paths below are resolved by Path.GetFullPath against the current working directory.
+// Initialize the OCR processor in a using block so it is disposed once OCR has finished.
+using (OCRProcessor processor = new OCRProcessor())
+// Load the PDF document; it is disposed before the processor when the block exits.
+using (PdfLoadedDocument loadedDocument = new PdfLoadedDocument(Path.GetFullPath(@"Data/Input.pdf")))
+{
+    // Set a Unicode font to preserve the Unicode characters in the PDF document.
+    processor.UnicodeFont = new PdfTrueTypeFont(Path.GetFullPath(@"Data/ARIALUNI.ttf"), 8);
+    // Set the OCR languages
+    processor.Settings.Language = "eng+deu+ara+ell+fra"; // English, German, Arabic, Greek, French
+    // Set the path to the Tesseract language data folder
+    processor.TessDataPath = Path.GetFullPath(@"../../Tessdata");
+    // Perform OCR
+    processor.PerformOCR(loadedDocument);
+    // Save the PDF document
+    loadedDocument.Save(Path.GetFullPath(@"Output/Output.pdf"));
+}
diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/ara.traineddata b/OCR/.NET/Tessdata/ara.traineddata
similarity index 100%
rename from OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/ara.traineddata
rename to OCR/.NET/Tessdata/ara.traineddata
diff --git a/OCR/.NET/Tessdata/deu.traineddata b/OCR/.NET/Tessdata/deu.traineddata
index b70f9ebd..36f623a0 100644
Binary files a/OCR/.NET/Tessdata/deu.traineddata and b/OCR/.NET/Tessdata/deu.traineddata differ
diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/ell.traineddata b/OCR/.NET/Tessdata/ell.traineddata
similarity index 100%
rename from OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/ell.traineddata
rename to OCR/.NET/Tessdata/ell.traineddata
diff --git a/OCR/.NET/Tessdata/eng.traineddata b/OCR/.NET/Tessdata/eng.traineddata
index 64ae74e1..f4744c20 100644
Binary files a/OCR/.NET/Tessdata/eng.traineddata and b/OCR/.NET/Tessdata/eng.traineddata differ
diff --git a/OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/fra.traineddata b/OCR/.NET/Tessdata/fra.traineddata
similarity index 100%
rename from OCR/.NET/OCR-with-multiple-langauages/OCR with multiple
languages/tessdata/fra.traineddata rename to OCR/.NET/Tessdata/fra.traineddata diff --git a/OCR/.NET/Tessdata/tha.traineddata b/OCR/.NET/Tessdata/tha.traineddata new file mode 100644 index 00000000..fa80ee4b Binary files /dev/null and b/OCR/.NET/Tessdata/tha.traineddata differ