From 0a284f41b77528defadbd9a76d06faff2460a035 Mon Sep 17 00:00:00 2001 From: xf0e Date: Sun, 3 Oct 2021 11:53:45 +0000 Subject: [PATCH] Development (#42) * [CGL] corrected comment on CheckForAcceptRequest * [CODING] all incoming requests (even erroneous ones) will get and log a corresponding requestID. [CODING] resource manager now additionally relies on its own size of request queues. Should avoid cases where someone places many requests at once * [CODING] more error logs now contain the requestID for troubleshooting * [CODING] investigating code for deadlock if multiple requests are placed at the same time. do not use in prod * [DEBUGGING] further investigation of a possible deadlock in addNewRequest * [BUGFIX] further investigation of a hang bug * [CODING] /ocr-status will now return a valid JSON with status "not found" and CODE 200 instead of 404 Status code * [CGL] RequestID will now be shown in the logs instead of requestID * [BUILD] removed netgo build flag from Makefile * Makefile now builds statically linked executables * [BUGFIX] further investigation of a hang bug * [BUGFIX] further investigation of a hang bug, removed mutex l * [BUGFIX] further investigation of a hang bug, removed mutex l * [BUGFIX] further investigation of a hang bug, removed mutex l * [BUGFIX] further investigation of a hang bug, removed mutex l * [BUGFIX] further investigation of a hang bug, removed mutex l * [BUGFIX] further working on fixing hang bug on many simultaneous requests * [BUGFIX] further working on fixing hang bug on many simultaneous requests * [BUGFIX] further working on fixing hang bug on many simultaneous requests * [BUGFIX] corrected detection of an invalid reply_to address * [BUGFIX] fixed a bug where a deferred request with reply_to not set was returned without a request ID, so the requester didn't know which request to ask for * [BUGFIX] fixed a bug where the deferred requests were still tracked till timeout even if the client had already successfully downloaded them * [BUGFIX] fixed
race conditions on request counter and res manager [TODO] fix goroutine leak at [chan send, 3 minutes] ocr_rpc_client.go:221 * [BUGFIX] fixed race conditions on request counter * [CGL] fixed comments * [CODING] added todo for fixing leaking goroutines * [BUGFIX] goroutines are not leaking anymore. There is now a bug if "deferred": true, "reply_to":"" are not set. The in-flight request queue won't be cleaned up for those requests. ocr_resultorage:72 needs to be considered * [CODING] better logging upon shutdown signal * [CODING] working on proper timeout cancel * [CODING] working on proper timeout cancel * [CODING] better logging in status handler * [CODING] correct handling of goroutines with replyto not set and deferred is true * [CGL] just some CGL * [CODING] go mod tidy * [CODING] updated dependency * [FEATURE] if the first tiff to pdf converter in sandwich engine fails, the second one will be used in order to process the request * [CODING] removed unneeded logging * [BUGFIX] proper file name on converter switch * [CODING] flag result_optimize is less aggressive. for gs the dCompatibilityLevel is now 1.7 and dPDFSETTINGS=/prepress. This will result in a bigger PDF with higher quality Co-authored-by: Artem Mil --- ocr_util.go | 1 + sandwich_engine.go | 26 +++++++++++++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/ocr_util.go b/ocr_util.go index 8622e14..814e3f9 100644 --- a/ocr_util.go +++ b/ocr_util.go @@ -126,6 +126,7 @@ func convertImageToPdf(inputFilename string) string { cmd := exec.Command("convert", inputFilename, tmpFileImgToPdf) _, err := cmd.CombinedOutput() if err != nil { + log.Debug().Str("component", "OCR_IMAGECONVERT").Interface("tiff2pdf_args", cmd.Args) log.Warn().Str("component", "OCR_IMAGECONVERT").Err(err).
Msg("error exec convert for transforming TIFF to PDF") return "" diff --git a/sandwich_engine.go b/sandwich_engine.go index 09f5598..ed4dcfa 100644 --- a/sandwich_engine.go +++ b/sandwich_engine.go @@ -328,20 +328,36 @@ func (t SandwichEngine) processImageFile(inputFilename, uplFileType string, engi var cmdArgs []string ocrLayerFile := "" + alternativeConverter := "" + originalInputfileName := inputFilename logger.Info().Str("file_name", inputFilename).Msg("input file name") if uplFileType == "TIFF" { switch engineArgs.t2pConverter { case "convert": + alternativeConverter = "tiff2pdf" inputFilename = convertImageToPdf(inputFilename) case "tiff2pdf": + alternativeConverter = "convert" inputFilename = tiff2Pdf(inputFilename) } + /* if the first converter fails, we will automatically try the second one. + If the second one fails, we will break up processing and return an error to a caller */ if inputFilename == "" { - err := fmt.Errorf("can not convert input image to intermediate pdf") - logger.Error().Err(err).Caller().Msg("Error exec " + engineArgs.t2pConverter) - return OcrResult{Status: "error"}, err + err := fmt.Errorf("can not convert input image to intermediate pdf, usually this is caused by a damaged input file") + logger.Error().Err(err).Caller().Msg("Error exec " + engineArgs.t2pConverter + "Try to switch the image converter to " + alternativeConverter) + switch alternativeConverter { + case "convert": + inputFilename = convertImageToPdf(originalInputfileName) + case "tiff2pdf": + inputFilename = tiff2Pdf(originalInputfileName) + } + if inputFilename == "" { + err := fmt.Errorf("entirely failed to convert the input image to intermediate pdf, usually this is caused by a damaged input file") + logger.Error().Err(err).Caller().Msg("Error exec " + alternativeConverter) + return OcrResult{Status: "error"}, err + } } } @@ -412,8 +428,8 @@ func (t SandwichEngine) processImageFile(inputFilename, uplFileType string, engi compressedArgs = append( compressedArgs, 
"-sDEVICE=pdfwrite", - "-dCompatibilityLevel=1.5", - "-dPDFSETTINGS=/screen", + "-dCompatibilityLevel=1.7", + "-dPDFSETTINGS=/prepress", "-dNOPAUSE", "-dBATCH", "-dQUIET",