Skip to content

Commit

Permalink
handle ocr to bytes with rpc
Browse files Browse the repository at this point in the history
  • Loading branch information
gmuselli committed May 7, 2017
1 parent 33dd6ef commit 4fd4a2a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 9 deletions.
19 changes: 19 additions & 0 deletions ocr_request.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ocrworker

import "fmt"
import "encoding/base64"

type OcrRequest struct {
ImgUrl string `json:"img_url"`
Expand All @@ -27,7 +28,25 @@ func (ocrRequest *OcrRequest) nextPreprocessor(processorRoutingKey string) strin
ocrRequest.PreprocessorChain = s
return x
}
}

// decodeBase64 turns the base64 text held in ImgBase64 into raw image bytes.
//
// On success the decoded data is stored in ImgBytes and ImgBase64 is reset to
// the empty string. If the text is not valid standard base64, the decoding
// error is returned and neither field is modified.
func (ocrRequest *OcrRequest) decodeBase64() error {
	decoded, err := base64.StdEncoding.DecodeString(ocrRequest.ImgBase64)
	if err != nil {
		return err
	}

	// Swap the payload over: keep the bytes, drop the base64 text.
	ocrRequest.ImgBytes, ocrRequest.ImgBase64 = decoded, ""
	return nil
}

// hasBase64 reports whether the request carries a non-empty base64 image
// payload in ImgBase64.
//
// The result type is bool (not error): the body yields the outcome of a string
// comparison and the method is used directly as an if-condition.
func (ocrRequest *OcrRequest) hasBase64() bool {
	return ocrRequest.ImgBase64 != ""
}

func (ocrRequest *OcrRequest) downloadImgUrl() error {
Expand Down
25 changes: 19 additions & 6 deletions ocr_rpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,25 @@ func (c *OcrRpcClient) DecodeImage(ocrRequest OcrRequest) (OcrResult, error) {
// any preprocessors. if rabbitmq isn't in same data center
// as open-ocr, it will be expensive in terms of bandwidth
// to have image binary in messages
if ocrRequest.ImgBytes == nil && ocrRequest.ImgBase64 == "" {
// if we already have image bytes, ignore image url
err = ocrRequest.downloadImgUrl()
if err != nil {
logg.LogTo("OCR_CLIENT", "Error downloading img url: %v", err)
return OcrResult{}, err
if ocrRequest.ImgBytes == nil {

// if we do not have bytes use base 64 file by converting it to bytes
if ocrRequest.hasBase64() {

logg.LogTo("OCR_CLIENT", "OCR request has base 64 convert it to bytes")

err = ocrRequest.decodeBase64()
if err != nil {
logg.LogTo("OCR_CLIENT", "Error decoding base64: %v", err)
return OcrResult{}, err
}
} else {
// if we do not have base 64 or bytes download the file
err = ocrRequest.downloadImgUrl()
if err != nil {
logg.LogTo("OCR_CLIENT", "Error downloading img url: %v", err)
return OcrResult{}, err
}
}
}

Expand Down
6 changes: 3 additions & 3 deletions tesseract_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,10 @@ func (t TesseractEngineArgs) Export() []string {
func (t TesseractEngine) ProcessRequest(ocrRequest OcrRequest) (OcrResult, error) {

tmpFileName, err := func() (string, error) {
if ocrRequest.ImgUrl != "" {
return t.tmpFileFromImageUrl(ocrRequest.ImgUrl)
} else if ocrRequest.ImgBase64 != "" {
if ocrRequest.ImgBase64 != "" {
return t.tmpFileFromImageBase64(ocrRequest.ImgBase64)
} else if ocrRequest.ImgUrl != "" {
return t.tmpFileFromImageUrl(ocrRequest.ImgUrl)
} else {
return t.tmpFileFromImageBytes(ocrRequest.ImgBytes)
}
Expand Down

0 comments on commit 4fd4a2a

Please sign in to comment.