Skip to content

Commit

Permalink
fix unused import, fix typo, make Newline removal configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
Johannes Bareuther committed Jul 31, 2024
1 parent cbf94df commit 2c59b0e
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 5 deletions.
6 changes: 4 additions & 2 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,17 @@ type TesConfig struct {
NatsConnectRetries int `env:"TES_NATS_CONNECT_RETRIES" default:"10"`
// if true, disable HTTP Server in favor of NATS Microservice interface
NoHttp bool `env:"TES_NO_HTTP" default:"false"`
// if true, extracted text will be compacted by replacing newlines with whitespace
RemoveNewlines bool `env:"TES_REMOVE_NEWLINES" default:"true"`
// How many replicas of the bucket to create. Default: 1
Replicas int `env:"TES_REPLICAS" default:"1"`
// HTTP listen address and/or port. Default: ':8080'
SrvAddr string `env:"TES_HOST_PORT" default:":8080"`
}

// NewTesConigFromEnv returns a service config object
// NewTesConfigFromEnv returns a service config object
// populated with defaults and values from environment vars
func NewTesConigFromEnv() TesConfig {
func NewTesConfigFromEnv() TesConfig {
var cfg TesConfig
if err := env.Load(&cfg, nil); err != nil {
logger.Error("Loading config failed", "err", err)
Expand Down
6 changes: 4 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/gin-contrib/expvar"
"github.com/gin-gonic/gin"
"github.com/johbar/text-extraction-service/v2/pkg/dehyphenator"
"github.com/johbar/text-extraction-service/v2/pkg/docparser"
sloggin "github.com/samber/slog-gin"
)
Expand All @@ -24,13 +25,14 @@ var (
)

func main() {
tesConfig = NewTesConfigFromEnv()
dehyphenator.RemoveNewlines = tesConfig.RemoveNewlines
args := os.Args
// one shot mode: don't start a server, just process a single file provided on the command line
if len(args) > 1 {
PrintMetadataAndTextToStdout(args[1])
return
}
tesConfig = NewTesConigFromEnv()
closeDocChan = make(chan Document, 100)
saveExtractedDocChan = make(chan *ExtractedDocument, 100)
go saveAndCloseExtracedDocs()
Expand Down Expand Up @@ -70,7 +72,7 @@ func main() {
logger.Info("Service started with no HTTP endpoints. Waiting for interrupt.")
<-wait
}
logger.Info("Using PDF implementation", "lib", pdfImplementation)
logger.Info("PDF implementation", "lib", pdfImplementation)
if !docparser.Initialized {
logger.Warn("wvWare is not in PATH! We will not be able to extract legacy MS Word documents.")
}
Expand Down
1 change: 0 additions & 1 deletion poppler.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ package main
import (
"errors"
"io"
"log/slog"
"strconv"
"strings"
"time"
Expand Down

0 comments on commit 2c59b0e

Please sign in to comment.