From 1e32009f81f4cfd535fce33a9c89cabefbd545d4 Mon Sep 17 00:00:00 2001 From: Mark van der Velden Date: Mon, 29 Aug 2022 10:32:08 +0200 Subject: [PATCH] Cleanup and improving bad mx host check (#17) Reporting on incorrect MX hosts (introducing `misconfigured_mx` in the suggest response). Also: documentation and CI improvements --- .circleci/config.yml | 18 ++++-- README.md | 15 +++-- cmd/eri-cli/README.md | 5 ++ cmd/web/config/config.go | 4 +- cmd/web/erihttp/handlers/compression_test.go | 10 +-- cmd/web/erihttp/types.go | 1 + cmd/web/erihttp/util.go | 3 +- cmd/web/graphql.go | 3 +- cmd/web/handlers.go | 30 +++++---- cmd/web/handlers_test.go | 68 +++++++++++++------- cmd/web/hitlist/hitlist_test.go | 6 +- cmd/web/main.go | 4 +- cmd/web/services/suggest.go | 3 + cmd/web/util.go | 1 - cmd/web/util_test.go | 37 +++++++++++ validator/checks.go | 24 ++----- validator/checks_test.go | 2 +- validator/util.go | 14 +++- validator/util_test.go | 34 ++++++---- validator/validations/flags.go | 27 ++++---- validator/validations/flags_test.go | 1 + validator/validations/validations.go | 4 +- validator/validations/validations_test.go | 40 ++++++------ validator/validator.go | 54 +++++++++------- validator/validator_test.go | 4 +- 25 files changed, 255 insertions(+), 157 deletions(-) create mode 100644 cmd/web/util_test.go diff --git a/.circleci/config.yml b/.circleci/config.yml index 9150185..43e8e2b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,23 +7,25 @@ jobs: working_directory: /home/circleci/eri docker: # specify the version - - image: circleci/golang:1.17 + - image: cimg/go:1.19 environment: BINARY_NAME: "eri-linux-amd64" TEST_RESULTS: "/tmp/test-results" + GOFLAGS: "-buildvcs=false -trimpath" steps: - checkout - run: mkdir -p ${TEST_RESULTS} - run: go install github.com/jstemmer/go-junit-report@latest - - run: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.43.0 + - run: go install github.com/mattn/goveralls@latest + - run: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.49.0 - run: name: Build command: | TAG=${CIRCLE_TAG:-dev} - CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o "${BINARY_NAME}" -a -ldflags="-w -s -X main.Version=${TAG}" ./cmd/web + GOFLAGS="-buildvcs=false -trimpath" CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o "${BINARY_NAME}" -a -ldflags="-w -s -X main.Version=${TAG}" ./cmd/web - run: # Check if we have updates to minor/patch level packages we're explicitly referencing @@ -40,8 +42,7 @@ jobs: name: Test command: | go test -v ./... | go-junit-report > ${TEST_RESULTS}/report.xml - go test -cover -coverprofile=${TEST_RESULTS}/coverage.txt -covermode=atomic ./... - go test -race ./... + go test -cover -race -covermode=atomic -coverprofile=${TEST_RESULTS}/coverage.txt ./... go tool cover -html=${TEST_RESULTS}/coverage.txt -o ${TEST_RESULTS}/coverage.html - store_test_results: @@ -51,9 +52,9 @@ jobs: path: "/tmp/test-results" - run: - name: Codecov upload + name: Coveralls upload command: | - bash <(curl -s https://codecov.io/bash) -f ${TEST_RESULTS}/coverage.txt + goveralls -coverprofile=${TEST_RESULTS}/coverage.txt -service=circle-ci -repotoken=${COVERALLS_TOKEN} workflows: @@ -61,6 +62,9 @@ workflows: build-test: jobs: - lint-and-test: + context: + - org-global + - "Public repos" filters: branches: only: /.*/ diff --git a/README.md b/README.md index 5ba330d..c18990e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ # ERI -Email Recipient Inspector is a project for preventing email typos. It's a self-learning service, a library or a command line utility. The services can help your uses to prevent mistakes when entering their email address. The library allows you to incorporate the features in your own business layer and the cli can be used as a convenient way to test domains or e-mail addresses. +Email Recipient Inspector is a project for preventing email typos. It's a self-learning service, a library or a command line utility. The services can help your uses to prevent mistakes when entering their email address. The library allows you to incorporate the features in your own business layer and the cli can be used as a convenient way to test domains or email addresses. # ERI as command line utility ## Installation @@ -93,9 +93,16 @@ The local part (left of the `@`) remains completely untouched. It's simply echoe "alternatives": [ "john.doe@example.org" ], - "malformed_syntax": false + "malformed_syntax": false, + "misconfigured_mx": false } ``` +##### The advisory fields +Please take note: These fields are advisory. Email delivery is still possible (even though unlikely) when these advisory fields are false. For example the recipient "root" on a local system is considered invalid. For web-use, however, It'll be mostly correct. + + - `malformed_syntax` (bool) is an indication of the syntax. The check is fairly liberal. If `true`, chances are pretty good the email will never work.` _Note: this is permanent_. + - `misconfigured_mx` (bool) is an indication of a misconfigured MX. If `true`, it's unlikely that the host can accept email. _Note: this can be temporary!_. + ### /autocomplete The autocomplete endpoint returns a list of domains matching the prefix. To prevent leaking sensitive information, ERI is configured with a threshold to limit exposure of rarely used domains. @@ -171,7 +178,7 @@ For more help, see the package: https://github.com/Dynom/ERI-js # Integration ## Data scrubbing -When integrating ERI in your application, the initial results might be poor. When you change the validation mechanism (to include ERI) your data might still be too "dirty" to work with. After feeding your existing e-mail addresses into ERI you might want to cleanup the data first. The autocomplete endpoint might give odd results (e.g.: hotmail.com.com). Scrubbing this data from ERI's hitlist table and with the new mechanisms in place should prevent those addresses to end up into your backend in the future, but without the scrubbing you'll stay in a less-than-ideal situation. +When integrating ERI in your application, the initial results might be poor. When you change the validation mechanism (to include ERI) your data might still be too "dirty" to work with. After feeding your existing email addresses into ERI you might want to cleanup the data first. The autocomplete endpoint might give odd results (e.g.: hotmail.com.com). Scrubbing this data from ERI's hitlist table and with the new mechanisms in place should prevent those addresses to end up into your backend in the future, but without the scrubbing you'll stay in a less-than-ideal situation. ## To proxy or to expose directly While ERI is designed to be exposed publicly, you might have different ideas about how to protect your backend services. Adding a proxy is a good alternative, and it allows you to fine-tune the rate-limiter to that specific use-case. @@ -201,7 +208,7 @@ Mailcheck works completely in JavaScript, with the option to use only known TLDs # Email delivery nuances -Ever since the first e-mail got sent in 1971 a lot has happened with electronic mail. In modern days email is seen as "the" way to identify and communicate with people online. Because of this, many people will easily give away their email addresses and people receive many, many emails. It's hard to read it all, not even counting the spam. Looking specifically at my own behaviour, I don't even open email unless I think it's important, just by scanning the sender and the subject of the email. +Ever since the first email got sent in 1971 a lot has happened with electronic mail. In modern days email is seen as "the" way to identify and communicate with people online. Because of this, many people will easily give away their email addresses and people receive many, many emails. It's hard to read it all, not even counting the spam. Looking specifically at my own behaviour, I don't even open email unless I think it's important, just by scanning the sender and the subject of the email. With this in mind, even with a perfect validator, and a brilliantly composed and relevant email, it's still possible your email won't be read. ERI is designed to help out the user willing to trust you with their email address. ERI is not designed as a marketing tool to help optimise email delivery. diff --git a/cmd/eri-cli/README.md b/cmd/eri-cli/README.md index 19b4124..603b30c 100644 --- a/cmd/eri-cli/README.md +++ b/cmd/eri-cli/README.md @@ -94,3 +94,8 @@ bzcat emails.bz2 | \ jq .email | \ xargs ./updateStatus.sh ``` + +Using Shell process substitution +```bash +eri-cli check --input-is-email < <( echo "john@example.org" ) | jq .valid +``` \ No newline at end of file diff --git a/cmd/web/config/config.go b/cmd/web/config/config.go index a1d130d..bcf1023 100644 --- a/cmd/web/config/config.go +++ b/cmd/web/config/config.go @@ -4,7 +4,7 @@ import ( "encoding" "errors" "fmt" - "io/ioutil" + "os" "strings" "time" @@ -26,7 +26,7 @@ func NewConfig(fileName string) (Config, error) { // Not reading a config file on startup, might not show any feedback c.Log.Level = logrus.TraceLevel.String() - b, err := ioutil.ReadFile(fileName) + b, err := os.ReadFile(fileName) if err != nil { return c, fmt.Errorf("unable to open %q, reason: %w", fileName, err) } diff --git a/cmd/web/erihttp/handlers/compression_test.go b/cmd/web/erihttp/handlers/compression_test.go index 206ed4b..5912302 100644 --- a/cmd/web/erihttp/handlers/compression_test.go +++ b/cmd/web/erihttp/handlers/compression_test.go @@ -1,7 +1,7 @@ package handlers import ( - "io/ioutil" + "io" "net/http" "net/http/httptest" "strings" @@ -44,9 +44,9 @@ func TestWithGzipHandler(t *testing.T) { mux := http.NewServeMux() mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - b, err := ioutil.ReadAll(r.Body) + b, err := io.ReadAll(r.Body) if err != nil { - t.Errorf("ioutil.ReadAll(r.Body) Setting up the test failed %s", err) + t.Errorf("io.ReadAll(r.Body) Setting up the test failed %s", err) t.FailNow() } @@ -88,9 +88,9 @@ func TestWithGzipHandler(t *testing.T) { } defer res.Body.Close() - b, err := ioutil.ReadAll(res.Body) + b, err := io.ReadAll(res.Body) if err != nil { - t.Errorf("ioutil.ReadAll(res.Body) Setting up the test failed %s", err) + t.Errorf("io.ReadAll(res.Body) Setting up the test failed %s", err) t.FailNow() } diff --git a/cmd/web/erihttp/types.go b/cmd/web/erihttp/types.go index b2bda74..9f17ca5 100644 --- a/cmd/web/erihttp/types.go +++ b/cmd/web/erihttp/types.go @@ -33,6 +33,7 @@ func (r *AutoCompleteResponse) PrepareResponse() { type SuggestResponse struct { Alternatives []string `json:"alternatives"` MalformedSyntax bool `json:"malformed_syntax"` + MisconfiguredMX bool `json:"misconfigured_mx"` Error string `json:"error,omitempty"` } diff --git a/cmd/web/erihttp/util.go b/cmd/web/erihttp/util.go index 10c72b2..2391d0b 100644 --- a/cmd/web/erihttp/util.go +++ b/cmd/web/erihttp/util.go @@ -3,7 +3,6 @@ package erihttp import ( "fmt" "io" - "io/ioutil" "net/http" ) @@ -37,7 +36,7 @@ func GetBodyFromHTTPRequest(r *http.Request, maxBodySize int64) ([]byte, error) return empty, fmt.Errorf("%w %q", ErrUnsupportedContentType, ct) } - b, err := ioutil.ReadAll(io.LimitReader(r.Body, maxBodySize+1)) + b, err := io.ReadAll(io.LimitReader(r.Body, maxBodySize+1)) if err != nil { return empty, ErrInvalidRequest } diff --git a/cmd/web/graphql.go b/cmd/web/graphql.go index 358a040..87ba3f6 100644 --- a/cmd/web/graphql.go +++ b/cmd/web/graphql.go @@ -69,7 +69,8 @@ func NewGraphQLSchema(conf config.Config, suggestSvc *services.SuggestSvc, autoc return erihttp.SuggestResponse{ Alternatives: result.Alternatives, - MalformedSyntax: sugErr == validator.ErrEmailAddressSyntax, + MalformedSyntax: errors.Is(sugErr, validator.ErrEmailAddressSyntax), + MisconfiguredMX: !result.HasValidMX, }, err }, Description: "Get suggestions", diff --git a/cmd/web/handlers.go b/cmd/web/handlers.go index 804f354..bb24fa9 100644 --- a/cmd/web/handlers.go +++ b/cmd/web/handlers.go @@ -23,7 +23,12 @@ const ( failedResponseError = "Generating response failed." ) -func NewAutoCompleteHandler(logger logrus.FieldLogger, svc *services.AutocompleteSvc, maxSuggestions uint64, maxBodySize uint64) http.HandlerFunc { +type marshalFn func(v interface{}) ([]byte, error) + +func NewAutoCompleteHandler(logger logrus.FieldLogger, svc *services.AutocompleteSvc, maxSuggestions uint64, maxBodySize uint64, jsonMarshaller marshalFn) http.HandlerFunc { + if jsonMarshaller == nil { + jsonMarshaller = json.Marshal + } logger = logger.WithField("handler", "auto complete") return func(w http.ResponseWriter, r *http.Request) { @@ -81,7 +86,7 @@ func NewAutoCompleteHandler(logger logrus.FieldLogger, svc *services.Autocomplet return } - response, err := json.Marshal(erihttp.AutoCompleteResponse{ + response, err := jsonMarshaller(erihttp.AutoCompleteResponse{ Suggestions: result.Suggestions, }) @@ -107,8 +112,12 @@ func NewAutoCompleteHandler(logger logrus.FieldLogger, svc *services.Autocomplet } } -// NewSuggestHandler constructs a HTTP handler that deals with suggestion requests -func NewSuggestHandler(logger logrus.FieldLogger, svc *services.SuggestSvc, maxBodySize uint64) http.HandlerFunc { +// NewSuggestHandler constructs an HTTP handler that deals with suggestion requests +func NewSuggestHandler(logger logrus.FieldLogger, svc *services.SuggestSvc, maxBodySize uint64, jsonMarshaller marshalFn) http.HandlerFunc { + if jsonMarshaller == nil { + jsonMarshaller = json.Marshal + } + log := logger.WithField("handler", "suggest") return func(w http.ResponseWriter, r *http.Request) { var err error @@ -136,18 +145,15 @@ func NewSuggestHandler(logger logrus.FieldLogger, svc *services.SuggestSvc, maxB } var alts = []string{req.Email} - var sugErr error - { - var result services.SuggestResult - result, sugErr = svc.Suggest(r.Context(), req.Email) - if len(result.Alternatives) > 0 { - alts = append(alts[0:0], result.Alternatives...) - } + result, sugErr := svc.Suggest(r.Context(), req.Email) + if len(result.Alternatives) > 0 { + alts = append(alts[0:0], result.Alternatives...) } sr := erihttp.SuggestResponse{ Alternatives: alts, MalformedSyntax: errors.Is(sugErr, validator.ErrEmailAddressSyntax), + MisconfiguredMX: !result.HasValidMX, } if sugErr != nil { @@ -159,7 +165,7 @@ func NewSuggestHandler(logger logrus.FieldLogger, svc *services.SuggestSvc, maxB sr.Error = sugErr.Error() } - response, err := json.Marshal(sr) + response, err := jsonMarshaller(sr) if err != nil { log.WithFields(logrus.Fields{ diff --git a/cmd/web/handlers_test.go b/cmd/web/handlers_test.go index 03b6f2f..490b237 100644 --- a/cmd/web/handlers_test.go +++ b/cmd/web/handlers_test.go @@ -4,8 +4,8 @@ import ( "bytes" "context" "encoding/json" + "fmt" "io" - "io/ioutil" "net/http" "net/http/httptest" "strings" @@ -78,13 +78,14 @@ func TestNewAutoCompleteHandler(t *testing.T) { requestBody io.Reader ctx context.Context want wants + marshaller marshalFn }{ { name: "correct POST body", requestBody: bytes.NewReader(validRequestBody), ctx: context.Background(), want: wants{ - statusCode: 200, + statusCode: http.StatusOK, }, }, { @@ -92,7 +93,7 @@ func TestNewAutoCompleteHandler(t *testing.T) { requestBody: strings.NewReader("burp"), ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -100,7 +101,7 @@ func TestNewAutoCompleteHandler(t *testing.T) { requestBody: nil, ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -108,7 +109,7 @@ func TestNewAutoCompleteHandler(t *testing.T) { requestBody: strings.NewReader(strings.Repeat(".", int(maxBodySize)+1)), ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -116,7 +117,7 @@ func TestNewAutoCompleteHandler(t *testing.T) { requestBody: bytes.NewReader(validRequestBody[0 : len(validRequestBody)-1]), // stripping off the '}' ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -124,7 +125,7 @@ func TestNewAutoCompleteHandler(t *testing.T) { requestBody: bytes.NewReader(emptyArgumentValidStructureRequestBody), ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -132,7 +133,7 @@ func TestNewAutoCompleteHandler(t *testing.T) { requestBody: bytes.NewReader(tooLongArgumentValidStructureRequestBody), ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -140,7 +141,18 @@ func TestNewAutoCompleteHandler(t *testing.T) { requestBody: bytes.NewReader(validRequestBody), ctx: expiredContext, want: wants{ - statusCode: 200, + statusCode: http.StatusOK, + }, + }, + { + name: "Unable to marshal response", + requestBody: bytes.NewReader(validRequestBody), + ctx: context.Background(), + marshaller: func(v interface{}) ([]byte, error) { + return nil, fmt.Errorf("test failure") + }, + want: wants{ + statusCode: http.StatusInternalServerError, }, }, } @@ -151,7 +163,7 @@ func TestNewAutoCompleteHandler(t *testing.T) { t.Run(tt.name, func(t *testing.T) { hook.Reset() - handlerFunc := NewAutoCompleteHandler(logger, svc, 10, maxBodySize) + handlerFunc := NewAutoCompleteHandler(logger, svc, 10, maxBodySize, tt.marshaller) rec := httptest.NewRecorder() req := httptest.NewRequest(http.MethodPost, "/", tt.requestBody) @@ -163,7 +175,7 @@ func TestNewAutoCompleteHandler(t *testing.T) { if tt.want.statusCode != rec.Code { t.Errorf("NewAutoCompleteHandler() = %+v, want %+v", rec, tt.want) - b, _ := ioutil.ReadAll(rec.Result().Body) + b, _ := io.ReadAll(rec.Result().Body) t.Logf("Body: %s", b) for _, l := range hook.AllEntries() { t.Logf("Logs: %s", l.Message) @@ -238,6 +250,7 @@ func TestNewSuggestHandler(t *testing.T) { name string requestBody io.Reader ctx context.Context + marshaller marshalFn want wants }{ { @@ -245,7 +258,7 @@ func TestNewSuggestHandler(t *testing.T) { requestBody: bytes.NewReader(validRequestBody), ctx: context.Background(), want: wants{ - statusCode: 200, + statusCode: http.StatusOK, }, }, { @@ -253,7 +266,7 @@ func TestNewSuggestHandler(t *testing.T) { requestBody: strings.NewReader("burp"), ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -261,7 +274,7 @@ func TestNewSuggestHandler(t *testing.T) { requestBody: nil, ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -269,7 +282,7 @@ func TestNewSuggestHandler(t *testing.T) { requestBody: strings.NewReader(strings.Repeat(".", int(maxBodySize)+1)), ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -277,7 +290,7 @@ func TestNewSuggestHandler(t *testing.T) { requestBody: bytes.NewReader(validRequestBody[0 : len(validRequestBody)-1]), // stripping off the '}' ctx: context.Background(), want: wants{ - statusCode: 400, + statusCode: http.StatusBadRequest, }, }, { @@ -285,7 +298,7 @@ func TestNewSuggestHandler(t *testing.T) { requestBody: bytes.NewReader(emptyArgumentValidStructureRequestBody), ctx: context.Background(), want: wants{ - statusCode: 200, + statusCode: http.StatusOK, }, }, { @@ -293,7 +306,18 @@ func TestNewSuggestHandler(t *testing.T) { requestBody: bytes.NewReader(validRequestBody), ctx: expiredContext, want: wants{ - statusCode: 200, + statusCode: http.StatusOK, + }, + }, + { + name: "Unable to marshal response", + requestBody: bytes.NewReader(validRequestBody), + ctx: context.Background(), + marshaller: func(v interface{}) ([]byte, error) { + return nil, fmt.Errorf("test failure") + }, + want: wants{ + statusCode: http.StatusInternalServerError, }, }, } @@ -311,7 +335,7 @@ func TestNewSuggestHandler(t *testing.T) { t.Run(tt.name, func(t *testing.T) { hook.Reset() - handlerFunc := NewSuggestHandler(logger, svc, maxBodySize) + handlerFunc := NewSuggestHandler(logger, svc, maxBodySize, tt.marshaller) rec := httptest.NewRecorder() req := httptest.NewRequest(http.MethodPost, "/", tt.requestBody) @@ -323,7 +347,7 @@ func TestNewSuggestHandler(t *testing.T) { if tt.want.statusCode != rec.Code { t.Errorf("NewSuggestHandler() = %+v, want %+v", rec, tt.want) - b, _ := ioutil.ReadAll(rec.Result().Body) + b, _ := io.ReadAll(rec.Result().Body) t.Logf("Body: %s", b) for _, l := range hook.AllEntries() { t.Logf("Logs: %s", l.Message) @@ -359,7 +383,7 @@ func TestNewSuggestHandler(t *testing.T) { // Building the service svc := services.NewSuggestService(myFinder, val, nil, logger) - handlerFunc := NewSuggestHandler(logger, svc, maxBodySize) + handlerFunc := NewSuggestHandler(logger, svc, maxBodySize, nil) // Setting up the request req := httptest.NewRequest(http.MethodPost, "/", createSuggestRequestBytesReader(t, "nonexisting@exampleorg")) @@ -384,7 +408,7 @@ func TestNewSuggestHandler(t *testing.T) { } func restoreSuggestResponse(t *testing.T, r io.Reader) erihttp.SuggestResponse { - responseStr, err := ioutil.ReadAll(r) + responseStr, err := io.ReadAll(r) if err != nil { t.Fatal(err) } diff --git a/cmd/web/hitlist/hitlist_test.go b/cmd/web/hitlist/hitlist_test.go index 4ef275d..fde09ad 100644 --- a/cmd/web/hitlist/hitlist_test.go +++ b/cmd/web/hitlist/hitlist_test.go @@ -14,7 +14,7 @@ import ( func Test_getValidDomains(t *testing.T) { validDuration := time.Now().Add(1 * time.Hour) - validFlags := validations.FValid | validations.FSyntax | validations.FMXLookup | validations.FDomainHasIP + validFlags := validations.FValid | validations.FSyntax | validations.FMXLookup | validations.FMXDomainHasIP validVR := validator.Result{ Validations: validations.Validations(validFlags), Steps: validations.Steps(validFlags), @@ -521,12 +521,12 @@ func TestHitList_AddDomain(t *testing.T) { args: args{ d: "example.org", vr: validator.Result{ - Validations: validations.Validations(validations.FValid | validations.FSyntax | validations.FMXLookup | validations.FDomainHasIP), + Validations: validations.Validations(validations.FValid | validations.FSyntax | validations.FMXLookup | validations.FMXDomainHasIP), }, }, wantErr: false, wantVR: validator.Result{ - Validations: validations.Validations(validations.FValid | validations.FSyntax | validations.FMXLookup | validations.FDomainHasIP), + Validations: validations.Validations(validations.FValid | validations.FSyntax | validations.FMXLookup | validations.FMXDomainHasIP), }, }, { diff --git a/cmd/web/main.go b/cmd/web/main.go index ed0bb84..4023885 100644 --- a/cmd/web/main.go +++ b/cmd/web/main.go @@ -140,8 +140,8 @@ func main() { registerProfileHandler(mux, conf) registerHealthHandler(mux, logger) - mux.HandleFunc("/suggest", NewSuggestHandler(logger, suggestSvc, conf.Server.MaxRequestSize)) - mux.HandleFunc("/autocomplete", NewAutoCompleteHandler(logger, autocompleteSvc, conf.Services.Autocomplete.MaxSuggestions, conf.Server.MaxRequestSize)) + mux.HandleFunc("/suggest", NewSuggestHandler(logger, suggestSvc, conf.Server.MaxRequestSize, nil)) + mux.HandleFunc("/autocomplete", NewAutoCompleteHandler(logger, autocompleteSvc, conf.Services.Autocomplete.MaxSuggestions, conf.Server.MaxRequestSize, nil)) schema, err := NewGraphQLSchema(conf, suggestSvc, autocompleteSvc) if err != nil { diff --git a/cmd/web/services/suggest.go b/cmd/web/services/suggest.go index 23a3e04..a694930 100644 --- a/cmd/web/services/suggest.go +++ b/cmd/web/services/suggest.go @@ -6,6 +6,7 @@ import ( "github.com/Dynom/ERI/cmd/web/erihttp/handlers" "github.com/Dynom/ERI/cmd/web/preferrer" + "github.com/Dynom/ERI/validator/validations" "github.com/Dynom/ERI/validator" @@ -37,6 +38,7 @@ type SuggestSvc struct { type SuggestResult struct { Alternatives []string + HasValidMX bool } // @todo make this configurable and Algorithm dependent @@ -100,6 +102,7 @@ func (c *SuggestSvc) Suggest(ctx context.Context, email string) (SuggestResult, } } + sr.HasValidMX = vr.Validations.HasFlag(validations.FMXDomainHasIP | validations.FMXLookup) sr.Alternatives = alts return sr, err diff --git a/cmd/web/util.go b/cmd/web/util.go index 91c0087..57b581b 100644 --- a/cmd/web/util.go +++ b/cmd/web/util.go @@ -328,5 +328,4 @@ func writeErrorJSONResponse(logger logrus.FieldLogger, w http.ResponseWriter, re "bytes_written": c, }).Error("Failed to write response") } - } diff --git a/cmd/web/util_test.go b/cmd/web/util_test.go new file mode 100644 index 0000000..f1ee320 --- /dev/null +++ b/cmd/web/util_test.go @@ -0,0 +1,37 @@ +package main + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/Dynom/ERI/cmd/web/erihttp" + testLog "github.com/sirupsen/logrus/hooks/test" +) + +func Test_writeErrorJSONResponse(t *testing.T) { + t.Run("unable to write", func(t *testing.T) { + logger, hook := testLog.NewNullLogger() + writer := &brokenResponseWriter{ResponseWriter: httptest.NewRecorder()} + writer.writeErr = fmt.Errorf("boop") + writeErrorJSONResponse(logger, writer, &erihttp.AutoCompleteResponse{}) + + if hook.LastEntry().Message != "Failed to write response" { + t.Errorf("Expected an error log") + } + }) +} + +type brokenResponseWriter struct { + http.ResponseWriter + writeErr error +} + +func (b *brokenResponseWriter) Write(bytes []byte) (int, error) { + if b.writeErr == nil { + return b.ResponseWriter.Write(bytes) + } + + return len(bytes), b.writeErr +} diff --git a/validator/checks.go b/validator/checks.go index ee44898..8d26c56 100644 --- a/validator/checks.go +++ b/validator/checks.go @@ -2,7 +2,6 @@ package validator import ( "fmt" - "net" "net/mail" "net/smtp" "time" @@ -94,11 +93,6 @@ func checkIfDomainHasMX(a *Artifact) error { a.Timings.Add("checkIfDomainHasMX", time.Since(start)) if err != nil { - if e, ok := err.(net.Error); ok && e.Temporary() { - _ = e - // @todo what do we do when it's a temporary error? - } - return ValidationError{ Validator: "checkIfDomainHasMX", Internal: err, @@ -113,8 +107,8 @@ func checkIfDomainHasMX(a *Artifact) error { // checkIfMXHasIP performs a NS lookup and fetches the IP addresses of the MX hosts func checkIfMXHasIP(a *Artifact) error { - if a.Steps.HasFlag(validations.FDomainHasIP) { - if !a.Validations.HasFlag(validations.FDomainHasIP) { + if a.Steps.HasFlag(validations.FMXDomainHasIP) { + if !a.Validations.HasFlag(validations.FMXDomainHasIP) { return ValidationError{ Validator: "checkIfMXHasIP", error: ErrEmailAddressSyntax, @@ -124,7 +118,7 @@ func checkIfMXHasIP(a *Artifact) error { return nil } - a.Steps.SetFlag(validations.FDomainHasIP) + a.Steps.SetFlag(validations.FMXDomainHasIP) var err error for i, domain := range a.mx { @@ -135,11 +129,6 @@ func checkIfMXHasIP(a *Artifact) error { if innerErr != nil || len(ips) == 0 { a.mx[i] = "" - if e, ok := err.(net.Error); ok && e.Temporary() { - // @todo what do we do when it's a temporary error? - _ = e - } - if innerErr != nil { err = wrapError(err, innerErr) } @@ -154,7 +143,7 @@ func checkIfMXHasIP(a *Artifact) error { } } - a.Validations.SetFlag(validations.FDomainHasIP) + a.Validations.SetFlag(validations.FMXDomainHasIP) return nil } @@ -186,11 +175,6 @@ func checkMXAcceptsConnect(a *Artifact) error { conn, err := getConnection(a.ctx, a.dialer, mxToCheck) a.Timings.Add("checkMXAcceptsConnect", time.Since(start)) - if e, ok := err.(net.Error); ok && e.Temporary() { - // @todo what do we do when it's a temporary error? - _ = e - } - if err != nil { return ValidationError{ Validator: "checkMXAcceptsConnect", diff --git a/validator/checks_test.go b/validator/checks_test.go index 82f84e6..d325fe1 100644 --- a/validator/checks_test.go +++ b/validator/checks_test.go @@ -163,7 +163,7 @@ func Test_looksLikeValidLocalPart(t *testing.T) { {local: "john doe"}, {local: "john\ndoe"}, {local: "john.doe\n"}, - {local: "john.doe\u00A0"}, + {local: "john.doe\u00A0"}, // nbsp // Common copy&paste mistakes {local: "\u2018john"}, // A common quotation mark diff --git a/validator/util.go b/validator/util.go index 88cfefe..de85a5e 100644 --- a/validator/util.go +++ b/validator/util.go @@ -84,6 +84,12 @@ func getConnection(ctx context.Context, dialer DialContext, mxHost string) (net. } } + // When dialing fails, we could end up with a nil connection, which indicates that all options have been exhausted + // and that we've ended up with no reachable hosts + if conn == nil { + return conn, fmt.Errorf("no connection possible %w", ErrInvalidHost) + } + return conn, err } @@ -121,7 +127,8 @@ func fetchMXHosts(ctx context.Context, resolver LookupMX, domain string) ([]stri } // MightBeAHostOrIP is a very rudimentary check to see if the argument could be either a host name or IP address -// It aims on speed and not for correctness. It's intended to weed-out bogus responses such as '.' +// It aims at speed and not correctness. It's intended to weed-out bogus responses such as '.' +// //nolint:gocyclo func MightBeAHostOrIP(h string) bool { @@ -153,8 +160,9 @@ func MightBeAHostOrIP(h string) bool { } // Note: These explicitly exclude several, otherwise legal, characters (such as: 0x00A0). -// NBSP is a frequently occurring erroneous character in e-mail addresses (possibly introduced by a copy & paste -// from rich formatted documents) and not expected to be desired. +// +// NBSP is a frequently occurring erroneous character in e-mail addresses (possibly introduced by a copy & paste +// from rich formatted documents) and not expected to be desired. var ( reLocal = regexp.MustCompile(`(?i)\A(?:(?:[\p{L}\p{N}]|[!#$%&'*+\-/=?^_\x60{|}~]|[\x{00C0}-\x{1FFF}\x{2070}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}])+(?:\.(?:[\p{L}\p{N}]|[!#$%&'*+\-/=?^_\x60{|}~]|[\x{00A1}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}])+)*)\z`) reDomain = regexp.MustCompile(`(?i)\A(?:[\p{L}\p{N}\x{00A1}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}](?:[\p{L}\p{N}\x{00A1}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}-]*[\p{L}\p{N}\x{00A1}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}])?\.)+[\p{L}\p{N}\x{00A1}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}](?:[\p{L}\p{N}\x{00A1}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}-]*[\p{L}\p{N}\x{00A1}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}])?\z`) diff --git a/validator/util_test.go b/validator/util_test.go index 15ae120..3ae371e 100644 --- a/validator/util_test.go +++ b/validator/util_test.go @@ -11,13 +11,21 @@ import ( "github.com/Dynom/ERI/types" ) +func newStubDialer(err error) *stubDialer { + return &stubDialer{ + conn: &net.IPConn{}, + err: err, + } +} + type stubDialer struct { - err error + err error + conn net.Conn } func (sd stubDialer) DialContext(ctx context.Context, network, address string) (net.Conn, error) { // We don't use net.Conn, so returning nil for the interface here is safe - return nil, sd.err + return sd.conn, sd.err } type stubResolver struct { @@ -92,37 +100,37 @@ func Test_getConnection(t *testing.T) { // @todo type args struct { - err error + err error + conn net.Conn } + defaultConn := &net.IPConn{} + tests := []struct { name string args args - want net.Conn wantErr bool }{ // The good - {name: "happy flow", args: args{err: nil}}, - {name: "expected error", args: args{err: errors.New("connection refused")}}, + {name: "happy flow", args: args{err: nil, conn: defaultConn}}, + {name: "expected error", args: args{err: errors.New("connection refused"), conn: defaultConn}}, // The bad - {wantErr: true, name: "unexpected error", args: args{err: errors.New("b0rk")}}, + {wantErr: true, name: "unexpected error", args: args{err: errors.New("b0rk"), conn: defaultConn}}, + {wantErr: true, name: "no conn", args: args{err: nil, conn: nil}}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() - dialer := stubDialer{} - dialer.err = tt.args.err + dialer := newStubDialer(tt.args.err) + dialer.conn = tt.args.conn - got, err := getConnection(ctx, dialer, "mx.example.org") + _, err := getConnection(ctx, dialer, "mx.example.org") if (err != nil) != tt.wantErr { t.Errorf("getConnection() error = %v, wantErr %v", err, tt.wantErr) return } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("getConnection() got = %v, want %v", got, tt.want) - } }) } } diff --git a/validator/validations/flags.go b/validator/validations/flags.go index 7f31859..cedbb1d 100644 --- a/validator/validations/flags.go +++ b/validator/validations/flags.go @@ -2,22 +2,25 @@ package validations import "strings" +// Validation Flags, these flags represent successful validation steps. Depending on how far you want to go, you can +// classify a validation as valid enough, for your use-case. const ( - // Validation Flags, these flags represent successful validation steps. Depending on how far you want to go, you can - // classify a validation as valid enough, for your use-case. - FValid Flag = 1 << iota - FSyntax Flag = 1 << iota - FMXLookup Flag = 1 << iota - FDomainHasIP Flag = 1 << iota - FHostConnect Flag = 1 << iota - FValidRCPT Flag = 1 << iota - FDisposable Flag = 1 << iota // Address / Domain is considered a disposable e-mail trap + FValid Flag = 1 << iota + FSyntax Flag = 1 << iota + FMXLookup Flag = 1 << iota + FMXDomainHasIP Flag = 1 << iota // Flag set when the MX domain is verified to have at least one resolvable IP + FHostConnect Flag = 1 << iota + FValidRCPT Flag = 1 << iota + FDisposable Flag = 1 << iota // Address / Domain is considered a disposable e-mail trap + + // FDomainHasIP is Deprecated: Unclear naming. Prefer FMXDomainHasIP + FDomainHasIP = FMXDomainHasIP // @deprecated ) type Flag uint8 func (f Flag) AsStringSlice() []string { - var flags = []Flag{FValid, FSyntax, FMXLookup, FDomainHasIP, FHostConnect, FValidRCPT, FDomainHasIP} + var flags = []Flag{FValid, FSyntax, FMXLookup, FMXDomainHasIP, FHostConnect, FValidRCPT, FDisposable} var r = make([]string, 0, len(flags)) for _, flag := range flags { @@ -50,8 +53,8 @@ func toString(f Flag) string { return "syntax" case FMXLookup: return "lookup" - case FDomainHasIP: - return "domainHasIP" + case FMXDomainHasIP: + return "mxDomainHasIP" case FHostConnect: return "hostConnect" case FValidRCPT: diff --git a/validator/validations/flags_test.go b/validator/validations/flags_test.go index 47d85d9..c814c89 100644 --- a/validator/validations/flags_test.go +++ b/validator/validations/flags_test.go @@ -16,6 +16,7 @@ func TestFlag_String(t *testing.T) { } for _, tt := range tests { + tt := tt t.Run(tt.name, func(t *testing.T) { if got := tt.f.String(); got != tt.want { t.Errorf("String() = %v, want %v", got, tt.want) diff --git a/validator/validations/validations.go b/validator/validations/validations.go index bb70c17..4a38c02 100644 --- a/validator/validations/validations.go +++ b/validator/validations/validations.go @@ -50,7 +50,7 @@ func (v Validations) HasFlag(f Flag) bool { return v&Validations(f) != 0 } -// isValidationsForValidDomain checks if a mask of validations really marks a domain as valid. +// IsValidationsForValidDomain checks if a mask of validations really marks a domain as valid. func (v Validations) IsValidationsForValidDomain() bool { - return (v.HasFlag(FMXLookup) || v.HasFlag(FDomainHasIP) || v.HasFlag(FHostConnect)) && v.HasFlag(FSyntax) + return (v.HasFlag(FMXLookup) || v.HasFlag(FMXDomainHasIP) || v.HasFlag(FHostConnect)) && v.HasFlag(FSyntax) } diff --git a/validator/validations/validations_test.go b/validator/validations/validations_test.go index b337e31..4077941 100644 --- a/validator/validations/validations_test.go +++ b/validator/validations/validations_test.go @@ -14,11 +14,11 @@ func TestValidations_HasFlag(t *testing.T) { want bool }{ {want: true, name: "has flag", v: FValid, tf: FValid}, - {want: true, name: "has flag (multiple)", v: FValid | FDomainHasIP, tf: FValid}, - {want: true, name: "has flag (multiple)", v: FSyntax | FDomainHasIP, tf: FDomainHasIP}, + {want: true, name: "has flag (multiple)", v: FValid | FMXDomainHasIP, tf: FValid}, + {want: true, name: "has flag (multiple)", v: FSyntax | FMXDomainHasIP, tf: FMXDomainHasIP}, {name: "doesn't have flag", v: 0, tf: FValid}, - {name: "doesn't have flag", v: FDomainHasIP, tf: FValid}, + {name: "doesn't have flag", v: FMXDomainHasIP, tf: FValid}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -38,7 +38,7 @@ func TestValidations_IsValid(t *testing.T) { }{ {want: true, name: "mega valid", v: FValid}, {name: "default value", v: 0}, - {name: "some flags", v: FSyntax | FDomainHasIP}, + {name: "some flags", v: FSyntax | FMXDomainHasIP}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -56,8 +56,8 @@ func TestValidations_IsValidationsForValidDomain(t *testing.T) { v Flag want bool }{ - {want: true, name: "All domain flags", v: FSyntax | FHostConnect | FMXLookup | FDomainHasIP}, - {want: true, name: "Domain has IP", v: FSyntax | FDomainHasIP}, + {want: true, name: "All domain flags", v: FSyntax | FHostConnect | FMXLookup | FMXDomainHasIP}, + {want: true, name: "Domain has IP", v: FSyntax | FMXDomainHasIP}, {want: true, name: "Domain accepted connections", v: FSyntax | FHostConnect}, {want: true, name: "DNS lookup showed domain has MX records", v: FSyntax | FMXLookup}, @@ -332,20 +332,20 @@ func BenchmarkTypeMemoryUsageInt64(b *testing.B) { } func ExampleMaskTest() { - fmt.Printf("FValid %08b %d\n", FValid, FValid) - fmt.Printf("FSyntax %08b %d\n", FSyntax, FSyntax) - fmt.Printf("FMXLookup %08b %d\n", FMXLookup, FMXLookup) - fmt.Printf("FDomainHasIP %08b %d\n", FDomainHasIP, FDomainHasIP) - fmt.Printf("FHostConnect %08b %d\n", FHostConnect, FHostConnect) - fmt.Printf("FValidRCPT %08b %d\n", FValidRCPT, FValidRCPT) - fmt.Printf("FDisposable %08b %d\n", FDisposable, FDisposable) + fmt.Printf("FValid %08b %d\n", FValid, FValid) + fmt.Printf("FSyntax %08b %d\n", FSyntax, FSyntax) + fmt.Printf("FMXLookup %08b %d\n", FMXLookup, FMXLookup) + fmt.Printf("FMXDomainHasIP %08b %d\n", FMXDomainHasIP, FMXDomainHasIP) + fmt.Printf("FHostConnect %08b %d\n", FHostConnect, FHostConnect) + fmt.Printf("FValidRCPT %08b %d\n", FValidRCPT, FValidRCPT) + fmt.Printf("FDisposable %08b %d\n", FDisposable, FDisposable) // Output: - // FValid 00000001 1 - // FSyntax 00000010 2 - // FMXLookup 00000100 4 - // FDomainHasIP 00001000 8 - // FHostConnect 00010000 16 - // FValidRCPT 00100000 32 - // FDisposable 01000000 64 + // FValid 00000001 1 + // FSyntax 00000010 2 + // FMXLookup 00000100 4 + // FMXDomainHasIP 00001000 8 + // FHostConnect 00010000 16 + // FValidRCPT 00100000 32 + // FDisposable 01000000 64 } diff --git a/validator/validator.go b/validator/validator.go index bda86b6..49372ec 100644 --- a/validator/validator.go +++ b/validator/validator.go @@ -35,23 +35,34 @@ func prependOptions(options []ArtifactFn, o ...ArtifactFn) []ArtifactFn { return append(o, options...) } -// CheckWithConnect performs a thorough check, which has the least chance of false-positives. It requires a valid PTR -// and is probably not something you want to offer as a user-facing service. -func (v *EmailValidator) CheckWithConnect(ctx context.Context, emailParts types.EmailParts, options ...ArtifactFn) Result { +// CheckWithRCPT performs a thorough check, which includes a RCPT check. It requires a valid PTR and is probably not +// something you want to offer as a user-facing service. +// +// Warning: Using this _can_ degrade your IPs reputation, since it's also a process spammers use. +func (v *EmailValidator) CheckWithRCPT(ctx context.Context, emailParts types.EmailParts, options ...ArtifactFn) Result { + artifact, _ := validateSequence(ctx, + getNewArtifact(ctx, emailParts, prependOptions(options, WithDialer(v.dialer), WithDeadlineCTX(ctx))...), + []stateFn{ + getSyntaxCheck(emailParts), + checkIfDomainHasMX, + checkIfMXHasIP, + checkMXAcceptsConnect, + checkRCPT, + }) - var syntaxCheck stateFn = checkEmailAddressSyntax - if emailParts.Local == "" { - syntaxCheck = checkDomainSyntax - } + return createResult(artifact) +} +// CheckWithConnect performs a thorough check, which has the low chance of false-positives. It also tests if the MX server +// accepts connections, but won't try any mail commands. +func (v *EmailValidator) CheckWithConnect(ctx context.Context, emailParts types.EmailParts, options ...ArtifactFn) Result { artifact, _ := validateSequence(ctx, getNewArtifact(ctx, emailParts, prependOptions(options, WithDialer(v.dialer), WithDeadlineCTX(ctx))...), []stateFn{ - syntaxCheck, + getSyntaxCheck(emailParts), checkIfDomainHasMX, checkIfMXHasIP, checkMXAcceptsConnect, - checkRCPT, }) return createResult(artifact) @@ -59,16 +70,10 @@ func (v *EmailValidator) CheckWithConnect(ctx context.Context, emailParts types. // CheckWithLookup performs a sanity check using DNS lookups. It won't connect to the actual hosts. func (v *EmailValidator) CheckWithLookup(ctx context.Context, emailParts types.EmailParts, options ...ArtifactFn) Result { - - var syntaxCheck stateFn = checkEmailAddressSyntax - if emailParts.Local == "" { - syntaxCheck = checkDomainSyntax - } - artifact, _ := validateSequence(ctx, getNewArtifact(ctx, emailParts, prependOptions(options, WithDialer(v.dialer), WithDeadlineCTX(ctx))...), []stateFn{ - syntaxCheck, + getSyntaxCheck(emailParts), checkIfDomainHasMX, checkIfMXHasIP, }) @@ -78,21 +83,24 @@ func (v *EmailValidator) CheckWithLookup(ctx context.Context, emailParts types.E // CheckWithSyntax performs only a syntax check. func (v *EmailValidator) CheckWithSyntax(ctx context.Context, emailParts types.EmailParts, options ...ArtifactFn) Result { - - var syntaxCheck stateFn = checkEmailAddressSyntax - if emailParts.Local == "" { - syntaxCheck = checkDomainSyntax - } - artifact, _ := validateSequence(ctx, getNewArtifact(ctx, emailParts, prependOptions(options, WithDialer(v.dialer), WithDeadlineCTX(ctx))...), []stateFn{ - syntaxCheck, + getSyntaxCheck(emailParts), }) return createResult(artifact) } +// getSyntaxCheck returns a domain only check, when the local part is missing and otherwise uses a full address check +func getSyntaxCheck(parts types.EmailParts) stateFn { + var syntaxCheck stateFn = checkEmailAddressSyntax + if parts.Local == "" { + syntaxCheck = checkDomainSyntax + } + return syntaxCheck +} + func validateSequence(ctx context.Context, artifact Artifact, sequence []stateFn) (Artifact, error) { for _, v := range sequence { if err := v(&artifact); err != nil { diff --git a/validator/validator_test.go b/validator/validator_test.go index 1845d2d..f32bfc6 100644 --- a/validator/validator_test.go +++ b/validator/validator_test.go @@ -122,7 +122,7 @@ func Test_validateSequence(t *testing.T) { func(a *Artifact) error { // This fn shouldn't run a.Validations.SetFlag(validations.FDisposable) - a.Steps.SetFlag(validations.FDomainHasIP) + a.Steps.SetFlag(validations.FMXDomainHasIP) return nil }, }, @@ -146,7 +146,7 @@ func Test_validateSequence(t *testing.T) { func(a *Artifact) error { // This fn shouldn't run a.Validations.SetFlag(validations.FDisposable) - a.Steps.SetFlag(validations.FDomainHasIP) + a.Steps.SetFlag(validations.FMXDomainHasIP) return nil }, },