Skip to content

Commit

Permalink
Fix: do not parse HTML if body is empty
Browse files Browse the repository at this point in the history
  • Loading branch information
StJudeWasHere committed Feb 13, 2025
1 parent 100173c commit e1ce5c6
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
2 changes: 1 addition & 1 deletion internal/services/html_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func NewHTMLParser(u *url.URL, status int, headers *http.Header, body []byte, co
return &pageReport, parser.getHtmlNode(), nil
}

if isHTML(&pageReport) {
if isHTML(&pageReport) && size > 0 {
pageReport.Lang = parser.lang()
pageReport.Title = parser.htmlTitle()
pageReport.Description = parser.htmlMetaDescription()
Expand Down
21 changes: 21 additions & 0 deletions internal/services/html_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -478,5 +478,26 @@ func TestSrcset(t *testing.T) {
t.Errorf("pageReport image %d should be %s. Got: %s", n, i, pageReport.Images[n].URL)
}
}
}

// TestEmptyBody verifies that NewHTMLParser handles a zero-length body that is
// served with an HTML content type: it must return without error and the
// resulting page report must carry the response status code.
func TestEmptyBody(t *testing.T) {
	u, err := url.Parse(testURL)
	if err != nil {
		// Stop here: url.Parse returns a nil URL on failure, and the parser
		// must not be exercised with it.
		t.Fatalf("parsing test URL %q: %v", testURL, err)
	}

	// An empty payload; its length is passed to the parser as the content size.
	body := []byte("")
	// A non-redirect status, so the parser does not take its early-return path
	// and reaches the HTML handling that the empty-body guard protects.
	statusCode := 404
	headers := &http.Header{
		"Content-Type": []string{"text/html"},
	}

	pageReport, _, err := services.NewHTMLParser(u, statusCode, headers, body, int64(len(body)))
	if err != nil {
		t.Fatal(err)
	}

	if pageReport.StatusCode != statusCode {
		t.Errorf("pageReport status code should be %d but received %d", statusCode, pageReport.StatusCode)
	}
}

0 comments on commit e1ce5c6

Please sign in to comment.