Skip to content

Commit

Permalink
Detect elements with duplicated ids
Browse files Browse the repository at this point in the history
  • Loading branch information
StJudeWasHere committed Feb 27, 2025
1 parent e0a3b65 commit dfbb079
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 1 deletion.
1 change: 1 addition & 0 deletions internal/issues/errors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,5 @@ const (
ErrorNosnippet // Pages with the nosnippet directive
ErrorImgWithoutSize // Pages with img elements that have no size attributes
ErrorIncorrectMediaType // URLs with incorrect media type or media type that doesn't match extension
ErrorDuplicatedId // Pages with duplicated id attributes
)
39 changes: 39 additions & 0 deletions internal/issues/page/content.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"golang.org/x/net/html"

"github.com/antchfx/htmlquery"
"github.com/stjudewashere/seonaut/internal/issues/errors"
"github.com/stjudewashere/seonaut/internal/models"
)
Expand Down Expand Up @@ -69,3 +70,41 @@ func NewIncorrectMediaType() *models.PageIssueReporter {
Callback: c,
}
}

// NewDuplicatedId returns a PageIssueReporter whose callback reports true when
// at least two elements in the page share the same non-empty id attribute value.
// Only crawled pages with a "text/html" media type and a 2xx status code are
// inspected; every other page is reported as having no issue.
func NewDuplicatedId() *models.PageIssueReporter {
	hasDuplicatedId := func(pageReport *models.PageReport, htmlNode *html.Node, header *http.Header) bool {
		inspectable := pageReport.Crawled &&
			pageReport.MediaType == "text/html" &&
			pageReport.StatusCode >= 200 && pageReport.StatusCode < 300
		if !inspectable {
			return false
		}

		// Track every id value seen so far; the first repeated value settles the result.
		seen := map[string]struct{}{}
		for _, node := range htmlquery.Find(htmlNode, "//*[@id]") {
			value := htmlquery.SelectAttr(node, "id")
			if value == "" {
				// Empty id attributes are not counted as duplicates of each other.
				continue
			}

			if _, repeated := seen[value]; repeated {
				return true
			}

			seen[value] = struct{}{}
		}

		return false
	}

	return &models.PageIssueReporter{
		Callback:  hasDuplicatedId,
		ErrorType: errors.ErrorDuplicatedId,
	}
}
66 changes: 66 additions & 0 deletions internal/issues/page/content_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ package page_test
import (
"net/http"
"net/url"
"strings"
"testing"

"github.com/antchfx/htmlquery"
"github.com/stjudewashere/seonaut/internal/issues/errors"
"github.com/stjudewashere/seonaut/internal/issues/page"
"github.com/stjudewashere/seonaut/internal/models"
Expand Down Expand Up @@ -133,3 +135,67 @@ func TestIncorrectMediaTypeIssues(t *testing.T) {
t.Errorf("reportsIssue should be true")
}
}

// TestDuplicatedIdIssues verifies that the duplicated id reporter flags a
// crawled, successful text/html page in which two elements share the same id.
func TestDuplicatedIdIssues(t *testing.T) {
	pageReport := &models.PageReport{
		Crawled:    true,
		MediaType:  "text/html",
		StatusCode: 200,
	}

	reporter := page.NewDuplicatedId()
	if reporter.ErrorType != errors.ErrorDuplicatedId {
		t.Errorf("error type is not correct")
	}

	// The id "header" is used by both the first div and the span.
	body := strings.NewReader(`
		<html>
			<body>
				<div id="header">Header 1</div>
				<div id="header2">Header 2</div>
				<span id="header">Header 3</span>
			</body>
		</html>`)

	doc, err := htmlquery.Parse(body)
	if err != nil {
		// Abort: without a parsed document the callback result would be meaningless.
		t.Fatalf("error parsing html: %v", err)
	}

	if !reporter.Callback(pageReport, doc, &http.Header{}) {
		t.Errorf("reportsIssue should be true")
	}
}

// TestDuplicatedIdNoIssues verifies that the duplicated id reporter does not
// flag a crawled, successful text/html page in which every id is unique.
func TestDuplicatedIdNoIssues(t *testing.T) {
	pageReport := &models.PageReport{
		Crawled:    true,
		MediaType:  "text/html",
		StatusCode: 200,
	}

	reporter := page.NewDuplicatedId()
	if reporter.ErrorType != errors.ErrorDuplicatedId {
		t.Errorf("error type is not correct")
	}

	// All three elements carry distinct id values.
	body := strings.NewReader(`
		<html>
			<body>
				<div id="header">Header 1</div>
				<div id="header2">Header 2</div>
				<span id="header3">Header 3</span>
			</body>
		</html>`)

	doc, err := htmlquery.Parse(body)
	if err != nil {
		// Abort: without a parsed document the callback result would be meaningless.
		t.Fatalf("error parsing html: %v", err)
	}

	if reporter.Callback(pageReport, doc, &http.Header{}) {
		t.Errorf("reportsIssue should be false")
	}
}
1 change: 1 addition & 0 deletions internal/issues/page/reporters.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func GetAllReporters() []*models.PageIssueReporter {
// Add content issue reporters
NewLittleContentReporter(),
NewIncorrectMediaType(),
NewDuplicatedId(),

// Add scheme issue reporters
NewHTTPSchemeReporter(),
Expand Down
1 change: 1 addition & 0 deletions migrations/0072_duplicated_id.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Remove the issue type with id 75 (the id the matching up migration inserts for ERROR_DUPLICATED_ID).
DELETE FROM issue_types WHERE id = 75;
1 change: 1 addition & 0 deletions migrations/0072_duplicated_id.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Register the ERROR_DUPLICATED_ID issue type with id 75 and priority 3.
-- The string literal uses single quotes so the statement stays valid when
-- double quotes are interpreted as identifier quotes (e.g. ANSI_QUOTES mode).
INSERT INTO issue_types (id, type, priority) VALUES(75, 'ERROR_DUPLICATED_ID', 3);
5 changes: 4 additions & 1 deletion translations/translation.en.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -252,4 +252,7 @@ ERROR_IMG_SIZE_ATTR: Pages containing images missing size attributes
ERROR_IMG_SIZE_ATTR_DESC: Not setting the size attributes for images can cause layout shifts as the page loads, which can negatively impact user experience as well as SEO. Make sure your images have the corresponding size attributes in place.

ERROR_INCORRECT_MEDIA_TYPE: URLs with incorrect media type
ERROR_INCORRECT_MEDIA_TYPE_DESC: Having URLs with incorrect MIME types can hurt SEO by causing browsers to misinterpret or fail to render content, which can impact user experience and search engine indexing.
ERROR_INCORRECT_MEDIA_TYPE_DESC: Having URLs with incorrect MIME types can hurt SEO by causing browsers to misinterpret or fail to render content, which can impact user experience and search engine indexing.

ERROR_DUPLICATED_ID: Pages containing elements with duplicated IDs
ERROR_DUPLICATED_ID_DESC: Having elements with duplicate IDs confuses assistive technologies and JavaScript, leading to navigation issues and unpredictable behavior. This could impact user experience and SEO. Make sure IDs are unique.

0 comments on commit dfbb079

Please sign in to comment.