Skip to content

Commit

Permalink
Merge branch 'development' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
StJudeWasHere committed Nov 8, 2024
2 parents bf55635 + 1342faf commit 3592cee
Show file tree
Hide file tree
Showing 41 changed files with 826 additions and 184 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ bin/*
# Ignore any file in the web/static folder except favicon.ico and robots.txt
# The frontend build will copy fonts and styles into this folder.

archive/*
web/static/*
!web/static/favicon.ico
!web/static/robots.txt
Empty file added archive/.gitignore
Empty file.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/gorilla/websocket v1.5.3
github.com/microcosm-cc/bluemonday v1.0.27
github.com/oxffaa/gopher-parse-sitemap v0.0.0-20191021113419-005d2eb1def4
github.com/slyrz/warc v0.0.0-20150806225202-a50edd19b690
github.com/spf13/viper v1.19.0
github.com/temoto/robotstxt v1.1.2
github.com/turk/go-sitemap v0.0.0-20210912154218-82ad01095e30
Expand All @@ -25,7 +26,7 @@ require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/antchfx/xpath v1.3.2 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/fsnotify/fsnotify v1.8.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
Expand Down Expand Up @@ -100,6 +100,8 @@ github.com/sagikazarmark/locafero v0.6.0 h1:ON7AQg37yzcRPU69mt7gwhFEBwxI6P9T4Qu3
github.com/sagikazarmark/locafero v0.6.0/go.mod h1:77OmuIc6VTraTXKXIs/uvUxKGUXjE1GbemJYHqdNjX0=
github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
github.com/slyrz/warc v0.0.0-20150806225202-a50edd19b690 h1:2RLSydlHktw3Fo4nwOQwjexn1d49KJb/i+EmlT4D878=
github.com/slyrz/warc v0.0.0-20150806225202-a50edd19b690/go.mod h1:LuhAhBK7l5/QEJmiz3tVGLi8n0IwqAwLX/ndr+6XSDE=
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8=
Expand Down
14 changes: 0 additions & 14 deletions internal/crawler/basic_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@ import (
"encoding/base64"
"fmt"
"net/http"
"reflect"
"testing"
"unsafe"

"github.com/stjudewashere/seonaut/internal/crawler"
)
Expand All @@ -28,18 +26,6 @@ func (m *mockClient) Do(req *http.Request) (*http.Response, error) {
}, nil
}

// newTestClient installs a fresh mockClient into the unexported "client"
// field of the given BasicClient and returns that mock to the caller.
func newTestClient(client *crawler.BasicClient) *mockClient {
	mock := &mockClient{}

	// Look the field up by name, then rebuild a value over its address with
	// reflect.NewAt so that it can be assigned through reflection.
	field := reflect.ValueOf(client).Elem().FieldByName("client")
	settable := reflect.NewAt(field.Type(), unsafe.Pointer(field.UnsafeAddr())).Elem()
	settable.Set(reflect.ValueOf(mock))

	return mock
}

// Test user agent in Get requests.
func TestGetUserAgent(t *testing.T) {
testUA := "TEST_UA"
Expand Down
8 changes: 4 additions & 4 deletions internal/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type Status struct {
}

type Crawler struct {
Client Client
status Status
url *url.URL
options *Options
Expand All @@ -72,7 +73,6 @@ type Crawler struct {
mainDomain string
cancel context.CancelFunc
context context.Context
client Client
callback ResponseCallback
}

Expand Down Expand Up @@ -108,6 +108,7 @@ func NewCrawler(parsedURL *url.URL, options *Options, client Client) *Crawler {
ctx, cancel := context.WithTimeout(context.Background(), crawlerTimeout*time.Hour)

return &Crawler{
Client: client,
status: Status{Crawling: true},
url: parsedURL,
options: options,
Expand All @@ -120,7 +121,6 @@ func NewCrawler(parsedURL *url.URL, options *Options, client Client) *Crawler {
mainDomain: mainDomain,
cancel: cancel,
context: ctx,
client: client,
}
}

Expand Down Expand Up @@ -312,9 +312,9 @@ func (c *Crawler) consumer(reqStream <-chan *RequestMessage, respStream chan<- *
r := &ClientResponse{}
switch requestMessage.Method {
case GET:
r, rm.Error = c.client.Get(requestMessage.URL.String())
r, rm.Error = c.Client.Get(requestMessage.URL.String())
case HEAD:
r, rm.Error = c.client.Head(requestMessage.URL.String())
r, rm.Error = c.Client.Head(requestMessage.URL.String())
}

if rm.Error == nil {
Expand Down
1 change: 1 addition & 0 deletions internal/issues/errors/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,5 @@ const (
ErrorMetasInBody // Pages with meta tags in the document's body
ErrorNosnippet // Pages with the nosnippet directive
ErrorImgWithoutSize // Pages with img elements that have no size attributes
ErrorIncorrectMediaType // URLs with incorrect media type or media type that doesn't match extension
)
35 changes: 35 additions & 0 deletions internal/issues/page/content.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package page

import (
"mime"
"net/http"
"path/filepath"
"strings"

"golang.org/x/net/html"

Expand Down Expand Up @@ -34,3 +37,35 @@ func NewLittleContentReporter() *models.PageIssueReporter {
Callback: c,
}
}

// NewIncorrectMediaType returns a PageIssueReporter of type ErrorIncorrectMediaType.
// Its callback returns true when the page report has no media type, or when
// its media type differs from the one the mime package associates with the
// extension of the URL path. URL paths without an extension are treated as
// ".html". Extensions unknown to the mime package are never reported.
func NewIncorrectMediaType() *models.PageIssueReporter {
	c := func(pageReport *models.PageReport, htmlNode *html.Node, header *http.Header) bool {
		// A missing media type is always reported.
		if pageReport.MediaType == "" {
			return true
		}

		// Without a parsed URL there is no extension to compare against,
		// so there is nothing to report.
		if pageReport.ParsedURL == nil {
			return false
		}

		ext := filepath.Ext(pageReport.ParsedURL.Path)
		if ext == "" {
			ext = ".html"
		}

		// Allow both "application/javascript" and "text/javascript" as valid types.
		// The extension is matched case-insensitively so that ".JS" is handled
		// the same way as ".js", in line with the case-insensitive fallback of
		// mime.TypeByExtension used below.
		if strings.EqualFold(ext, ".js") {
			return pageReport.MediaType != "application/javascript" && pageReport.MediaType != "text/javascript"
		}

		// TypeByExtension may append parameters such as "; charset=utf-8";
		// only the bare media type is compared.
		mimeType, _, _ := strings.Cut(mime.TypeByExtension(ext), ";")
		mimeType = strings.TrimSpace(mimeType)

		// Unknown extension: nothing to compare against.
		if mimeType == "" {
			return false
		}

		return mimeType != pageReport.MediaType
	}

	return &models.PageIssueReporter{
		ErrorType: errors.ErrorIncorrectMediaType,
		Callback:  c,
	}
}
79 changes: 79 additions & 0 deletions internal/issues/page/content_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package page_test

import (
"net/http"
"net/url"
"testing"

"github.com/stjudewashere/seonaut/internal/issues/errors"
Expand Down Expand Up @@ -54,3 +55,81 @@ func TestLittleContentIssues(t *testing.T) {
t.Errorf("TestLittleContentIssues: reportsIssue should be true")
}
}

// Test NewIncorrectMediaType with URLs that have correct media types.
// It should not report any issue.
func TestIncorrectMediaTypeNoIssues(t *testing.T) {
u := "https://example.com/no-issues"
parsedURL, err := url.Parse(u)
if err != nil {
t.Errorf("error parsing URL: %v", err)
}

pageReport := &models.PageReport{
MediaType: "text/html",
URL: u,
ParsedURL: parsedURL,
}

reporter := page.NewIncorrectMediaType()
if reporter.ErrorType != errors.ErrorIncorrectMediaType {
t.Errorf("TestNoIssues: error type is not correct")
}

reportsIssue := reporter.Callback(pageReport, &html.Node{}, &http.Header{})
if reportsIssue == true {
t.Errorf("reportsIssue should be false")
}

// Test javascript extension.
u = "https://example.com/script.js"
parsedURL, err = url.Parse(u)
if err != nil {
t.Errorf("error parsing URL: %v", err)
}

pageReport = &models.PageReport{
MediaType: "application/javascript",
URL: u,
ParsedURL: parsedURL,
}

reportsIssue = reporter.Callback(pageReport, &html.Node{}, &http.Header{})
if reportsIssue == true {
t.Errorf("reportsIssue should be false")
}
}

// Test NewIncorrectMediaType with URLs that have incorrect media types.
// It should report the issues.
func TestIncorrectMediaTypeIssues(t *testing.T) {
pageReport := &models.PageReport{} // Test missing media type

reporter := page.NewIncorrectMediaType()
if reporter.ErrorType != errors.ErrorIncorrectMediaType {
t.Errorf("TestNoIssues: error type is not correct")
}

reportsIssue := reporter.Callback(pageReport, &html.Node{}, &http.Header{})
if reportsIssue == false {
t.Errorf("reportsIssue should be true")
}

// Test media type that doesn't match the file extension.
u := "https://example.com/issues.pdf"
parsedURL, err := url.Parse(u)
if err != nil {
t.Errorf("error parsing URL: %v", err)
}

pageReport = &models.PageReport{
MediaType: "text/html",
URL: u,
ParsedURL: parsedURL,
}

reportsIssue = reporter.Callback(pageReport, &html.Node{}, &http.Header{})
if reportsIssue == false {
t.Errorf("reportsIssue should be true")
}
}
1 change: 1 addition & 0 deletions internal/issues/page/reporters.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func GetAllReporters() []*models.PageIssueReporter {

// Add content issue reporters
NewLittleContentReporter(),
NewIncorrectMediaType(),

// Add scheme issue reporters
NewHTTPSchemeReporter(),
Expand Down
1 change: 1 addition & 0 deletions internal/models/project.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ type Project struct {
Deleting bool
BasicAuth bool
CheckExternalLinks bool
Archive bool
}
18 changes: 13 additions & 5 deletions internal/repository/project.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ func (ds *ProjectRepository) SaveProject(project *models.Project, uid int) {
allow_subdomains,
basic_auth,
user_id,
check_external_links
check_external_links,
archive
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`

stmt, _ := ds.DB.Prepare(query)
Expand All @@ -40,6 +41,7 @@ func (ds *ProjectRepository) SaveProject(project *models.Project, uid int) {
project.BasicAuth,
uid,
project.CheckExternalLinks,
project.Archive,
)
if err != nil {
log.Printf("saveProject: %v\n", err)
Expand All @@ -61,7 +63,8 @@ func (ds *ProjectRepository) FindProjectsByUser(uid int) []models.Project {
basic_auth,
deleting,
created,
check_external_links
check_external_links,
archive
FROM projects
WHERE user_id = ?
ORDER BY url ASC`
Expand All @@ -86,6 +89,7 @@ func (ds *ProjectRepository) FindProjectsByUser(uid int) []models.Project {
&p.Deleting,
&p.Created,
&p.CheckExternalLinks,
&p.Archive,
)
if err != nil {
log.Println(err)
Expand All @@ -112,7 +116,8 @@ func (ds *ProjectRepository) FindProjectById(id int, uid int) (models.Project, e
basic_auth,
deleting,
created,
check_external_links
check_external_links,
archive
FROM projects
WHERE id = ? AND user_id = ?`

Expand All @@ -131,6 +136,7 @@ func (ds *ProjectRepository) FindProjectById(id int, uid int) (models.Project, e
&p.Deleting,
&p.Created,
&p.CheckExternalLinks,
&p.Archive,
)
if err != nil {
log.Println(err)
Expand Down Expand Up @@ -169,7 +175,8 @@ func (ds *ProjectRepository) UpdateProject(p *models.Project) error {
crawl_sitemap = ?,
allow_subdomains = ?,
basic_auth = ?,
check_external_links = ?
check_external_links = ?,
archive = ?
WHERE id = ?
`
_, err := ds.DB.Exec(
Expand All @@ -181,6 +188,7 @@ func (ds *ProjectRepository) UpdateProject(p *models.Project) error {
p.AllowSubdomains,
p.BasicAuth,
p.CheckExternalLinks,
p.Archive,
p.Id,
)

Expand Down
3 changes: 2 additions & 1 deletion internal/routes/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func NewServer(container *services.Container) {
http.HandleFunc("GET /export/csv", container.CookieSession.Auth(exportHandler.csvHandler))
http.HandleFunc("GET /export/sitemap", container.CookieSession.Auth(exportHandler.sitemapHandler))
http.HandleFunc("GET /export/resources", container.CookieSession.Auth(exportHandler.resourcesHandler))
http.HandleFunc("GET /export/wazc", container.CookieSession.Auth(exportHandler.waczHandler))

// Issues routes
issueHandler := issueHandler{container}
Expand Down Expand Up @@ -82,6 +83,6 @@ func NewServer(container *services.Container) {
fmt.Printf("Starting server at %s on port %d...\n", container.Config.HTTPServer.Server, container.Config.HTTPServer.Port)
err := http.ListenAndServe(fmt.Sprintf("%s:%d", container.Config.HTTPServer.Server, container.Config.HTTPServer.Port), nil)
if err != nil {
log.Fatal(err)
log.Fatalf("error starting server: %v", err)
}
}
Loading

0 comments on commit 3592cee

Please sign in to comment.