Skip to content

Commit

Permalink
Merge pull request #44 from gdholtslander/error-matching
Browse files Browse the repository at this point in the history
Add defined errors to the package
  • Loading branch information
anaskhan96 authored Nov 17, 2019
2 parents b32d4fb + 5dd62b9 commit 2dc0401
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 36 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
## v1.1
## v1.2.0

### Added

- `Error` type and `ErrorType` constants (e.g. `ErrElementNotFound`, `ErrInGetRequest`). An `Error` carries the kind of failure in its `Type` field and the templated error message in its `Error()` method.

### Changed

- Errors returned by the package are now of type `soup.Error`, whose `Type` field is one of a standard set of error types defined by the package. The detailed error message is still available by calling `Error()` on the returned error.

## v1.1.0

### Added

Expand Down
33 changes: 33 additions & 0 deletions examples/errors/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Errors happen. This example shows how to detect and handle some of them.

package main

import (
"fmt"
"log"

"github.com/anaskhan96/soup"
)

// main demonstrates how to detect and handle the typed errors returned by soup.
func main() {
	// The comma-ok assertion never panics: it simply reports ok == false when
	// err is nil or is not a soup.Error.
	_, err := soup.Get("this url isn't real!")
	if se, ok := err.(soup.Error); ok && se.Type == soup.ErrInGetRequest {
		// Handle as required!
		log.Printf("GET request failed: %v", se)
	}

	url := fmt.Sprintf("https://xkcd.com/%d", 50)
	xkcd, err := soup.Get(url)
	if err != nil {
		// Without a page there is nothing to parse, so stop here.
		log.Printf("Unable to fetch %s: %v", url, err)
		return
	}
	xkcdSoup := soup.HTMLParse(xkcd)
	if xkcdSoup.Error != nil {
		// A failed parse yields a Root without a node, so do not search it.
		log.Printf("Unable to parse %s: %v", url, xkcdSoup.Error)
		return
	}
	links := xkcdSoup.Find("div", "id", "linkz")
	if se, ok := links.Error.(soup.Error); ok && se.Type == soup.ErrElementNotFound {
		log.Printf("Element not found: %v", links.Error)
	}
	// These error types were introduced in version 1.2.0, but just checking for err still works:
	links = xkcdSoup.Find("div", "id", "links2")
	if links.Error != nil {
		log.Printf("Something happened: %s", links.Error)
	}
}
111 changes: 79 additions & 32 deletions soup.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ package soup

import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"net/http"
"regexp"
Expand All @@ -15,20 +15,67 @@ import (
"golang.org/x/net/html"
)

// Root is a structure containing a pointer to an html node, the node value, and an error variable to return an error if occurred
// ErrorType identifies the category of failure reported by soup.
type ErrorType int

const (
	// ErrUnableToParse means the HTML could not be parsed.
	ErrUnableToParse ErrorType = iota
	// ErrElementNotFound means no element matched the search.
	ErrElementNotFound
	// ErrNoNextSibling means the node has no next sibling.
	ErrNoNextSibling
	// ErrNoPreviousSibling means the node has no previous sibling.
	ErrNoPreviousSibling
	// ErrNoNextElementSibling means the node has no next element sibling.
	ErrNoNextElementSibling
	// ErrNoPreviousElementSibling means the node has no previous element sibling.
	ErrNoPreviousElementSibling
	// ErrCreatingGetRequest means the GET request could not be created.
	ErrCreatingGetRequest
	// ErrInGetRequest means the GET request failed while being performed.
	ErrInGetRequest
	// ErrReadingResponse means the body of the GET response could not be read.
	ErrReadingResponse
)

// Error is the error value produced by this package.
// Compare its Type field against the exported ErrorType constants to find out
// what kind of failure occurred, then call Error() for the human-readable
// message, which may carry more specific details (an ErrElementNotFound
// message, for instance, names the element that was searched for).
type Error struct {
	Type ErrorType
	msg  string
}

// Error returns the error message, satisfying the built-in error interface.
func (e Error) Error() string {
	return e.msg
}

// newError builds an Error of type t carrying the message msg.
func newError(t ErrorType, msg string) Error {
	var e Error
	e.Type, e.msg = t, msg
	return e
}

// Root is a structure containing a pointer to an html node, the node value,
// and an error variable that is set if an error occurred while producing it.
type Root struct {
// Pointer is the underlying html node.
Pointer *html.Node
// NodeValue is the node's value; the functions in this file fill it from the node's Data field.
NodeValue string
// Error is non-nil when the operation that produced this Root failed.
Error error
}

var debug = false
// Init a new HTTP client for use when the client doesn't want to use their own.
var (
defaultClient = &http.Client{}

// Headers contains all HTTP headers to send
var Headers = make(map[string]string)
debug = false

// Cookies contains all HTTP cookies to send
var Cookies = make(map[string]string)
// Headers contains all HTTP headers to send
Headers = make(map[string]string)

// Cookies contains all HTTP cookies to send
Cookies = make(map[string]string)
)

// SetDebug sets the debug status
// Setting this to true causes the panics to be thrown and logged onto the console.
Expand All @@ -42,6 +89,7 @@ func Header(n string, v string) {
Headers[n] = v
}

// Cookie stores the cookie named n with value v in Cookies, replacing any
// value previously stored under that name.
func Cookie(n, v string) {
	Cookies[n] = v
}
Expand All @@ -53,7 +101,7 @@ func GetWithClient(url string, client *http.Client) (string, error) {
if debug {
panic("Couldn't perform GET request to " + url)
}
return "", errors.New("couldn't perform GET request to " + url)
return "", newError(ErrCreatingGetRequest, "error creating get request to "+url)
}
// Set headers
for hName, hValue := range Headers {
Expand All @@ -72,24 +120,22 @@ func GetWithClient(url string, client *http.Client) (string, error) {
if debug {
panic("Couldn't perform GET request to " + url)
}
return "", errors.New("couldn't perform GET request to " + url)
return "", newError(ErrInGetRequest, "couldn't perform GET request to "+url)
}
defer resp.Body.Close()
bytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
if debug {
panic("Unable to read the response body")
}
return "", errors.New("unable to read the response body")
return "", newError(ErrReadingResponse, "unable to read the response body")
}
return string(bytes), nil
}

// Get returns the HTML returned by the url in string using the default HTTP client
func Get(url string) (string, error) {
// Init a new HTTP client
client := &http.Client{}
return GetWithClient(url, client)
return GetWithClient(url, defaultClient)
}

// HTMLParse parses the HTML returning a start pointer to the DOM
Expand All @@ -99,7 +145,7 @@ func HTMLParse(s string) Root {
if debug {
panic("Unable to parse the HTML")
}
return Root{nil, "", errors.New("unable to parse the HTML")}
return Root{Error: newError(ErrUnableToParse, "unable to parse the HTML")}
}
for r.Type != html.ElementNode {
switch r.Type {
Expand All @@ -111,7 +157,7 @@ func HTMLParse(s string) Root {
r = r.NextSibling
}
}
return Root{r, r.Data, nil}
return Root{Pointer: r, NodeValue: r.Data}
}

// Find finds the first occurrence of the given tag name,
Expand All @@ -123,9 +169,9 @@ func (r Root) Find(args ...string) Root {
if debug {
panic("Element `" + args[0] + "` with attributes `" + strings.Join(args[1:], " ") + "` not found")
}
return Root{nil, "", errors.New("element `" + args[0] + "` with attributes `" + strings.Join(args[1:], " ") + "` not found")}
return Root{Error: newError(ErrElementNotFound, fmt.Sprintf("element `%s` with attributes `%s` not found", args[0], strings.Join(args[1:], " ")))}
}
return Root{temp, temp.Data, nil}
return Root{Pointer: temp, NodeValue: temp.Data}
}

// FindAll finds all occurrences of the given tag name,
Expand All @@ -142,7 +188,7 @@ func (r Root) FindAll(args ...string) []Root {
}
pointers := make([]Root, 0, len(temp))
for i := 0; i < len(temp); i++ {
pointers = append(pointers, Root{temp[i], temp[i].Data, nil})
pointers = append(pointers, Root{Pointer: temp[i], NodeValue: temp[i].Data})
}
return pointers
}
Expand All @@ -155,9 +201,9 @@ func (r Root) FindStrict(args ...string) Root {
if debug {
panic("Element `" + args[0] + "` with attributes `" + strings.Join(args[1:], " ") + "` not found")
}
return Root{nil, "", errors.New("element `" + args[0] + "` with attributes `" + strings.Join(args[1:], " ") + "` not found")}
return Root{nil, "", newError(ErrElementNotFound, fmt.Sprintf("element `%s` with attributes `%s` not found", args[0], strings.Join(args[1:], " ")))}
}
return Root{temp, temp.Data, nil}
return Root{Pointer: temp, NodeValue: temp.Data}
}

// FindAllStrict finds all occurrences of the given tag name
Expand All @@ -172,7 +218,7 @@ func (r Root) FindAllStrict(args ...string) []Root {
}
pointers := make([]Root, 0, len(temp))
for i := 0; i < len(temp); i++ {
pointers = append(pointers, Root{temp[i], temp[i].Data, nil})
pointers = append(pointers, Root{Pointer: temp[i], NodeValue: temp[i].Data})
}
return pointers
}
Expand All @@ -185,9 +231,9 @@ func (r Root) FindNextSibling() Root {
if debug {
panic("No next sibling found")
}
return Root{nil, "", errors.New("no next sibling found")}
return Root{Error: newError(ErrNoNextSibling, "no next sibling found")}
}
return Root{nextSibling, nextSibling.Data, nil}
return Root{Pointer: nextSibling, NodeValue: nextSibling.Data}
}

// FindPrevSibling finds the previous sibling of the pointer in the DOM
Expand All @@ -198,9 +244,10 @@ func (r Root) FindPrevSibling() Root {
if debug {
panic("No previous sibling found")
}
return Root{nil, "", errors.New("no previous sibling found")}

return Root{Error: newError(ErrNoPreviousSibling, "no previous sibling found")}
}
return Root{prevSibling, prevSibling.Data, nil}
return Root{Pointer: prevSibling, NodeValue: prevSibling.Data}
}

// FindNextElementSibling finds the next element sibling of the pointer in the DOM
Expand All @@ -211,12 +258,12 @@ func (r Root) FindNextElementSibling() Root {
if debug {
panic("No next element sibling found")
}
return Root{nil, "", errors.New("no next element sibling found")}
return Root{Error: newError(ErrNoNextElementSibling, "no next element sibling found")}
}
if nextSibling.Type == html.ElementNode {
return Root{nextSibling, nextSibling.Data, nil}
return Root{Pointer: nextSibling, NodeValue: nextSibling.Data}
}
p := Root{nextSibling, nextSibling.Data, nil}
p := Root{Pointer: nextSibling, NodeValue: nextSibling.Data}
return p.FindNextElementSibling()
}

Expand All @@ -228,12 +275,12 @@ func (r Root) FindPrevElementSibling() Root {
if debug {
panic("No previous element sibling found")
}
return Root{nil, "", errors.New("no previous element sibling found")}
return Root{Error: newError(ErrNoPreviousElementSibling, "no previous element sibling found")}
}
if prevSibling.Type == html.ElementNode {
return Root{prevSibling, prevSibling.Data, nil}
return Root{Pointer: prevSibling, NodeValue: prevSibling.Data}
}
p := Root{prevSibling, prevSibling.Data, nil}
p := Root{Pointer: prevSibling, NodeValue: prevSibling.Data}
return p.FindPrevElementSibling()
}

Expand All @@ -242,7 +289,7 @@ func (r Root) Children() []Root {
child := r.Pointer.FirstChild
var children []Root
for child != nil {
children = append(children, Root{child, child.Data, nil})
children = append(children, Root{Pointer: child, NodeValue: child.Data})
child = child.NextSibling
}
return children
Expand Down
22 changes: 19 additions & 3 deletions soup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"strconv"
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

const testHTML = `
Expand Down Expand Up @@ -205,10 +207,24 @@ func TestFullText(t *testing.T) {
}

func TestFullTextEmpty(t *testing.T) {
// <div id="5"><h1><span></span></h1></div>
h1 := doc.Find("div", "id", "5").Find("h1")
// <div id="5"><h1><span></span></h1></div>
h1 := doc.Find("div", "id", "5").Find("h1")

if h1.FullText() != "" {
if h1.FullText() != "" {
t.Errorf("Wrong text: %s", h1.FullText())
}
}

func TestNewErrorReturnsInspectableError(t *testing.T) {
err := newError(ErrElementNotFound, "element not found")
assert.NotNil(t, err)
assert.Equal(t, ErrElementNotFound, err.Type)
assert.Equal(t, "element not found", err.Error())
}

// TestFindReturnsInspectableError checks that a failed Find reports an Error
// of type ErrElementNotFound together with the templated message.
func TestFindReturnsInspectableError(t *testing.T) {
	missing := doc.Find("bogus", "thing")
	findErr := missing.Error

	assert.IsType(t, Error{}, findErr)
	assert.Equal(t, "element `bogus` with attributes `thing` not found", findErr.Error())
	assert.Equal(t, ErrElementNotFound, findErr.(Error).Type)
}

0 comments on commit 2dc0401

Please sign in to comment.