From d401cf08e42c5a354a16628342bc2e5901c9545f Mon Sep 17 00:00:00 2001 From: Arran Hobson Sayers Date: Fri, 19 Apr 2024 03:24:40 +0100 Subject: [PATCH] Improve book search --- .golangci.yaml | 3 ++ goodreads/book.go | 16 +++++-- goodreads/client.go | 93 +++++++++++++++++++++++++++++++++++----- goodreads/client_test.go | 43 ++++++++++--------- server/book.go | 2 +- utils/utils.go | 23 ++++++++++ 6 files changed, 145 insertions(+), 35 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index 930693a..1f31fdb 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -22,6 +22,9 @@ linters-settings: - name: add-constant disabled: true + - name: cognitive-complexity + arguments: [15] + - name: line-length-limit arguments: [120] diff --git a/goodreads/book.go b/goodreads/book.go index 18b80a0..32809ce 100644 --- a/goodreads/book.go +++ b/goodreads/book.go @@ -7,6 +7,8 @@ import ( "strings" "github.com/k3a/html2text" + "golang.org/x/text/language" + "golang.org/x/text/language/display" ) type Book struct { @@ -19,8 +21,9 @@ type Book struct { func (b *Book) Sanitise() { b.BestEdition.Sanitise() - for _, series := range b.Series { + for idx, series := range b.Series { series.Sanitise() + b.Series[idx] = series } } @@ -47,7 +50,6 @@ type Work struct { Title string `xml:"original_title"` MediaType string `xml:"media_type"` EditionsCount int `xml:"books_count"` - Language int `xml:"original_language_id"` // Publication PublicationYear int `xml:"original_publication_year"` @@ -80,7 +82,7 @@ type Edition struct { PublicationDay string `xml:"publication_day"` Publisher string `xml:"publisher"` CountryCode string `xml:"country_code"` - LanguageCode string `xml:"language_code"` + Language string `xml:"language_code"` } func (e *Edition) Sanitise() { @@ -93,6 +95,14 @@ func (e *Edition) Sanitise() { // Should be: // "https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1546071216l/5907.jpg" e.ImageURL = (regexp.MustCompile(`(\d+)\..*?\.(jpe?g)`).ReplaceAllString(e.ImageURL, "$1.$2")) + + // Convert language from code to name (if possible) + lang, err := language.Parse(e.Language) + if err == nil { + e.Language = display.English.Languages().Name(lang) + } else { + e.Language = strings.ToTitle(e.Language) + } } type SeriesBook struct { diff --git a/goodreads/client.go b/goodreads/client.go index 417d516..e64e5e9 100644 --- a/goodreads/client.go +++ b/goodreads/client.go @@ -9,7 +9,10 @@ import ( "net/http" "net/url" "regexp" + "strconv" "sync" + + "github.com/ahobsonsayers/abs-goodreads/utils" ) const ( @@ -99,18 +102,28 @@ func (c *Client) GetBookById(ctx context.Context, bookId string) (Book, error) { func (c *Client) GetBooksByIds(ctx context.Context, bookIds []string) ([]Book, error) { books := make([]Book, len(bookIds)) var errs error + var wg sync.WaitGroup + var booksMutex sync.Mutex + var errsMutex sync.Mutex + for idx, bookId := range bookIds { wg.Add(1) + go func(bookId string, idx int) { defer wg.Done() book, err := c.GetBookById(ctx, bookId) if err != nil { + errsMutex.Lock() errs = errors.Join(errs, err) + errsMutex.Unlock() return } + + booksMutex.Lock() books[idx] = book + booksMutex.Unlock() }(bookId, idx) } @@ -145,26 +158,84 @@ func (c *Client) GetBookByTitle(ctx context.Context, bookTitle string, bookAutho // SearchBooks search for a book by its title and optionally an author (which can give better results) // https://www.goodreads.com/api/index#search.books func (c *Client) SearchBooks(ctx context.Context, bookTitle string, bookAuthor *string) ([]Book, error) { - query := bookTitle - if bookAuthor != nil && *bookAuthor != "" { - query = fmt.Sprintf("%s %s", query, *bookAuthor) + // Search for books via title, getting their ids. + bookIds, err := c.searchBookIdsByTitle(ctx, bookTitle) + if err != nil { + return nil, err } - queryParams := map[string]string{"q": query} - // Search for books, getting their ids - var unmarshaller struct { - BookIds []string `xml:"search>results>work>best_book>id"` + // If author is set, also search for books via author, getting their ids. + // Only keeps book ids that appear in both title and author searches + if bookAuthor != nil && *bookAuthor != "" { + authorBookIds, err := c.searchBookIdsByAuthor(ctx, *bookAuthor) + if err != nil { + return nil, err + } + + bookIds = utils.Intersection(bookIds, authorBookIds) } - err := c.Get(ctx, "search/index.xml", queryParams, &unmarshaller) + + // Get book details using their ids + books, err := c.GetBooksByIds(ctx, bookIds) if err != nil { return nil, err } - // Get book details using their ids - books, err := c.GetBooksByIds(ctx, unmarshaller.BookIds) + return books, nil +} + +type bookIdUnmarshaller struct { + BookIds []string `xml:"search>results>work>best_book>id"` +} + +func (c *Client) searchBookIdsByTitle(ctx context.Context, title string) ([]string, error) { + queryParams := map[string]string{ + "q": title, + "search[field]": "title", + } + var unmarshaller bookIdUnmarshaller + err := c.Get(ctx, "search/index.xml", queryParams, &unmarshaller) if err != nil { return nil, err } - return books, nil + return unmarshaller.BookIds, nil +} + +func (c *Client) searchBookIdsByAuthor(ctx context.Context, author string) ([]string, error) { + var bookIds []string + var errs error + + var wg sync.WaitGroup + var bookIdsMutex sync.Mutex + var errsMutex sync.Mutex + + for pageNumber := 1; pageNumber <= 5; pageNumber++ { + wg.Add(1) + + go func(page int) { + defer wg.Done() + + queryParams := map[string]string{ + "q": author, + "page": strconv.Itoa(page), + "search[field]": "author", + } + var unmarshaller bookIdUnmarshaller + err := c.Get(ctx, "search/index.xml", queryParams, &unmarshaller) + if err != nil { + errsMutex.Lock() + errs = errors.Join(errs, err) + errsMutex.Unlock() + return + } + + bookIdsMutex.Lock() + bookIds = append(bookIds, unmarshaller.BookIds...) + bookIdsMutex.Unlock() + }(pageNumber) + } + wg.Wait() + + return bookIds, errs } diff --git a/goodreads/client_test.go b/goodreads/client_test.go index d0d9924..585a214 100644 --- a/goodreads/client_test.go +++ b/goodreads/client_test.go @@ -6,10 +6,15 @@ import ( "testing" "github.com/ahobsonsayers/abs-goodreads/goodreads" + "github.com/ahobsonsayers/abs-goodreads/utils" "github.com/stretchr/testify/require" ) -const TheHobbitBookId = "5907" +const ( + TheHobbitBookId = "5907" + TheHobbitBookTitle = "The Hobbit" + TheHobbitBookAuthor = "J.R.R. Tolkien" +) func TestGetBookById(t *testing.T) { client := goodreads.NewClient(http.DefaultClient, nil, nil) @@ -17,40 +22,38 @@ func TestGetBookById(t *testing.T) { book, err := client.GetBookById(context.Background(), TheHobbitBookId) require.NoError(t, err) - require.Equal(t, "The Hobbit", book.Work.Title) - require.Equal(t, TheHobbitBookId, book.BestEdition.Id) - require.Equal(t, "J.R.R. Tolkien", book.Authors[0].Name) - require.Equal(t, "The Lord of the Rings", book.Series[0].Series.Title) - require.Equal(t, "0", *book.Series[0].BookPosition) + checkTheHobbitBookDetails(t, book) } func TestGetBookByTitle(t *testing.T) { client := goodreads.NewClient(http.DefaultClient, nil, nil) - theHobbitSearchQuery := "The Hobbit" - book, err := client.GetBookByTitle(context.Background(), theHobbitSearchQuery, nil) + book, err := client.GetBookByTitle(context.Background(), TheHobbitBookTitle, nil) require.NoError(t, err) - // Check first book returned - require.Equal(t, "The Hobbit", book.Work.Title) - require.Equal(t, TheHobbitBookId, book.BestEdition.Id) - require.Equal(t, "J.R.R. Tolkien", book.Authors[0].Name) - require.Equal(t, "The Lord of the Rings", book.Series[0].Series.Title) - require.Equal(t, "0", *book.Series[0].BookPosition) + checkTheHobbitBookDetails(t, book) } func TestSearch(t *testing.T) { client := goodreads.NewClient(http.DefaultClient, nil, nil) - theHobbitSearchQuery := "The Hobbit" - books, err := client.SearchBooks(context.Background(), theHobbitSearchQuery, nil) + books, err := client.SearchBooks( + context.Background(), + TheHobbitBookTitle, + utils.ToPointer(TheHobbitBookAuthor), + ) require.NoError(t, err) - // Check first book returned - book := books[0] - require.Equal(t, "The Hobbit", book.Work.Title) + checkTheHobbitBookDetails(t, books[0]) +} + +func checkTheHobbitBookDetails(t *testing.T, book goodreads.Book) { + require.Equal(t, TheHobbitBookTitle, book.Work.Title) require.Equal(t, TheHobbitBookId, book.BestEdition.Id) - require.Equal(t, "J.R.R. Tolkien", book.Authors[0].Name) + require.Regexp(t, "1546071216l/5907.jpg$", book.BestEdition.ImageURL) + require.Equal(t, "English", book.BestEdition.Language) + require.Equal(t, TheHobbitBookAuthor, book.Authors[0].Name) require.Equal(t, "The Lord of the Rings", book.Series[0].Series.Title) require.Equal(t, "0", *book.Series[0].BookPosition) + require.EqualValues(t, []string{"Fantasy", "Classic", "Fiction"}, book.Genres) } diff --git a/server/book.go b/server/book.go index 3758fd5..7a60d71 100644 --- a/server/book.go +++ b/server/book.go @@ -32,7 +32,7 @@ func GoodreadsBookToAudioBookShelfBook(goodreadsBook goodreads.Book) BookMetadat Cover: utils.ToPointer(goodreadsBook.BestEdition.ImageURL), Description: &goodreadsBook.BestEdition.Description, Publisher: &goodreadsBook.BestEdition.Publisher, - Language: &goodreadsBook.BestEdition.LanguageCode, + Language: &goodreadsBook.BestEdition.Language, // Other fields Series: &series, Genres: utils.ToPointer([]string(goodreadsBook.Genres)), diff --git a/utils/utils.go b/utils/utils.go index 72b9678..420c25c 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -1,5 +1,28 @@ package utils +import ( + "slices" + + mapset "github.com/deckarep/golang-set/v2" +) + func ToPointer[T any](value T) *T { return &value } + +// Intersection returns the elements in slice 1 that also +// appear in slice 2. Returned slice order will match that +// of slice 1. If slice 1 has duplicates of a value that is +// present in slice 2, all duplicates will be included. +func Intersection[T comparable](slice1, slice2 []T) []T { + set2 := mapset.NewSet(slice2...) + + intersection := make([]T, 0, len(slice1)) + for _, elem := range slice1 { + if set2.Contains(elem) { + intersection = append(intersection, elem) + } + } + + return slices.Clip(intersection) +}