Skip to content

Support ignoring images based on hash when inferring nonvisual reading #228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@ All notable changes to this project will be documented in this file.

**Warning:** Features marked as *alpha* may change or be removed in a future release without notice. Use with caution.

## [0.9.1] - 2025-05-05

### Added

- New config option available when creating a `Streamer`: `InferIgnoredImages`, a list of hashes of images to ignore when inferring nonvisual reading
- `analyzer.MatchImage` function that compares an image link's hashes with given hashes to check for a match
- `HashValue` has new `String` and `Equal` convenience functions. `HashList` has a new `Find` convenience function.

### Changed

- Renamed `analyzer.Image` to `analyzer.InspectImage`
- Slight adjustments to behavior of manifest properties functions

## [0.9.0] - 2025-04-30

### Removed
Expand Down
83 changes: 74 additions & 9 deletions pkg/analyzer/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func (p *imageProperties) EnhanceLink(link *manifest.Link) {
}
hashes.Deduplicate()

link.Properties["hash"] = hashes
link.Properties["hash"] = hashes.ToJSONArray()
link.Properties["animated"] = p.Animated
}

Expand All @@ -101,14 +101,31 @@ func hasVisualAlgorithm(hashes []manifest.HashAlgorithm) bool {
return visualHash
}

// Image inspects an image located in the provided filesystem, using the provided link's [manifest.HREF]
// InspectImage inspects an image located in the provided filesystem, using the provided link's [manifest.HREF]
// as a path. Additional properties from the link, such as the [mediatype.MediaType], may be used, and should
// be included. A copy of the provided link will be returned, with the `size`, `width`, `height` and
// `properties.animated` attributes set. A slice of [manifest.HashAlgorithm] can be provided, in which case
// the returned link will also have `properties.hash` set with the computed hashes. Currently, the supported
// algorithms are: [manifest.HashAlgorithmSHA256], [manifest.HashAlgorithmMD5], [manifest.HashAlgorithmPhashDCT],
// and `https://blurha.sh` (BlurHash). The latter two are visual hashes, which are more computationally expensive.
func Image(system fs.FS, link manifest.Link, algorithms []manifest.HashAlgorithm) (*manifest.Link, error) {
func InspectImage(system fs.FS, link manifest.Link, algorithms []manifest.HashAlgorithm) (*manifest.Link, error) {

// Skip any supplied algorithms for hashes that have already been computed in the link properties
neededAlgorithms := make([]manifest.HashAlgorithm, 0, len(algorithms))
existingHashes := link.Properties.Hash()
for _, algorithm := range algorithms {
exists := false
for _, hash := range existingHashes {
if hash.Algorithm == algorithm {
exists = true
break
}
}
if !exists && !slices.Contains(neededAlgorithms, algorithm) {
neededAlgorithms = append(neededAlgorithms, algorithm)
}
}

path := link.Href.String()
file, err := system.Open(path)
if err != nil {
Expand Down Expand Up @@ -236,7 +253,7 @@ func Image(system fs.FS, link manifest.Link, algorithms []manifest.HashAlgorithm
if err != nil {
return nil, errors.Wrap(err, "failed reopening file")
}
visualHash := hasVisualAlgorithm(algorithms)
visualHash := hasVisualAlgorithm(neededAlgorithms)
hashVisually := func(img image.Image) {
if !visualHash {
return
Expand All @@ -248,12 +265,12 @@ func Image(system fs.FS, link manifest.Link, algorithms []manifest.HashAlgorithm
img = imaging.Resize(img, 128, 0, imaging.Lanczos)
}

if slices.Contains(algorithms, manifest.HashAlgorithmPhashDCT) {
if slices.Contains(neededAlgorithms, manifest.HashAlgorithmPhashDCT) {
// Create phash and put it in a byte array
p.Hashes.PhashDCT = make([]byte, 8)
binary.BigEndian.PutUint64(p.Hashes.PhashDCT, phash.DTC(img))
}
if slices.Contains(algorithms, blurHashAlgorithm) {
if slices.Contains(neededAlgorithms, blurHashAlgorithm) {
// Create the blurhash
blurhash, _ := blurhash.Encode(5, 5, img)
p.Hashes.BlurHash = blurhash
Expand Down Expand Up @@ -343,21 +360,21 @@ func Image(system fs.FS, link manifest.Link, algorithms []manifest.HashAlgorithm
// TODO: rewrite more cleanly
s2hash := sha256.New()
mdhash := md5.New()
if slices.Contains(algorithms, manifest.HashAlgorithmSHA256) && slices.Contains(algorithms, manifest.HashAlgorithmMD5) {
if slices.Contains(neededAlgorithms, manifest.HashAlgorithmSHA256) && slices.Contains(neededAlgorithms, manifest.HashAlgorithmMD5) {
mw := io.MultiWriter(s2hash, mdhash)
if _, err := io.Copy(mw, file); err != nil {
return nil, errors.Wrap(err, "failed computing SHA256 and MD5 hashes")
}
p.Hashes.Sha256 = s2hash.Sum(nil)
p.Hashes.Md5 = mdhash.Sum(nil)
} else {
if slices.Contains(algorithms, manifest.HashAlgorithmSHA256) {
if slices.Contains(neededAlgorithms, manifest.HashAlgorithmSHA256) {
if _, err := io.Copy(s2hash, file); err != nil {
return nil, errors.Wrap(err, "failed computing SHA256 hash")
}
p.Hashes.Sha256 = s2hash.Sum(nil)
}
if slices.Contains(algorithms, manifest.HashAlgorithmMD5) {
if slices.Contains(neededAlgorithms, manifest.HashAlgorithmMD5) {
if _, err := io.Copy(mdhash, file); err != nil {
return nil, errors.Wrap(err, "failed computing MD5 hash")
}
Expand Down Expand Up @@ -387,3 +404,51 @@ func isWEBPAnimated(file io.Reader) (bool, error) {
}
return frames > 1, nil
}

// MatchImage reports whether any of the provided hashes matches a hash of the same
// algorithm found in the link's properties.
//
// An error is returned when the link has no media type or its media type is not a bitmap,
// and when a perceptual hash value (of the link or of the provided list) cannot be
// base64-decoded or is not exactly 8 bytes long. A link without any hashes never matches.
func MatchImage(link manifest.Link, hashes manifest.HashList) (bool, error) {
	if link.MediaType == nil || !link.MediaType.IsBitmap() {
		return false, errors.New("link is not to an image that can be matched")
	}

	known := link.Properties.Hash()
	if len(known) == 0 {
		// Nothing to compare against
		return false, nil
	}

	// decodePhash turns a base64-encoded, 8-byte perceptual hash into its big-endian uint64 form.
	// The messages are passed in so that each call site keeps its own error text.
	decodePhash := func(encoded, decodeMsg, lengthMsg string) (uint64, error) {
		raw, err := base64.StdEncoding.DecodeString(encoded)
		if err != nil {
			return 0, errors.Wrap(err, decodeMsg)
		}
		if len(raw) != 8 {
			return 0, errors.New(lengthMsg)
		}
		return binary.BigEndian.Uint64(raw), nil
	}

	for _, candidate := range hashes {
		own, found := known.Find(candidate.Algorithm)
		if !found {
			continue
		}

		// Plain equality check first
		if own.Equal(candidate) {
			return true, nil
		}

		// Only perceptual hashes get the additional distance-based comparison
		if own.Algorithm != manifest.HashAlgorithmPhashDCT {
			continue
		}

		// The link's value is decoded before the provided one, so its errors take precedence
		linkPhash, err := decodePhash(
			own.Value,
			"failed decoding perceptual hash value of link",
			"perceptual hash value of link is not 8 bytes in length",
		)
		if err != nil {
			return false, err
		}
		providedPhash, err := decodePhash(
			candidate.Value,
			"failed decoding provided perceptual hash value",
			"provided perceptual hash value is not 8 bytes in length",
		)
		if err != nil {
			return false, err
		}

		// A distance of zero is the only one treated as a match
		if phash.Distance(linkPhash, providedPhash) == 0 {
			return true, nil
		}
	}

	return false, nil
}
151 changes: 151 additions & 0 deletions pkg/analyzer/image_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package analyzer

import (
"os"
"testing"

"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/go-toolkit/pkg/mediatype"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestInspectImage runs InspectImage against the fixtures in testdata/ and checks the
// reported dimensions, size, animation flag and (when requested) computed hashes,
// as well as the error cases for a corrupt PNG and a JXL file.
func TestInspectImage(t *testing.T) {
	testdata := os.DirFS("testdata/")
	noHashes := []manifest.HashAlgorithm{}
	cat := manifest.Link{
		Href:      manifest.MustNewHREFFromString("catsink.jpg", false),
		MediaType: &mediatype.JPEG,
	}

	// Still JPEG, no hashes requested
	inspected, err := InspectImage(testdata, cat, noHashes)
	require.NoError(t, err)
	require.NotNil(t, inspected)
	assert.Equal(t, uint(615), inspected.Width)
	assert.Equal(t, uint(458), inspected.Height)
	assert.Equal(t, uint(36710), inspected.Size)
	assert.False(t, inspected.Properties.Get("animated").(bool))
	assert.Empty(t, inspected.Properties.Hash())

	// Animated WebP
	animatedWEBP := manifest.Link{
		Href:      manifest.MustNewHREFFromString("animated.webp", false),
		MediaType: &mediatype.WEBP,
	}
	inspected, err = InspectImage(testdata, animatedWEBP, noHashes)
	require.NoError(t, err)
	require.NotNil(t, inspected)
	assert.Equal(t, uint(1000), inspected.Width)
	assert.Equal(t, uint(1000), inspected.Height)
	assert.Equal(t, uint(5764), inspected.Size)
	assert.True(t, inspected.Properties.Get("animated").(bool))

	// Animated PNG
	animatedPNG := manifest.Link{
		Href:      manifest.MustNewHREFFromString("animated.png", false),
		MediaType: &mediatype.PNG,
	}
	inspected, err = InspectImage(testdata, animatedPNG, noHashes)
	require.NoError(t, err)
	require.NotNil(t, inspected)
	assert.Equal(t, uint(1000), inspected.Width)
	assert.Equal(t, uint(1000), inspected.Height)
	assert.Equal(t, uint(2932), inspected.Size)
	assert.True(t, inspected.Properties.Get("animated").(bool))

	// A corrupt PNG must produce an error
	corruptPNG := manifest.Link{
		Href:      manifest.MustNewHREFFromString("corrupt.png", false),
		MediaType: &mediatype.PNG,
	}
	_, err = InspectImage(testdata, corruptPNG, noHashes)
	require.Error(t, err)

	// JXL is rejected with a specific message
	jxl := manifest.Link{
		Href:      manifest.MustNewHREFFromString("frame1.jxl", false),
		MediaType: &mediatype.JXL,
	}
	_, err = InspectImage(testdata, jxl, noHashes)
	require.ErrorContains(t, err, "JXL file format is currently unsupported")

	// SHA-256 requested alongside an algorithm that yields no hash
	inspected, err = InspectImage(testdata, cat, []manifest.HashAlgorithm{
		manifest.HashAlgorithmBlake2b, // This is expected not to do anything
		manifest.HashAlgorithmSHA256,
	})
	require.NoError(t, err)
	require.NotNil(t, inspected)
	wantSHA256 := manifest.HashValue{
		Algorithm: manifest.HashAlgorithmSHA256,
		Value:     "nzGm6cNL7fAadGSoFdtLzg/Z3MFqe3/fiWUZF9CPAKY=",
	}
	if got := inspected.Properties.Hash(); assert.Len(t, got, 1) {
		assert.True(t, got[0].Equal(wantSHA256))
	}

	// Perceptual hash only
	inspected, err = InspectImage(testdata, cat, []manifest.HashAlgorithm{
		manifest.HashAlgorithmPhashDCT,
	})
	require.NoError(t, err)
	require.NotNil(t, inspected)
	wantPhash := manifest.HashValue{
		Algorithm: manifest.HashAlgorithmPhashDCT,
		Value:     "TL5pWb0AIL8=",
	}
	if got := inspected.Properties.Hash(); assert.Len(t, got, 1) {
		assert.True(t, got[0].Equal(wantPhash))
	}
}

// TestMatchImage checks MatchImage for a non-image link (error), for exact SHA-256
// matches and mismatches, and for perceptual-hash comparisons between fixtures.
func TestMatchImage(t *testing.T) {
	testdata := os.DirFS("testdata/")

	// A link that is not an image is rejected with an error
	matched, err := MatchImage(manifest.Link{
		Href:      manifest.MustNewHREFFromString("audio.mp3", false),
		MediaType: &mediatype.MP3,
	}, manifest.HashList{})
	require.ErrorContains(t, err, "link is not to an image that can be matched")
	require.False(t, matched)

	// Inspect the cat picture with both a cryptographic and a perceptual hash
	cat, err := InspectImage(testdata, manifest.Link{
		Href:      manifest.MustNewHREFFromString("catsink.jpg", false),
		MediaType: &mediatype.JPEG,
	}, []manifest.HashAlgorithm{
		manifest.HashAlgorithmSHA256,
		manifest.HashAlgorithmPhashDCT,
	})
	require.NoError(t, err)
	require.NotNil(t, cat)

	// The correct SHA-256 value matches
	correctSHA := manifest.HashList{
		manifest.HashValue{
			Algorithm: manifest.HashAlgorithmSHA256,
			Value:     "nzGm6cNL7fAadGSoFdtLzg/Z3MFqe3/fiWUZF9CPAKY=",
		},
	}
	matched, err = MatchImage(*cat, correctSHA)
	require.NoError(t, err)
	require.True(t, matched)

	// A different SHA-256 value does not match
	wrongSHA := manifest.HashList{
		manifest.HashValue{
			Algorithm: manifest.HashAlgorithmSHA256,
			Value:     "xxxxxxxxfAadGSoFdtLzg/Z3MFqe3/fiWUZF9CPAKY=",
		},
	}
	matched, err = MatchImage(*cat, wrongSHA)
	require.NoError(t, err)
	require.False(t, matched)

	// Two frame fixtures, perceptual hash only
	phashOnly := []manifest.HashAlgorithm{manifest.HashAlgorithmPhashDCT}
	frame1, err := InspectImage(testdata, manifest.Link{
		Href:      manifest.MustNewHREFFromString("frame1.png", false),
		MediaType: &mediatype.PNG,
	}, phashOnly)
	require.NoError(t, err)
	require.NotNil(t, frame1)
	frame2, err := InspectImage(testdata, manifest.Link{
		Href:      manifest.MustNewHREFFromString("frame2.png", false),
		MediaType: &mediatype.PNG,
	}, phashOnly)
	require.NoError(t, err)
	require.NotNil(t, frame2)

	frame1Hashes := frame1.Properties.Hash()
	frame2Hashes := frame2.Properties.Hash()
	if assert.Len(t, frame1Hashes, 1) && assert.Len(t, frame2Hashes, 1) {
		// frame1 is expected to match frame2's perceptual hash
		matched, err = MatchImage(*frame1, frame2Hashes)
		require.NoError(t, err)
		assert.True(t, matched)

		// The cat picture is expected not to match frame1's perceptual hash
		matched, err = MatchImage(*cat, frame1Hashes)
		require.NoError(t, err)
		assert.False(t, matched)
	}
}
Binary file added pkg/analyzer/testdata/animated.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkg/analyzer/testdata/animated.webp
Binary file not shown.
Binary file added pkg/analyzer/testdata/catsink.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkg/analyzer/testdata/corrupt.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkg/analyzer/testdata/frame1.jxl
Binary file not shown.
Binary file added pkg/analyzer/testdata/frame1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added pkg/analyzer/testdata/frame2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 9 additions & 1 deletion pkg/fetcher/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ func (f *fsResource) Read(b []byte) (int, error) {
}
return len(bin), rerr
}
// Out-of-range indexes are clamped to the available length automatically when calling `Read`
// That means we need to find the EOF ourselves by comparing the length requested and returned
if len(bin) < len(b) {
if len(bin) > 0 {
copy(b, bin)
}
return len(bin), io.EOF
}
return copy(b, bin), nil
}

Expand Down Expand Up @@ -165,7 +173,7 @@ func (f fsFetcher) Open(name string) (fs.File, error) {
return &fsResource{r: r, ctx: f.ctx}, nil
}

// ToFS turns a [Fetcher] into a [fs.FS] virtual filesystem.
// The provided context is stored in the returned value together with the fetcher.
func ToFS(ctx context.Context, f Fetcher) fsFetcher {
return fsFetcher{f, ctx}
}
Expand Down
21 changes: 9 additions & 12 deletions pkg/manifest/properties.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,14 @@ func (p Properties) Layout() EPUBLayout {
}

func (p Properties) Encryption() *Encryption {
mp, ok := p.Get("encrypted").(map[string]interface{})
v := p.Get("encrypted")
if v == nil {
return nil
}
mp, ok := v.(map[string]interface{})
if mp == nil || !ok {
return nil
}

enc, err := EncryptionFromJSON(mp)
if err != nil {
return nil
Expand All @@ -115,11 +118,8 @@ func (p Properties) Encryption() *Encryption {
}

func (p Properties) Contains() []string {
if p == nil {
return nil
}
v, ok := p["contains"]
if !ok {
v := p.Get("contains")
if v == nil {
return nil
}
cv, ok := v.([]string)
Expand All @@ -130,11 +130,8 @@ func (p Properties) Contains() []string {
}

func (p Properties) Hash() HashList {
if p == nil {
return nil
}
v, ok := p["hash"]
if !ok {
v := p.Get("hash")
if v == nil {
return nil
}
cv, ok := v.([]interface{})
Expand Down
Loading