From f6ddd1394f051620104c39c3292ed4b3995edf1a Mon Sep 17 00:00:00 2001 From: Johannes Buchner Date: Mon, 27 Jun 2016 12:48:36 +0100 Subject: [PATCH] version 2 --- .gitignore | 5 +++++ README.rst | 2 ++ find_similar_images.py | 14 ++++++++++---- imagehash/__init__.py | 5 ++++- setup.py | 7 ++++--- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 0d20b64..34ce7cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,6 @@ *.pyc +env +*.jpg +build +dist +ImageHash.egg-info/ diff --git a/README.rst b/README.rst index 680cfe9..da1b755 100644 --- a/README.rst +++ b/README.rst @@ -6,6 +6,7 @@ A image hashing library written in Python. ImageHash supports: * average hashing (`aHash`_) * perception hashing (`pHash`_) * difference hashing (`dHash`_) +* wavelet hashing (`wHash`_) Requirements ------------- @@ -36,6 +37,7 @@ Source hosted at github: https://github.com/JohannesBuchner/imagehash .. _aHash: http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html .. _pHash: http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html .. _dHash: http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html +.. _wHash: https://www.kaggle.com/c/avito-duplicate-ads-detection/ .. _pypi: https://pypi.python.org/pypi/ImageHash diff --git a/find_similar_images.py b/find_similar_images.py index 948cddc..bf85abf 100644 --- a/find_similar_images.py +++ b/find_similar_images.py @@ -26,14 +26,16 @@ def is_image(filename): if __name__ == '__main__': import sys, os def usage(): - sys.stderr.write("""SYNOPSIS: %s [ahash|phash|dhash] [] + sys.stderr.write("""SYNOPSIS: %s [ahash|phash|dhash|...] [] Identifies similar images in the directory. Method: - ahash: Average hash - phash: Perceptual hash - dhash: Difference hash + ahash: Average hash + phash: Perceptual hash + dhash: Difference hash + whash-haar: Haar wavelet hash + whash-db4: Daubechies wavelet hash (C) Johannes Buchner, 2013 """ % sys.argv[0]) @@ -46,6 +48,10 @@ def usage(): hashfunc = imagehash.phash elif hashmethod == 'dhash': hashfunc = imagehash.dhash + elif hashmethod == 'whash-haar': + hashfunc = imagehash.whash + elif hashmethod == 'whash-db4': + hashfunc = lambda img: imagehash.whash(img, mode='db4') else: usage() userpath = sys.argv[2] if len(sys.argv) > 2 else "." diff --git a/imagehash/__init__.py b/imagehash/__init__.py index ddcd86e..f112f50 100644 --- a/imagehash/__init__.py +++ b/imagehash/__init__.py @@ -165,6 +165,8 @@ def dhash(image, hash_size=8): def whash(image, hash_size = 8, image_scale = None, mode = 'haar', remove_max_haar_ll = True): """ Wavelet Hash computation. + + based on https://www.kaggle.com/c/avito-duplicate-ads-detection/ @image must be a PIL instance. @hash_size must be a power of 2 and less than @image_scale. @@ -176,7 +178,7 @@ def whash(image, hash_size = 8, image_scale = None, mode = 'haar', remove_max_ha @remove_max_haar_ll - remove the lowest low level (LL) frequency using Haar wavelet. """ - if image_scale != None: + if image_scale is not None: assert image_scale == int(2**image_scale), "image_scale is not power of 2" else: image_scale = 2**int(numpy.log2(min(image.size))) @@ -206,3 +208,4 @@ def whash(image, hash_size = 8, image_scale = None, mode = 'haar', remove_max_ha med = numpy.median(dwt_low) diff = dwt_low > med return ImageHash(diff) + diff --git a/setup.py b/setup.py index 207c139..ab00c09 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( name='ImageHash', - version='1.0', + version='2.0', author='Johannes Buchner', author_email='buchner.johannes@gmx.at', packages=['imagehash'], @@ -19,9 +19,10 @@ description='Image Hashing library', long_description=long_description, install_requires=[ - "scipy", "numpy", - "pillow", # or PIL + "scipy", # for phash + "pillow", # or PIL + "PyWavelets", # for whash ], )