032. Converting image into vector by hash function
# @
# "Average hash" expresses image as hash value,
# which can be compared to other "hash value of image"
# You can use MD5,SHA256,etc as hash function
# @
# The term "hash" is used in 2 ways
# 1. "hash" as in hashtable:
# a hashtable is a data structure
# which stores data as pairs of key and value,
# like Python's dictionary or JavaScript's object
# 2. "hash" as in hash value:
# a fixed-length value computed from data by a hash function
# @
# Let's learn how to use hash library in Python
import hashlib
# You create md5 hash function object
md5_hash_function_object = hashlib.md5()
# hashlib works on bytes, so the text is given as a bytes literal
# (passing a str to update() raises TypeError on Python 3)
md5_hash_function_object.update(b"StringWhatYouWantToHash")
# digest() returns the final hash value as a bytes object;
# hexdigest() gives the same value as a printable hex string
hashed_image_represented_in_string = md5_hash_function_object.digest()
# @
# Let's deal with image with hash library
import hashlib
# Read the whole tower.jpg file as raw bytes;
# "rb" mode returns the content without text decoding
with open("tower.jpg", "rb") as file:
    image_in_binary_form = file.read()
# Build the md5 hash object directly from the bytes
# (equivalent to creating an empty object and calling update() once)
md5_hash_function_object = hashlib.md5(image_in_binary_form)
# digest() returns the raw hash value as bytes
hashed_image_represented_in_string = md5_hash_function_object.digest()
# The good points of hashed image
# 1. It makes comparison to other images possible
# by a short fixed-length sequence of characters
# 1. A specific image generates a practically unique hash value,
# so hash value can be used when you claim ownership of image
# @
# Steps of converting image into vector is following
# 1. You reduce size of image to 8*8
# 1. You convert color of image to grey
# 1. You calculate average value from values of all pixels
# 1. If pixel's brightness is larger than average,
# you input 1 into that pixel, otherwise you input 0 into that pixel
# @
pip install Pillow
# @
from PIL import Image
import numpy as np
# This method takes 2 arguments(filename, size)
def average_hash(fname, size=16):
    """Compute the average hash of an image as a size x size array of 0/1.

    The image is converted to greyscale, shrunk to ``size`` x ``size``
    pixels, and every pixel is compared against the mean of all pixels.

    Args:
        fname: File name of the image, passed to ``Image.open``.
        size: Edge length of the reduced image, so the result holds
            ``size * size`` bits. Defaults to 16.

    Returns:
        A 2-dimensional numpy integer array of shape ``(size, size)``
        holding 1 where the pixel value is above the mean and 0 elsewhere.
    """
    # The context manager closes the image file once it has been converted
    with Image.open(fname) as opened_file_by_pillow:
        # 'L' converts to greyscale
        # (other options: 1:binary, RGB, RGBA, CMYK)
        color_changed_image = opened_file_by_pillow.convert('L')
    # Image.LANCZOS is the filter formerly also exposed as Image.ANTIALIAS;
    # the ANTIALIAS alias was removed in Pillow 10
    resized_image = color_changed_image.resize((size, size), Image.LANCZOS)
    # getdata() yields the pixels as one flat sequence,
    # so the numpy array built from it is 1-dimensional
    pixel_data_of_image_in_nparray = np.array(resized_image.getdata())
    # Reshape into rows and columns for a clearer overview:
    # [
    # [1 1]
    # [1 1]
    # ]
    # High number represents bright point
    reshaped_pixel_data_of_image_in_nparray = (
        pixel_data_of_image_in_nparray.reshape((size, size))
    )
    # Mean value of all pixels
    mean_value_of_pixel_data = reshaped_pixel_data_of_image_in_nparray.mean()
    # The comparison gives a boolean array;
    # multiplying by 1 turns True/False into 1/0
    return 1 * (reshaped_pixel_data_of_image_in_nparray > mean_value_of_pixel_data)
# This method converts data into binary hash
def np2hash(n):
    """Convert an array of 0/1 values into a hexadecimal hash string.

    Args:
        n: numpy array (or nested sequence) of 0/1 values. Each row is read
            as one binary number, first element being the highest bit.
            A 1-dimensional input is treated as a single row.

    Returns:
        The hexadecimal form of every row joined into one string. Each row
        is zero-padded to at least 4 hex digits, and to more when the row
        is longer than 16 bits, so leading zero bits are never dropped.

    Raises:
        ValueError: If a row is empty or contains a value other than 0/1.
    """
    binary_hash_list = []
    # Iterate over the argument itself rather than a module-level variable;
    # atleast_2d lets an already flattened hash be passed as well
    for row in np.atleast_2d(n).astype(int).tolist():
        # Join the bits of the row into a string such as "0110..."
        bit_string = "".join(str(bit) for bit in row)
        # One hex digit covers 4 bits; 4 digits stay the minimum width
        hex_width = max(4, (len(row) + 3) // 4)
        # Parse the bit string as a base-2 number and format it as hex
        binary_hash_list.append("%0*x" % (hex_width, int(bit_string, 2)))
    # You join binary_hash_list into string
    return "".join(binary_hash_list)
# You call average_hash() on tower.jpg with the default size (16),
# which gives a 2 dimensional array of 0/1 values
average_hash_value_from_image=average_hash('tower.jpg')
# reshape(1,-1) puts all values into a single row;
# the result is still 2 dimensional, with shape (1, N)
reshaped_average_hash_value_from_image=average_hash_value_from_image.reshape(1,-1)
# Bare expression: displays the array in an interactive session,
# and has no effect when run as a script
reshaped_average_hash_value_from_image
# You call np2hash() with the single-row array;
# the returned string is not stored or printed here
np2hash(reshaped_average_hash_value_from_image)
# @
# You compute average hash of 2 images to compare them
average_hash_value_from_image1=average_hash('tower.jpg')
average_hash_value_from_image2=average_hash('test.png')
# You put each hash into a single row of shape (1, N)
reshaped_average_hash_value_from_image1=average_hash_value_from_image1.reshape(1,-1)
reshaped_average_hash_value_from_image2=average_hash_value_from_image2.reshape(1,-1)
# This compares 2 hashes element by element,
# and gives an array holding True
# where the 2 values are different, and False where they are same
reshaped_average_hash_value_from_image1!=reshaped_average_hash_value_from_image2
# Example of such a result
# [
# [True True False
# True True False
# True False False
# True True False
# False True False]
# ]
# True counts as 1 and False counts as 0 when summed,
# so sum() gives the number of positions which are different
print((reshaped_average_hash_value_from_image1!=reshaped_average_hash_value_from_image2).sum())
# 155 (the output noted for these 2 images) means how many positions of 2 hashes are different
# This count is called Hamming distance
# If 2 images have 155 different positions out of 16*16=256 positions,
# it's fair to say they're different images