import onnx
from onnx_tf.backend import prepare
import tensorflow as tf
import onnxruntime
from PIL import Image
import numpy as np
import sys
#You need to have model.onnx and neuralhash_128x96_seed1.dat in your working directory.
#See the previous collisionLSH gist for a simpler and more general case.
#What's new: L-BFGS-B, using the real specific network, an alternative dual loss function.
#This is a neural hash preimage attack; some images take longer to converge, others converge in 30 L-BFGS steps.
#Using only the CPU, the amortized time for a collision is between 10 seconds and 10 minutes.
#To improve it you can add additional loss terms to make the generated image more natural,
#or you can use a trained conditional image generator and search for an input to that generator
#that minimizes the distance between the computed hashes.
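#How the hash is computed (this mirrors computeHashTF/computeHashInteractiveSession below):
#    features = model(image)['leaf/logits']    -> 128-dim feature vector
#    projection = seed1 @ features             -> 96 signed projections
#    bit k of the hash = 1 if projection[k] >= 0 else 0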
#These are Apple's shape constants
hashLength = 96
featLength = 128
#These are some parameters you can tweak to speed up convergence
featureScaling = 100.0  #features are divided by this factor
gap = 1.0  #how far we want to be from each hyperplane;
#if this is not enough we may fail by a few bits due to the post-processing:
#rounding and truncating floats to uint8, and PNG compression
#Only used when not using scipyOpt:
learning_rate = 1e-1  #increasing it makes the code take bigger steps, meaning faster initial convergence but slower final convergence.
#A learning rate that is too high is easy to spot: the loss stops decreasing at every step.
#An alternative loss function
useDualLoss = False
useScipyOpt = True
if useScipyOpt:
    import scipy.optimize
def distanceBetweenHashes(input, model, seed1, flip, gap):
    features = tf.reshape(model(image=input)['leaf/logits'], (1, featLength))
    lshfeat = tf.reduce_sum(features * seed1, axis=1)
    #we scale the features by featureScaling as a manual preconditioning step;
    #this doesn't change the sign of the features, so the hash is unaffected
    #flatfeat = tf.nn.l2_normalize(tf.reshape(lshfeat, (-1,))) * featureScaling
    flatfeat = tf.reshape(lshfeat, (-1,)) / featureScaling
    if not useDualLoss:
        loss = tf.nn.l2_loss(tf.nn.relu(flatfeat * flip + gap))
        #alternatively we can use a dual loss.
        #The hard dual loss would be something like
        #    loss = tf.reduce_max(flatfeat * flip) + gap, with stopping criterion loss < 0,
        #but this is not smooth enough, so the else branch below builds a smooth version of it
        #with a smoothing length
    else:
        smoothing_length = 5.0
        val = flatfeat * flip
        #The stopping criterion is only valid if smoothing_length and gap are high enough.
        #To do things rigorously we could use the exact hard dual loss stopping criterion instead.
        loss = tf.reduce_sum(tf.nn.softmax(smoothing_length * val) * val) + gap
    #You can add additional loss terms here to make the produced image more natural
    return loss
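#Illustrative only: an extra naturalness term could be added inside
#distanceBetweenHashes before the return, e.g. a total-variation penalty
#(tv_weight is a hypothetical tuning constant, not part of this script):
#    loss += tv_weight * tf.reduce_sum(
#        tf.image.total_variation(tf.transpose(input, [0, 2, 3, 1])))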
def gradient(x, model, seed1, flip, gap):
    input = tf.convert_to_tensor(x, dtype=tf.float32)
    with tf.GradientTape() as t:
        t.watch(input)
        loss = distanceBetweenHashes(input, model, seed1, flip, gap)
    return t.gradient(loss, input).numpy()
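#Note: the gradient returned above has the same (1, 3, 360, 360) shape as the
#input, so it can be applied directly as a pixel-space update.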
goalvalue = None
#This is the wrapper to provide to scipy, which needs flat inputs in double precision
def npdistanceBetweenHashes(flatinput, model, seed1, flip, gap):
    global goalvalue
    input = np.reshape(flatinput, (1, 3, 360, 360)).astype(np.float32)
    loss = distanceBetweenHashes(input, model, seed1, flip, gap).numpy()
    print("loss : ")
    print(loss)
    #Once the stopping criterion is met we store the current point and raise,
    #which aborts L-BFGS-B early; the caller catches the exception and
    #recovers the solution from goalvalue
    if not useDualLoss:
        if loss < gap * gap:
            goalvalue = flatinput
            raise ValueError("Goal Reached")
    else:
        if loss < 0:
            goalvalue = flatinput
            raise ValueError("Goal Reached")
    return loss
#This is the wrapper to provide to scipy, which needs flat inputs in double precision
def npgradient(flatinput, model, seed1, flip, gap):
    input = np.reshape(flatinput, (1, 3, 360, 360)).astype(np.float32)
    grad = gradient(input, model, seed1, flip, gap)
    flatgrad = np.reshape(grad, (-1,))
    return flatgrad.astype(np.float64)
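#Both NumPy wrappers above bridge the two representations: SciPy works on flat
#float64 vectors, while the TensorFlow graph expects float32 NCHW tensors.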
def getArrFromImageName(imgname):
    #Preprocessing: RGB, 360x360, pixel values in [-1.0, 1.0], NCHW layout
    image = Image.open(imgname).convert('RGB')
    image = image.resize([360, 360])
    arr = np.array(image).astype(np.float32) / 255.0
    arr = arr * 2.0 - 1.0
    arr = arr.transpose(2, 0, 1).reshape([1, 3, 360, 360])
    return arr
def computeHashInteractiveSession(imgname, seed1):
    arr = getArrFromImageName(imgname)
    #Compute the reference hash with onnxruntime; demo() later checks that the
    #TensorFlow export produces the same hash
    session = onnxruntime.InferenceSession("model.onnx")
    inputs = {session.get_inputs()[0].name: arr}
    outs = session.run(None, inputs)
    hash_output = seed1.dot(outs[0].flatten())
    hash_bits = ''.join(['1' if it >= 0 else '0' for it in hash_output])
    hash_hex = '{:0{}x}'.format(int(hash_bits, 2), len(hash_bits) // 4)
    return hash_bits, hash_hex
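#Worked example of the hex formatting: int('0101', 2) == 5 and len('0101') // 4 == 1,
#so the 4 bits '0101' format to the hex string '5'; the 96-bit hash yields 24 hex characters.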
def computeHashTF(imgname, compute_features, seed1):
    arr = getArrFromImageName(imgname)
    out = compute_features(image=arr)
    res = out['leaf/logits'].numpy()  #tf_rep.outputs[0]
    hash_output = seed1.dot(res.flatten())
    hash_bits = ''.join(['1' if it >= 0 else '0' for it in hash_output])
    tfhash_hex = '{:0{}x}'.format(int(hash_bits, 2), len(hash_bits) // 4)
    return hash_bits, tfhash_hex
def demo(imageTargetHash, startingImage, outputimage):
    #warnings.filterwarnings('ignore')  # Ignore all the warning messages
    model = onnx.load('model.onnx')  # Load the ONNX file
    tf_rep = prepare(model)  # Import the ONNX model into TensorFlow
    tf_rep.export_graph("mytfmodel")  # Export it to disk
    print(tf_rep.inputs)  # Input nodes of the model
    print(tf_rep.outputs)  # Output nodes of the model
    seed1 = open("neuralhash_128x96_seed1.dat", 'rb').read()[128:]
    seed1 = np.frombuffer(seed1, dtype=np.float32)
    seed1 = seed1.reshape([96, 128])
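    #seed1 now holds the 96x128 float32 projection matrix; the first 128 bytes
    #of the .dat file are skipped above as what appears to be a file header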
    # Compute the target hash from the target image
    hash_bits, hash_hex = computeHashInteractiveSession(imageTargetHash, seed1)
    #We load the converted model from disk
    mytfmodel = tf.saved_model.load("mytfmodel")
    compute_features = mytfmodel.signatures["serving_default"]
    hash_bits_tf, hash_hex_tf = computeHashTF(imageTargetHash, compute_features, seed1)
    if hash_hex != hash_hex_tf:
        print("something went wrong in the tf model export: the hash computed with tensorflow isn't the same as the hash computed by onnx")
        exit()
    print("Target hash : ")
    print(hash_hex_tf)
    flip = [1.0 if x == "0" else -1.0 for x in hash_bits_tf]
    # when the network is trying to produce a 1 for the kth bit, it will try to push the feature into the range [gap, +infinity);
    # when the network is trying to produce a 0 for the kth bit, it will try to push the feature into the range (-infinity, -gap].
    # Otherwise it gets penalized.
    # Below we either use standard gradient descent,
    # or do better with the L-BFGS-B optimizer, which also handles the bound constraints.
    # We could also add an extra loss term to make the result similar to a provided image,
    # or use a GAN loss to make it look "natural".
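    # Worked example of the hinge loss above: with gap = 1.0, a scaled feature
    # f = 0.5 whose target bit is 1 (flip = -1) gives relu(-0.5 + 1.0) = 0.5 and
    # contributes 0.5 * 0.5**2 = 0.125 via tf.nn.l2_loss; once f >= 1.0 it contributes 0.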
    arr = getArrFromImageName(startingImage)
    # Compute and print the starting loss
    loss = distanceBetweenHashes(arr, compute_features, seed1, flip, gap).numpy()
    print("initial loss : ")
    print(loss)
    if useScipyOpt:
        flatarr = np.reshape(arr, (-1,)).astype(np.float64)
        print("before scipy.optimize.minimize (L-BFGS-B)")
        # Keep every pixel inside the valid preprocessed image domain [-1, 1]
        bounds = [(-1.00, 1.00) for x in flatarr]
        try:
            optresult = scipy.optimize.minimize(npdistanceBetweenHashes, flatarr, jac=npgradient,
                                                bounds=bounds,
                                                args=(compute_features, seed1, flip, gap),
                                                method='L-BFGS-B',
                                                options={'disp': True, 'maxcor': 8})
            arr = np.reshape(optresult.x, [1, 3, 360, 360])
        except Exception as e:
            # npdistanceBetweenHashes raises once the goal is reached;
            # the solution was stored in goalvalue
            print(str(e))
            arr = np.reshape(goalvalue, [1, 3, 360, 360])
        print("after scipy.optimize.minimize (L-BFGS-B)")
    else:
        print("will finish when loss <= " + str(gap * gap))
        while True:
            grad = gradient(arr, compute_features, seed1, flip, gap)
            arr -= learning_rate * grad
            #We constrain the image to its domain
            arr = np.clip(arr, -1.0, 1.0)
            loss = distanceBetweenHashes(arr, compute_features, seed1, flip, gap).numpy()
            print("loss : ")
            print(loss)
            if not useDualLoss:
                if loss < gap * gap:
                    break
            else:
                if loss < 0:
                    break
    #arr now contains the result.
    #At this point the neural hash should match,
    #but it still needs to survive the rounding and compression steps.
    #The greater the gap parameter, the rarer this failure will be.
    #We convert from [-1.0, 1.0] to [0, 255]
    reshaped = arr.reshape([3, 360, 360]).transpose(1, 2, 0)
    reshaped = ((reshaped + 1) / 2 * 255.0).astype(np.uint8)
    j = Image.fromarray(reshaped)
    j.save(outputimage)
    print("Checking the hash of the output image")
    outhash_bits_tf, outhash_hex_tf = computeHashTF(outputimage, compute_features, seed1)
    print("output hash : ")
    print(outhash_hex_tf)
    print("target hash : ")
    print(hash_hex_tf)
    if outhash_hex_tf == hash_hex_tf:
        print("Collision Found !")
    else:
        print("Failed to find collision")
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("usage: python3 collisionNeuralHash.py imageTarget.png startingImage.png outputImage.png")
        exit()
    #We are trying to generate an image that has the same hash as imageTarget.png.
    #Some starting images are simpler than others;
    #if the optimization gets stuck in a local minimum, try another startingImage.
    demo(sys.argv[1], sys.argv[2], sys.argv[3])