import onnx
from onnx_tf.backend import prepare
import tensorflow as tf
import onnxruntime
from PIL import Image
import numpy as np
import sys

# You need model.onnx and neuralhash_128x96_seed1.dat in your working directory.
# See the previous collisionLSH gist for a simpler and more general case.
# What's new: L-BFGS-B, using the real specific network, and an alternative dual loss function.
# This is a NeuralHash preimage: some images take longer to converge, others converge in 30 L-BFGS steps.
# Using only the CPU, the amortized time for a collision is between 10 seconds and 10 minutes.
# To improve it, you can add additional losses to make the generated image more natural,
# or use a trained conditional image generator and search for an input of that generator
# that minimizes the distance between the hashes.

# These are the Apple shape constants
hashLength = 96
featLength = 128

# These are parameters you can fiddle with to speed up convergence
featureScaling = 100.0  # features are divided by this factor
gap = 1.0  # how far we want to be from the hyperplane
# If gap is too small we may fail by a few bits due to the post-processing:
# rounding/truncating floats to uint8 and PNG compression.

# Only used when not using scipyOpt
learning_rate = 1e-1
# Increasing it makes the code take bigger steps, meaning faster initial convergence but slower final convergence.
# A learning rate that is too large is easy to spot: the loss stops decreasing at every step.

# An alternative loss function
useDualLoss = False

useScipyOpt = True

if useScipyOpt:
    import scipy.optimize


def distanceBetweenHashes(input, model, seed1, flip, gap):
    features = tf.reshape(model(image=input)['leaf/logits'], (1, featLength))
    lshfeat = tf.reduce_sum(features * seed1, axis=1)
    # We scale the features as a manual preconditioning step; this doesn't change their sign.
    # flatfeat = tf.nn.l2_normalize(tf.reshape(lshfeat, (-1,))) * featureScaling
    flatfeat = tf.reshape(lshfeat, (-1,)) / featureScaling

    if not useDualLoss:
        loss = tf.nn.l2_loss(tf.nn.relu(flatfeat * flip + gap))
    else:
        # Alternatively we can use a dual loss.
        # The hard dual loss would be something like
        #   loss = tf.reduce_max(flatfeat * flip) + gap, with stopping criterion loss < 0,
        # but this is not smooth enough, so we build a smoothed version of it
        # with a smoothing length.
        smoothing_length = 5.0
        val = flatfeat * flip
        # The stopping criterion is only valid if smoothing_length and gap are high enough.
        # To do things correctly we can use the exact hard dual loss stopping criterion.
        loss = tf.reduce_sum(tf.nn.softmax(smoothing_length * val) * val) + gap

    # You can add additional loss terms here to make the produced image more natural.
    return loss


def gradient(x, model, seed1, flip, gap):
    input = tf.convert_to_tensor(x, dtype=tf.float32)
    with tf.GradientTape() as t:
        t.watch(input)
        loss = distanceBetweenHashes(input, model, seed1, flip, gap)
    return t.gradient(loss, input).numpy()


goalvalue = None
# This is the loss wrapper for scipy, which needs flat inputs in double precision.
def npdistanceBetweenHashes(flatinput, model, seed1, flip, gap):  # extra parameters are passed via args=
    global goalvalue
    input = np.reshape(flatinput, (1, 3, 360, 360)).astype(np.float32)
    loss = distanceBetweenHashes(input, model, seed1, flip, gap).numpy()
    print("loss : ")
    print(loss)
    # Raising an exception stops the scipy optimizer early once the goal is reached.
    if not useDualLoss:
        if loss < gap * gap:
            goalvalue = flatinput
            raise ValueError("Goal Reached")
    else:
        if loss < 0:
            goalvalue = flatinput
            raise ValueError("Goal Reached")
    return loss


# This is the gradient wrapper for scipy, which needs flat inputs in double precision.
def npgradient(flatinput, model, seed1, flip, gap):
    input = np.reshape(flatinput, (1, 3, 360, 360)).astype(np.float32)
    grad = gradient(input, model, seed1, flip, gap)
    flatgrad = np.reshape(grad, (-1,))
    return flatgrad.astype(np.float64)


def getArrFromImageName(imgname):
    image = Image.open(imgname).convert('RGB')
    image = image.resize([360, 360])
    arr = np.array(image).astype(np.float32) / 255.0
    arr = arr * 2.0 - 1.0
    arr = arr.transpose(2, 0, 1).reshape([1, 3, 360, 360])
    return arr


def computeHashInteractiveSession(imgname, seed1):
    arr = getArrFromImageName(imgname)
    # Compute the hash with onnxruntime; used as a reference to check the TensorFlow export.
    session = onnxruntime.InferenceSession("model.onnx")
    inputs = {session.get_inputs()[0].name: arr}
    outs = session.run(None, inputs)
    hash_output = seed1.dot(outs[0].flatten())
    hash_bits = ''.join(['1' if it >= 0 else '0' for it in hash_output])
    hash_hex = '{:0{}x}'.format(int(hash_bits, 2), len(hash_bits) // 4)
    return hash_bits, hash_hex


def computeHashTF(imgname, compute_features, seed1):
    arr = getArrFromImageName(imgname)
    out = compute_features(image=arr)
    res = out['leaf/logits'].numpy()  # tf_rep.outputs[0]
    hash_output = seed1.dot(res.flatten())
    hash_bits = ''.join(['1' if it >= 0 else '0' for it in hash_output])
    tfhash_hex = '{:0{}x}'.format(int(hash_bits, 2), len(hash_bits) // 4)
    return hash_bits, tfhash_hex


def demo(imageTargetHash, startingImage, outputimage):
    # warnings.filterwarnings('ignore')  # Ignore all the warning messages in this tutorial
    model = onnx.load('model.onnx')  # Load the ONNX file
    tf_rep = prepare(model)  # Import the ONNX model into TensorFlow
    tf_rep.export_graph("mytfmodel")  # Export it to disk
    print(tf_rep.inputs)  # Input nodes of the model
    print(tf_rep.outputs)  # Output nodes of the model

    seed1 = open("neuralhash_128x96_seed1.dat", 'rb').read()[128:]
    seed1 = np.frombuffer(seed1, dtype=np.float32)
    seed1 = seed1.reshape([96, 128])

    # Preprocess the image and compute the target hash with onnxruntime
    hash_bits, hash_hex = computeHashInteractiveSession(imageTargetHash, seed1)

    # Load the converted model from disk
    mytfmodel = tf.saved_model.load("mytfmodel")
    compute_features = mytfmodel.signatures["serving_default"]

    hash_bits_tf, hash_hex_tf = computeHashTF(imageTargetHash, compute_features, seed1)

    if hash_hex != hash_hex_tf:
        print("something went wrong in the tf model export: the hash computed with tensorflow isn't the same as the hash computed by onnx")
        exit()

    print("Target hash : ")
    print(hash_hex_tf)

    flip = [1.0 if x == "0" else -1.0 for x in hash_bits_tf]
    # When the network is trying to get a 1 for the k-th bit, it pushes the feature into the range [gap, +infinity].
    # When the network is trying to get a 0 for the k-th bit, it pushes the feature into the range [-infinity, -gap].
    # Otherwise it gets penalized.

    # We use standard gradient descent.
    # We can do better using the L-BFGS-B optimizer, which handles bound constraints.
    # We can also add an additional loss to make the result similar to a provided image,
    # or use a GAN loss to make it look "natural".

    initialImage = Image.open(startingImage).convert('RGB')
    initialImage = initialImage.resize([360, 360])
    arr = np.array(initialImage).astype(np.float32) / 255.0
    arr = arr * 2.0 - 1.0
    arr = arr.transpose(2, 0, 1).reshape([1, 3, 360, 360])

    # Compute the initial loss so that we take at least one iteration
    loss = distanceBetweenHashes(arr, compute_features, seed1, flip, gap).numpy()
    print("initial loss : ")
    print(loss)

    if useScipyOpt:
        flatarr = np.reshape(arr, (-1,)).astype(np.float64)
        print("before scipy.optimize.minimize (L-BFGS-B)")
        bounds = [(-1.00, 1.00) for x in flatarr]
        try:
            optresult = scipy.optimize.minimize(npdistanceBetweenHashes, flatarr,
                                                jac=npgradient,
                                                bounds=bounds,
                                                args=(compute_features, seed1, flip, gap),
                                                method='L-BFGS-B',
                                                options={'disp': True, 'maxcor': 8})
            arr = np.reshape(optresult.x, [1, 3, 360, 360])
        except Exception as e:
            # npdistanceBetweenHashes raises ValueError("Goal Reached") to stop the optimizer early;
            # the solution it found is stored in goalvalue.
            print(str(e))
            arr = np.reshape(goalvalue, [1, 3, 360, 360])
        print("after scipy.optimize.minimize (L-BFGS-B)")
    else:
        print("will finish when loss <= " + str(gap * gap))
        while True:
            grad = gradient(arr, compute_features, seed1, flip, gap)
            arr -= learning_rate * grad
            # Constrain the image to its domain
            arr = np.clip(arr, -1.0, 1.0)
            loss = distanceBetweenHashes(arr, compute_features, seed1, flip, gap).numpy()
            print("loss : ")
            print(loss)
            if not useDualLoss:
                if loss < gap * gap:
                    break
            else:
                if loss < 0:
                    break

    # arr now contains the result.
    # At this point the neural hash should match,
    # but it still needs to survive the rounding and compression steps.
    # The larger the gap parameter, the rarer this failure will be.

    # Convert from [-1.0, 1.0] to [0, 255]
    reshaped = arr.reshape([3, 360, 360]).transpose(1, 2, 0)
    reshaped = ((reshaped + 1) / 2 * 255.0).astype(np.uint8)
    j = Image.fromarray(reshaped)
    j.save(outputimage)

    print("Checking the hash of the output image")
    outhash_bits_tf, outhash_hex_tf = computeHashTF(outputimage, compute_features, seed1)
    print("output hash : ")
    print(outhash_hex_tf)
    print("target hash : ")
    print(hash_hex_tf)
    if outhash_hex_tf == hash_hex_tf:
        print("Collision Found !")
    else:
        print("Failed to find collision")


if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("usage: python3 collisionNeuralHash.py imageTarget.png startingImage.png outputImage.png")
        exit()
    # We are trying to generate images that have the same hash as imageTarget.png.
    # Some starting images are simpler than others;
    # if the optimization gets stuck in a local minimum, try another startingImage.
    demo(sys.argv[1], sys.argv[2], sys.argv[3])
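Usage: with model.onnx and neuralhash_128x96_seed1.dat in the working directory, run python3 collisionNeuralHash.py imageTarget.png startingImage.png outputImage.png; the script writes to outputImage.png an image whose NeuralHash should match that of imageTarget.png.

As an independent sanity check, here is a minimal standalone sketch (not part of the gist) that recomputes the NeuralHash of an image using only onnxruntime, mirroring computeHashInteractiveSession above; "collision.png" is a placeholder file name.

import numpy as np
import onnxruntime
from PIL import Image

# Load the 96x128 LSH projection matrix, skipping the first 128 bytes as the script above does
seed1 = np.frombuffer(open("neuralhash_128x96_seed1.dat", "rb").read()[128:], dtype=np.float32).reshape([96, 128])

# Same preprocessing as getArrFromImageName: RGB, 360x360, scaled to [-1, 1], NCHW layout
image = Image.open("collision.png").convert("RGB").resize([360, 360])  # placeholder file name
arr = (np.array(image).astype(np.float32) / 255.0) * 2.0 - 1.0
arr = arr.transpose(2, 0, 1).reshape([1, 3, 360, 360])

# Run the ONNX model and project the 128-d feature vector onto the 96 hyperplanes
session = onnxruntime.InferenceSession("model.onnx")
outs = session.run(None, {session.get_inputs()[0].name: arr})
hash_bits = "".join("1" if v >= 0 else "0" for v in seed1.dot(outs[0].flatten()))
print("{:0{}x}".format(int(hash_bits, 2), len(hash_bits) // 4))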