AzureDVBB · November 19, 2018 22:34 · Nov 19, 2018
diff --git a/computations_parallel.py b/computations_parallel.py
@@ -0,0 +1,271 @@
+# -*- coding: utf-8 -*-
+
+# look into 'streamz' package, neat pipelining with dask integration
+import cv2 # opencv-python for frame reading
+import skimage # scikit-image for loaded image analysis
+import dask # parallelized python EZ mode
+import numpy as np # yep
+import matplotlib.pyplot as plt # pretty charts no?
+import matplotlib
+from skimage.feature import match_descriptors, ORB
+from skimage.measure import ransac
+from skimage.transform import FundamentalMatrixTransform
+import os
+
+#standard libs
+import time
+import random
+import itertools
+
+# due to bugs in scikit-video with opening and reading files
+# resorted to using OpenCV for reading frames
+
+class VideoFile_p:
+
+    def __init__(self, file):
+        self.file = file
+        # look at opencv documentation: Flags for video I/O
+        # the cv2 properties did not function properly,
+        # passing the integer value of the flag did
+        self.capture = cv2.VideoCapture(self.file)
+        self.number_of_frames = int(self.capture.get(7))
+        self.current_index = 0
+
+    def __len__(self):
+        return self.number_of_frames
+
+    def __iter__(self):
+        self.current_index = 0
+        self.capture = cv2.VideoCapture(self.file)
+        self.number_of_frames = int(self.capture.get(7))
+        return self
+
+    def __next__(self):
+        self.current_index += 1
+        ret, frame = self.capture.read() # ret is false at EOF
+        if ret is False:
+            self.current_index = None
+            self.capture = None
+            raise StopIteration
+        elif ret is True:
+            # cv2 opens in bgr mode and needs to be converted to RGB
+            return {'index': self.current_index,
+                    'raw_frame': cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)}
+
+    def save_frames(self, list_of_frames, save_folder):
+        # checks and makes the directory path if not existing
+        if not os.path.exists(save_folder):
+            os.makedirs(save_folder)
+        reader = iter(self)
+        for img in reader:
+            # premature termination on last frame write
+            if img['index'] > max(list_of_frames):
+                break
+
+            elif img['index'] in list_of_frames:
+                # padding out number for up to 6 digits
+                filename = os.path.join(save_folder, str(img['index']).zfill(6)) + '.jpg'
+                frame = cv2.cvtColor(img['raw_frame'], cv2.COLOR_RGB2BGR)
+                cv2.imwrite(filename, frame)
+
+
+from skimage.feature import match_descriptors, ORB
+from skimage.measure import ransac
+from skimage.transform import FundamentalMatrixTransform
+import numpy as np
+from numba import jit
+
+class Analysis_p:
+
+    @staticmethod
+    def compute_keypoints_descriptors_blur(frame, n_keypoints = 500,
+                                           opencv=True, sift=False):
+        # using opencv to reduce dependencies needed
+        # estimate blur by taking the image's laplacian vairance (blurry=low)
+        blur = cv2.Laplacian(frame['raw_frame'], cv2.CV_64F).var()
+
+        if not opencv:
+            # skimage has poor detection speed, 10x slower as of writing this
+            # keeping it here if in the future its better
+
+            if sift:
+                orb = cv2.SIFT(nfeatures=n_keypoints)
+            else:
+                orb = skimage.feature.ORB(n_keypoints = n_keypoints, downscale=2)
+            # skimage ORB needs grayscale image
+            orb.detect_and_extract(skimage.color.rgb2gray(frame['raw_frame']))
+            keypoints = orb.keypoints
+            descriptors = orb.descriptors
+
+            return {'index': frame['index'], 'blur': blur,
+                    'keypoints': keypoints, 'descriptors': descriptors}
+
+        else:
+            # boilerplate from opencv python reference
+            orb = cv2.ORB_create(nfeatures = n_keypoints) # Initiate ORB detector
+            keypoints_o = orb.detect(frame['raw_frame'], None)
+            keypoints_o, descriptors = orb.compute(frame['raw_frame'], keypoints_o)
+            # make keypoints compatible with scikit-image
+            # array of [[x, y],] coords ndarray
+            keypoints = np.ndarray(shape=(n_keypoints, 2), dtype=np.int64)
+
+            try:
+                for i, k in enumerate(keypoints_o, start=0):
+                    keypoints[i] = k.pt
+            except:
+                keypoints = None # if something goes catastrophically wrong
+
+        #cannot pickle openCV keypoint objects unfortunately, need to convert to coords (x,y aray)
+        return {'index': frame['index'], 'blur': blur,
+                'keypoints': keypoints, 'descriptors': descriptors}
+
+
+    @staticmethod
+    def match_frames(frame1, frame2, minsamples=8, maxtrials=100, opencv=False):
+        if opencv is False:
+            # skimage has nicer matching then opencv
+            # modified boilerplate example code from doc of skimage
+            # ORB
+            matches = match_descriptors(frame1['descriptors'],
+                                        frame2['descriptors'],
+                                        cross_check = True)
+            try:
+                # filtering out outliers, note first return is 'model', we dont care
+                _, inliers = ransac((frame1['keypoints'][matches[:, 0]],
+                                    frame2['keypoints'][matches[:, 1]]),
+                                    FundamentalMatrixTransform,
+                                    min_samples = minsamples,
+                                    residual_threshold = 1, max_trials = maxtrials)
+
+                # only the number of inliers matter to us
+                inliers_sum = inliers.sum()
+                #inliers_sum = len(matches)
+
+            except:
+                # just show raw matches if RANSAC errors out
+                inliers_sum = len(matches)
+
+            finally:
+                return inliers_sum
+
+        else:
+            pass # I doubt anyone wants to use opencv here
+
+
+class FrameSelection_p:
+
+    def __init__(self):
+        pass
+
+    def variance_picker(matches_to_base_frame, min_variance=0.1):
+        new = None
+        old = None
+
+        for i, _ in enumerate(matches_to_base_frame, start=0):
+            if old is None:
+                old = matches_to_base_frame[i]
+                new = old
+            else:
+                old = new
+                #new = sum(matches_to_base_frame[:i])/(i+1)
+                new = matches_to_base_frame[i]
+                variance = abs(new - old) / old
+
+                if variance <= min_variance:
+                    return i
+
+        return None # too much variance in dataset
+
+    def compute_best_frames(frame_stream, last_frame_index, client,
+                            batch_size=10, min_variance=0.05):
+
+        from itertools import repeat, islice
+
+        last_frame_index = last_frame_index-1 # removes infinite loop bug
+        frame_generator = itertools.islice(vid_stream, last_frame_index)
+        base_descriptor = None
+        batch_num = 1
+        descriptor_collection = []
+        found_at_collection_index = None
+        matches_to_base_frame = []
+        good_frame_indexes = [1]   # include first frame
+        last_batch = False
+
+
+        while True:
+
+            if base_descriptor is None:
+                base_descriptor = client.submit(
+                        Analysis_p.compute_keypoints_descriptors_blur,
+                        next(frame_generator))
+
+            # check if the next batch is the last one
+            if good_frame_indexes[-1] + batch_num*batch_size >= last_frame_index:
+                if last_batch is True:
+                    break  # end the loop if it has been
+                else:
+                    last_batch = True
+                    # put the appropriate amount onto the collection
+                    futures = client.map(Analysis_p.compute_keypoints_descriptors_blur,
+                                         islice(frame_generator,
+                                                last_frame_index -
+                                                good_frame_indexes[-1] -
+                                                batch_size * (batch_num - 1)))
+
+                    descriptor_collection += futures
+
+            else:
+                futures = client.map(Analysis_p.compute_keypoints_descriptors_blur,
+                                     islice(frame_generator,
+                                            batch_size * batch_num -
+                                            len(descriptor_collection)))
+
+                descriptor_collection += futures
+
+            # match all elements in the collection against base
+            match_num_futures = client.map(Analysis_p.match_frames,
+                                        repeat(base_descriptor),
+                                        islice(descriptor_collection,
+                                               len(matches_to_base_frame),
+                                               batch_size * batch_num))
+            # TODO: the above method passes the entire slice into method
+            # need to fix that so it only sends base future and collection future
+            matches_to_base_frame += client.gather(match_num_futures)
+
+            # selection pass
+            found_at_collection_index = FrameSelection_p.variance_picker(
+                                                        matches_to_base_frame,
+                                                        min_variance=min_variance)
+
+            if found_at_collection_index is not None:
+                # save the frame's index as good
+                frame_index = descriptor_collection[found_at_collection_index].result()['index']
+                base_descriptor = descriptor_collection[found_at_collection_index]
+                good_frame_indexes.append(frame_index)
+                # make the good frame the base
+                base_descriptor = descriptor_collection[found_at_collection_index]
+
+                # delete frame dictionaries at new base and before
+                # and reset variables
+                del descriptor_collection[:found_at_collection_index+1]
+                found_at_collection_index = None
+                matches_to_base_frame.clear()
+                batch_num = 1
+
+            else:
+                # if not found then repeat
+                batch_num += 1
+
+        # repeat untill input frames are exhausted
+
+        return good_frame_indexes # finished
+
+if __name__ == '__main__':
+    from dask.distributed import Client
+    file = 'G:/_SFMDatasets/VideoCodeTest/ground.mp4'
+    client = Client('tcp://127.0.0.1:8786') #change address for cluster's one
+    vid_stream = VideoFile_p(file)
+    #slc = itertools.islice(vid_stream, 2000)
+    good = FrameSelection_p.compute_best_frames(vid_stream, vid_stream.number_of_frames, client,
+                                                min_variance=0.08, batch_size=20)
+    vid_stream.save_frames(good, 'J:/selected_video')