Skip to content

Instantly share code, notes, and snippets.

@isagalaev
Forked from bobuk/img.py
Last active December 11, 2015 05:48
Show Gist options
  • Save isagalaev/4554182 to your computer and use it in GitHub Desktop.
Save isagalaev/4554182 to your computer and use it in GitHub Desktop.

Revisions

  1. isagalaev revised this gist Jan 17, 2013. 1 changed file with 3 additions and 9 deletions.
    12 changes: 3 additions & 9 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -1,9 +1,6 @@
    #!/usr/bin/env python
    # coding: utf-8
    #
    # Video of this screencast: https://vimeo.com/57296525
    #
    #

    import os
    import random
    import glob
    @@ -39,7 +36,7 @@ def duplicates(dirname):
    """
    files = glob.glob(os.path.join(dirname, '*.jpg'))
    images = [(f, image_data(f)) for f in files]
    # random.shuffle(images)
    random.shuffle(images)

    for filename, data in images:
    distances = [(distance(data, d), f) for f, d in images]
    @@ -60,7 +57,4 @@ def html(groups):


    if __name__ == '__main__':
    reference = open('reference.html').read()
    result = html(duplicates('/home/maniac/Desktop/4554182')) + '\n'
    print(reference.replace('\n', '') == result.replace('\n', ''))
    # print(html(duplicates('/home/maniac/Desktop/4554182')))
    print(html(duplicates('/home/maniac/Desktop/4554182')))
  2. isagalaev revised this gist Jan 17, 2013. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -23,7 +23,7 @@ def image_data(filename):
    Get data from image ready for comparison
    """
    img = Image.open(filename).resize((BLOCK_SIZE, BLOCK_SIZE), Image.BILINEAR)
    return filename, numpy.array([sum(x) for x in img.getdata()])
    return numpy.array([sum(x) for x in img.getdata()])

    def distance(data1, data2):
    """
    @@ -38,7 +38,7 @@ def duplicates(dirname):
    Returns an iterator of image groups ([], [], ... [])
    """
    files = glob.glob(os.path.join(dirname, '*.jpg'))
    images = [image_data(f) for f in files]
    images = [(f, image_data(f)) for f in files]
    # random.shuffle(images)

    for filename, data in images:
  3. isagalaev revised this gist Jan 17, 2013. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion img.py
    Original file line number Diff line number Diff line change
    @@ -46,7 +46,7 @@ def duplicates(dirname):
    yield sorted([(dist, f) for dist, f in distances if dist < MAX_DISTANCE])

    def html_group(group):
    return '\n'.join(
    return ''.join(
    '<img src="%s" width="%s"/>%s' % (os.path.basename(f), WIDTH, dist)
    for dist, f in group
    )
  4. isagalaev revised this gist Jan 17, 2013. 1 changed file with 3 additions and 6 deletions.
    9 changes: 3 additions & 6 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -55,15 +55,12 @@ def html(groups):
    """
    Generates HTML from groups of image duplicates
    """
    body = '\n<hr/>\n'.join(html_group(g) for g in groups)
    return '''<html><body>
    %s
    <hr/>
    </body></html>''' % body
    body = '<hr/>'.join(html_group(g) for g in groups)
    return '<html><body>%s<hr/></body></html>' % body


    if __name__ == '__main__':
    reference = open('reference.html').read()
    result = html(duplicates('/home/maniac/Desktop/4554182')) + '\n'
    print(reference == result)
    print(reference.replace('\n', '') == result.replace('\n', ''))
    # print(html(duplicates('/home/maniac/Desktop/4554182')))
  5. isagalaev revised this gist Jan 17, 2013. 1 changed file with 5 additions and 5 deletions.
    10 changes: 5 additions & 5 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -55,12 +55,12 @@ def html(groups):
    """
    Generates HTML from groups of image duplicates
    """
    res = ['<html><body>']
    res += ['\n<hr/>\n'.join(html_group(g) for g in groups)]
    res += ['<hr/>']
    res += ['</body></html>']
    body = '\n<hr/>\n'.join(html_group(g) for g in groups)
    return '''<html><body>
    %s
    <hr/>
    </body></html>''' % body

    return '\n'.join(res)

    if __name__ == '__main__':
    reference = open('reference.html').read()
  6. isagalaev revised this gist Jan 17, 2013. 1 changed file with 2 additions and 3 deletions.
    5 changes: 2 additions & 3 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -56,9 +56,8 @@ def html(groups):
    Generates HTML from groups of image duplicates
    """
    res = ['<html><body>']
    for group in groups:
    res += [html_group(group)]
    res += ['<hr/>']
    res += ['\n<hr/>\n'.join(html_group(g) for g in groups)]
    res += ['<hr/>']
    res += ['</body></html>']

    return '\n'.join(res)
  7. isagalaev revised this gist Jan 17, 2013. 1 changed file with 8 additions and 3 deletions.
    11 changes: 8 additions & 3 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -45,15 +45,19 @@ def duplicates(dirname):
    distances = [(distance(data, d), f) for f, d in images]
    yield sorted([(dist, f) for dist, f in distances if dist < MAX_DISTANCE])

    def html_group(group):
    return '\n'.join(
    '<img src="%s" width="%s"/>%s' % (os.path.basename(f), WIDTH, dist)
    for dist, f in group
    )

    def html(groups):
    """
    Generates HTML from groups of image duplicates
    """
    res = ['<html><body>']
    for group in groups:
    res += [
    '<img src="%s" width="%s"/>%s' % (os.path.basename(f), WIDTH, dist)
    for dist, f in group]
    res += [html_group(group)]
    res += ['<hr/>']
    res += ['</body></html>']

    @@ -63,3 +67,4 @@ def html(groups):
    reference = open('reference.html').read()
    result = html(duplicates('/home/maniac/Desktop/4554182')) + '\n'
    print(reference == result)
    # print(html(duplicates('/home/maniac/Desktop/4554182')))
  8. isagalaev revised this gist Jan 17, 2013. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -42,8 +42,8 @@ def duplicates(dirname):
    # random.shuffle(images)

    for filename, data in images:
    distances = [(distance(data, d), f, d) for f, d in images]
    yield sorted([(dist, f, d) for dist, f, d in distances if dist < MAX_DISTANCE])
    distances = [(distance(data, d), f) for f, d in images]
    yield sorted([(dist, f) for dist, f in distances if dist < MAX_DISTANCE])

    def html(groups):
    """
    @@ -53,7 +53,7 @@ def html(groups):
    for group in groups:
    res += [
    '<img src="%s" width="%s"/>%s' % (os.path.basename(f), WIDTH, dist)
    for dist, f, d in group]
    for dist, f in group]
    res += ['<hr/>']
    res += ['</body></html>']

  9. isagalaev revised this gist Jan 17, 2013. 1 changed file with 3 additions and 2 deletions.
    5 changes: 3 additions & 2 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -42,7 +42,8 @@ def duplicates(dirname):
    # random.shuffle(images)

    for filename, data in images:
    yield sorted([(distance(data, d), f, d) for f, d in images])
    distances = [(distance(data, d), f, d) for f, d in images]
    yield sorted([(dist, f, d) for dist, f, d in distances if dist < MAX_DISTANCE])

    def html(groups):
    """
    @@ -52,7 +53,7 @@ def html(groups):
    for group in groups:
    res += [
    '<img src="%s" width="%s"/>%s' % (os.path.basename(f), WIDTH, dist)
    for dist, f, d in group if dist < MAX_DISTANCE]
    for dist, f, d in group]
    res += ['<hr/>']
    res += ['</body></html>']

  10. isagalaev revised this gist Jan 17, 2013. 1 changed file with 13 additions and 6 deletions.
    19 changes: 13 additions & 6 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -31,27 +31,34 @@ def distance(data1, data2):
    """
    return sum(1 for x in data1 - data2 if abs(x) > THRESHOLD)

    def html(dirname):
    def duplicates(dirname):
    """
    Generates HTML from a set of images in a directory.
    Finds duplicate images in a directory.
    All files must be *.jpg.
    Returns an iterator of image groups ([], [], ... [])
    """
    files = glob.glob(os.path.join(dirname, '*.jpg'))
    images = [image_data(f) for f in files]
    # random.shuffle(images)

    res = ['<html><body>']
    for filename, data in images:
    distances = sorted([ (distance(data, d), f, d) for f, d in images ])
    yield sorted([(distance(data, d), f, d) for f, d in images])

    def html(groups):
    """
    Generates HTML from groups of image duplicates
    """
    res = ['<html><body>']
    for group in groups:
    res += [
    '<img src="%s" width="%s"/>%s' % (os.path.basename(f), WIDTH, dist)
    for dist, f, d in distances if dist < MAX_DISTANCE]
    for dist, f, d in group if dist < MAX_DISTANCE]
    res += ['<hr/>']
    res += ['</body></html>']

    return '\n'.join(res)

    if __name__ == '__main__':
    reference = open('reference.html').read()
    result = html('/home/maniac/Desktop/4554182') + '\n'
    result = html(duplicates('/home/maniac/Desktop/4554182')) + '\n'
    print(reference == result)
  11. isagalaev revised this gist Jan 17, 2013. 1 changed file with 9 additions and 8 deletions.
    17 changes: 9 additions & 8 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -6,13 +6,16 @@
    #
    import os
    import random
    import glob

    from PIL import Image
    import numpy


    BLOCK_SIZE = 20
    THRESHOLD = 60
    WIDTH = 200
    MAX_DISTANCE = 220


    def image_data(filename):
    @@ -33,18 +36,16 @@ def html(dirname):
    Generates HTML from a set of images in a directory.
    All files must be *.jpg.
    """
    images = \
    [image_data(os.path.join(dirname, filename)) \
    for filename in os.listdir(dirname)
    if filename.endswith('.jpg')]
    files = glob.glob(os.path.join(dirname, '*.jpg'))
    images = [image_data(f) for f in files]
    # random.shuffle(images)

    res = ['<html><body>']
    for img in images:
    distances = sorted([ (distance(img[1], x[1]), x) for x in images ])
    for filename, data in images:
    distances = sorted([ (distance(data, d), f, d) for f, d in images ])
    res += [
    '<img src="' + os.path.basename(x[0]) + '" width="200"/>' + str(dist)
    for dist, x in distances if dist < 220]
    '<img src="%s" width="%s"/>%s' % (os.path.basename(f), WIDTH, dist)
    for dist, f, d in distances if dist < MAX_DISTANCE]
    res += ['<hr/>']
    res += ['</body></html>']

  12. isagalaev revised this gist Jan 17, 2013. 1 changed file with 3 additions and 10 deletions.
    13 changes: 3 additions & 10 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -7,7 +7,7 @@
    import os
    import random

    from PIL import Image as pImage
    from PIL import Image
    import numpy


    @@ -19,15 +19,8 @@ def image_data(filename):
    """
    Get data from image ready for comparison
    """
    filename = filename
    img = pImage.open(filename)
    small = img.resize( (BLOCK_SIZE, BLOCK_SIZE),
    pImage.BILINEAR )
    t_data = numpy.array(
    [sum(list(x)) for x in small.getdata()]
    )
    del img, small
    return filename, t_data
    img = Image.open(filename).resize((BLOCK_SIZE, BLOCK_SIZE), Image.BILINEAR)
    return filename, numpy.array([sum(x) for x in img.getdata()])

    def distance(data1, data2):
    """
  13. isagalaev revised this gist Jan 17, 2013. 1 changed file with 22 additions and 31 deletions.
    53 changes: 22 additions & 31 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -35,38 +35,29 @@ def distance(data1, data2):
    """
    return sum(1 for x in data1 - data2 if abs(x) > THRESHOLD)

    class ImageList:
    """List of images information, built from directory.
    All files must be *.jpg"""
    def __init__(self, dirname):
    self.dirname = dirname
    self.load()

    def load(self):
    self.images = \
    [image_data(os.path.join(self.dirname, filename)) \
    for filename in os.listdir(self.dirname)
    if filename.endswith('.jpg')]
    # random.shuffle(self.images)
    return self

    def __repr__(self):
    return '\n'.join( ( x.filename for x in self.images ) )

    def html(self):
    res = ['<html><body>']
    for img in self.images:
    distances = sorted([ (distance(img[1], x[1]), x) for x in self.images ])
    res += [
    '<img src="' + os.path.basename(x[0]) + '" width="200"/>' + str(dist)
    for dist, x in distances if dist < 220]
    res += ['<hr/>']
    res += ['</body></html>']

    return '\n'.join(res)
    def html(dirname):
    """
    Generates HTML from a set of images in a directory.
    All files must be *.jpg.
    """
    images = \
    [image_data(os.path.join(dirname, filename)) \
    for filename in os.listdir(dirname)
    if filename.endswith('.jpg')]
    # random.shuffle(images)

    res = ['<html><body>']
    for img in images:
    distances = sorted([ (distance(img[1], x[1]), x) for x in images ])
    res += [
    '<img src="' + os.path.basename(x[0]) + '" width="200"/>' + str(dist)
    for dist, x in distances if dist < 220]
    res += ['<hr/>']
    res += ['</body></html>']

    return '\n'.join(res)

    if __name__ == '__main__':
    reference = open('reference.html').read()
    il = ImageList('/home/maniac/Desktop/4554182')
    result = il.html() + '\n'
    result = html('/home/maniac/Desktop/4554182') + '\n'
    print(reference == result)
  14. isagalaev revised this gist Jan 17, 2013. 1 changed file with 18 additions and 16 deletions.
    34 changes: 18 additions & 16 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -15,23 +15,25 @@
    THRESHOLD = 60


    class Image:
    """Take an information from image file"""
    def __init__(self, filename):
    self.filename = filename
    img = pImage.open(self.filename)
    small = img.resize( (BLOCK_SIZE, BLOCK_SIZE),
    pImage.BILINEAR )
    self.t_data = numpy.array(
    [sum(list(x)) for x in small.getdata()]
    )
    del img, small
    def image_data(filename):
    """
    Get data from image ready for comparison
    """
    filename = filename
    img = pImage.open(filename)
    small = img.resize( (BLOCK_SIZE, BLOCK_SIZE),
    pImage.BILINEAR )
    t_data = numpy.array(
    [sum(list(x)) for x in small.getdata()]
    )
    del img, small
    return filename, t_data

    def distance(self, other):
    def distance(data1, data2):
    """
    Logical distance between two images on a scale 0..400
    """
    return sum(1 for x in self.t_data - other.t_data if abs(x) > THRESHOLD)
    return sum(1 for x in data1 - data2 if abs(x) > THRESHOLD)

    class ImageList:
    """List of images information, built from directory.
    @@ -42,7 +44,7 @@ def __init__(self, dirname):

    def load(self):
    self.images = \
    [Image(os.path.join(self.dirname, filename)) \
    [image_data(os.path.join(self.dirname, filename)) \
    for filename in os.listdir(self.dirname)
    if filename.endswith('.jpg')]
    # random.shuffle(self.images)
    @@ -54,9 +56,9 @@ def __repr__(self):
    def html(self):
    res = ['<html><body>']
    for img in self.images:
    distances = sorted([ (distance(img, x), x) for x in self.images ])
    distances = sorted([ (distance(img[1], x[1]), x) for x in self.images ])
    res += [
    '<img src="' + os.path.basename(x.filename) + '" width="200"/>' + str(dist)
    '<img src="' + os.path.basename(x[0]) + '" width="200"/>' + str(dist)
    for dist, x in distances if dist < 220]
    res += ['<hr/>']
    res += ['</body></html>']
  15. isagalaev revised this gist Jan 17, 2013. 1 changed file with 16 additions and 19 deletions.
    35 changes: 16 additions & 19 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -1,40 +1,37 @@
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # coding: utf-8
    #
    # Video of this screencast: https://vimeo.com/57296525
    #
    #
    from __future__ import print_function, division, absolute_import
    import os
    import random

    from PIL import Image as pImage
    import numpy

    import os
    import random

    class Image:
    """Take an information from image file"""
    BLOCK_SIZE = 20
    THRESHOLD = 60

    BLOCK_SIZE = 20
    TRESHOLD = 60

    class Image:
    """Take an information from image file"""
    def __init__(self, filename):
    self.filename = filename

    def load(self):
    img = pImage.open(self.filename)
    small = img.resize( (Image.BLOCK_SIZE, Image.BLOCK_SIZE),
    small = img.resize( (BLOCK_SIZE, BLOCK_SIZE),
    pImage.BILINEAR )
    self.t_data = numpy.array(
    [sum(list(x)) for x in small.getdata()]
    )
    del img, small
    return self

    def __repr__(self):
    return self.filename

    def __mul__(self, other):
    return sum(1 for x in self.t_data - other.t_data if abs(x) > Image.TRESHOLD)
    def distance(self, other):
    """
    Logical distance between two images on a scale 0..400
    """
    return sum(1 for x in self.t_data - other.t_data if abs(x) > THRESHOLD)

    class ImageList:
    """List of images information, built from directory.
    @@ -45,7 +42,7 @@ def __init__(self, dirname):

    def load(self):
    self.images = \
    [Image(os.path.join(self.dirname, filename)).load() \
    [Image(os.path.join(self.dirname, filename)) \
    for filename in os.listdir(self.dirname)
    if filename.endswith('.jpg')]
    # random.shuffle(self.images)
    @@ -57,7 +54,7 @@ def __repr__(self):
    def html(self):
    res = ['<html><body>']
    for img in self.images:
    distances = sorted([ (img * x, x) for x in self.images ])
    distances = sorted([ (distance(img, x), x) for x in self.images ])
    res += [
    '<img src="' + os.path.basename(x.filename) + '" width="200"/>' + str(dist)
    for dist, x in distances if dist < 220]
  16. isagalaev revised this gist Jan 17, 2013. 1 changed file with 8 additions and 6 deletions.
    14 changes: 8 additions & 6 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -1,9 +1,9 @@
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    #
    #
    # Video of this screencast: https://vimeo.com/57296525
    #
    #
    #
    #
    from __future__ import print_function, division, absolute_import
    from PIL import Image as pImage
    import numpy
    @@ -48,7 +48,7 @@ def load(self):
    [Image(os.path.join(self.dirname, filename)).load() \
    for filename in os.listdir(self.dirname)
    if filename.endswith('.jpg')]
    random.shuffle(self.images)
    # random.shuffle(self.images)
    return self

    def __repr__(self):
    @@ -67,5 +67,7 @@ def html(self):
    return '\n'.join(res)

    if __name__ == '__main__':
    il = ImageList('/Users/bobuk/,misc/wm')
    print(il.html())
    reference = open('reference.html').read()
    il = ImageList('/home/maniac/Desktop/4554182')
    result = il.html() + '\n'
    print(reference == result)
  17. Grigory Bakunov revised this gist Jan 13, 2013. 1 changed file with 4 additions and 1 deletion.
    5 changes: 4 additions & 1 deletion img.py
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,9 @@
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-

    #
    # Video of this screencast: https://vimeo.com/57296525
    #
    #
    from __future__ import print_function, division, absolute_import
    from PIL import Image as pImage
    import numpy
  18. Grigory Bakunov revised this gist Jan 13, 2013. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion img.py
    Original file line number Diff line number Diff line change
    @@ -34,7 +34,7 @@ def __mul__(self, other):
    return sum(1 for x in self.t_data - other.t_data if abs(x) > Image.TRESHOLD)

    class ImageList:
    """List of images information, build from directory.
    """List of images information, built from directory.
    All files must be *.jpg"""
    def __init__(self, dirname):
    self.dirname = dirname
  19. Grigory Bakunov created this gist Jan 13, 2013.
    68 changes: 68 additions & 0 deletions img.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,68 @@
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-

    from __future__ import print_function, division, absolute_import
    from PIL import Image as pImage
    import numpy

    import os
    import random

    class Image:
        """Brightness fingerprint of an image file, used to compare images.

        Call :meth:`load` before comparing. ``a * b`` then gives the number of
        fingerprint cells in which the two images differ noticeably.
        """

        # Images are resized to BLOCK_SIZE x BLOCK_SIZE before being compared.
        BLOCK_SIZE = 20
        # Minimum absolute difference between two cells for them to count as
        # different. The (sic) spelling is kept: it is a public class attribute.
        TRESHOLD = 60

        def __init__(self, filename):
            self.filename = filename

        def load(self):
            """Read the file and store its fingerprint in ``self.t_data``.

            Returns ``self`` so that construction and loading can be chained.
            """
            img = pImage.open(self.filename)
            small = img.resize((Image.BLOCK_SIZE, Image.BLOCK_SIZE),
                               pImage.BILINEAR)
            # Multi-band pixels arrive as tuples and are summed per pixel.
            # Single-band (e.g. greyscale) pixels arrive as plain numbers and
            # must not be iterated.
            self.t_data = numpy.array(
                [sum(px) if isinstance(px, tuple) else px
                 for px in small.getdata()]
            )
            return self

        def __repr__(self):
            return self.filename

        def __mul__(self, other):
            """Return how many cells differ from ``other`` by more than TRESHOLD."""
            delta = numpy.abs(self.t_data - other.t_data)
            # int() keeps the return type a plain Python int, as before.
            return int(numpy.count_nonzero(delta > Image.TRESHOLD))

    class ImageList:
        """List of image fingerprints, built from a directory.

        Only files whose names end in ``.jpg`` are picked up.
        """

        # Value written into the ``width`` attribute of every <img> tag.
        WIDTH = 200
        # Only images whose distance is below this value are listed in a group.
        MAX_DISTANCE = 220

        def __init__(self, dirname):
            self.dirname = dirname
            self.load()

        def load(self):
            """Load every ``*.jpg`` in ``self.dirname`` and shuffle the result.

            Returns ``self``.
            """
            self.images = [
                Image(os.path.join(self.dirname, filename)).load()
                for filename in os.listdir(self.dirname)
                if filename.endswith('.jpg')
            ]
            random.shuffle(self.images)
            return self

        def __repr__(self):
            return '\n'.join(x.filename for x in self.images)

        def html(self):
            """Return an HTML page with one ``<hr/>``-terminated group per image.

            Each group lists the images whose distance to the group's image is
            below ``MAX_DISTANCE``, nearest first, each followed by its distance.
            """
            res = ['<html><body>']
            for img in self.images:
                close = [(img * x, x) for x in self.images]
                close = [(dist, x) for dist, x in close
                         if dist < self.MAX_DISTANCE]
                # Sort on (distance, filename) rather than on the raw tuples.
                # Equal distances would otherwise fall back to comparing the
                # image objects, which are not orderable.
                close.sort(key=lambda pair: (pair[0], pair[1].filename))
                res += [
                    '<img src="%s" width="%s"/>%s'
                    % (os.path.basename(x.filename), self.WIDTH, dist)
                    for dist, x in close
                ]
                res += ['<hr/>']
            res += ['</body></html>']

            return '\n'.join(res)

    if __name__ == '__main__':
        import sys

        # Directory to scan: the first command-line argument if given,
        # otherwise the original hard-coded location.
        dirname = sys.argv[1] if len(sys.argv) > 1 else '/Users/bobuk/,misc/wm'
        il = ImageList(dirname)
        print(il.html())