georgy7 · March 8, 2017 18:09 · Apr 2, 2015 · Apr 2, 2015 · Apr 2, 2015 · Apr 2, 2015
diff --git a/find-duplicate-files.rb b/find-duplicate-files.rb
@@ -50,15 +50,14 @@
 File.open(output, 'w') do |f|
   f.puts '['
   hash.each do |key, filename_array|
-    if filename_array.length > 1
-      record = {}
-      record['files'] = filename_array
-      record['md5'] = key
-
-      f.puts ',' if counter > 0
-      f.write JSON.pretty_generate(record)
-      counter += 1
-    end
+    next if filename_array.length <= 1
+    record = {}
+    record['files'] = filename_array
+    record['md5'] = key
+
+    f.puts ',' if counter > 0
+    f.write JSON.pretty_generate(record)
+    counter += 1
   end
   f.puts "\n]"
 end

diff --git a/find-duplicate-files.rb b/find-duplicate-files.rb
@@ -9,7 +9,7 @@
 # 2. Run the script without any arguments.
 # 3. Watch the progress.
 # 4. Get your dublicates.json file.
-# 
+#
 # https://gist.github.com/georgy7/a8ab4d5a2e90282b189c
 # Forked from https://gist.github.com/mattdipasquale/571405
 # Dot (unix hidden) files and folders are ignored.
@@ -19,45 +19,45 @@
 hash = {}
 
 output = 'dublicates.json'
-raise "#{output} already exists" if File.exists?(output)
+fail "#{output} already exists" if File.exist?(output)
 
 puts 'Exploring subdirectories. It may take a long time.'
 counter = 0
 
-Dir.glob("**/*").each do |filename|
+Dir.glob('**/*').each do |filename|
   next if File.directory?(filename)
   puts "Start!\n" if counter < 1
 
   begin
     key = Digest::MD5.file(filename).to_s
-    if hash.has_key? key
+    if hash.key? key
       hash[key].push filename
     else
       hash[key] = [filename]
     end
-  rescue  
+  rescue
     puts "Error processing #{filename}"
   end
-  
-  counter = counter + 1
+
+  counter += 1
   sleep(0.005 * rand)
   puts "#{counter} calculated (#{filename})." if 0 == counter % 1000
 end
 
 puts "\nWriting #{output}"
 counter = 0
 
-File.open(output,"w") do |f|
+File.open(output, 'w') do |f|
   f.puts '['
   hash.each do |key, filename_array|
     if filename_array.length > 1
       record = {}
-      record['files'] = filename_array 
+      record['files'] = filename_array
       record['md5'] = key
 
-      f.puts "," if counter > 0
+      f.puts ',' if counter > 0
       f.write JSON.pretty_generate(record)
-      counter = counter + 1
+      counter += 1
     end
   end
   f.puts "\n]"

diff --git a/find-duplicate-files.rb b/find-duplicate-files.rb
@@ -1,18 +1,24 @@
+#! /usr/bin/ruby
+
+require 'rubygems'
 require 'digest/md5'
+require 'json'
 
 # Usage:
-# 1. Locate a folder in console.
+# 1. In a console, go to the folder where you want to search for duplicates.
 # 2. Run the script without any arguments.
 # 3. Watch the progress.
-# 4. Get your dublicates.txt file.
+# 4. Get your dublicates.json file.
 # 
+# https://gist.github.com/georgy7/a8ab4d5a2e90282b189c
 # Forked from https://gist.github.com/mattdipasquale/571405
 # Dot (unix hidden) files and folders are ignored.
 # Warning: This script is *very* IO intensive. It can freeze your PC down.
+# It's provided 'as-is', without any express or implied warranty, etc.
 
 hash = {}
 
-output = 'dublicates.txt'
+output = 'dublicates.json'
 raise "#{output} already exists" if File.exists?(output)
 
 puts 'Exploring subdirectories. It may take a long time.'
@@ -38,15 +44,23 @@
   puts "#{counter} calculated (#{filename})." if 0 == counter % 1000
 end
 
-puts "\nWriting of output file..."
+puts "\nWriting #{output}"
+counter = 0
 
 File.open(output,"w") do |f|
-  hash.each_value do |filename_array|
+  f.puts '['
+  hash.each do |key, filename_array|
     if filename_array.length > 1
-      f.puts "=== Identical Files ===\n"
-      filename_array.each { |filename| f.puts '  '+filename }
+      record = {}
+      record['files'] = filename_array 
+      record['md5'] = key
+
+      f.puts "," if counter > 0
+      f.write JSON.pretty_generate(record)
+      counter = counter + 1
     end
   end
+  f.puts "\n]"
 end
 
 puts "Done.\n"
diff --git a/find-duplicate-files.rb b/find-duplicate-files.rb
@@ -15,17 +15,16 @@
 output = 'dublicates.txt'
 raise "#{output} already exists" if File.exists?(output)
 
-puts 'Exploring subdirectories, calculating MD5...'
+puts 'Exploring subdirectories. It may take a long time.'
 counter = 0
 
 Dir.glob("**/*").each do |filename|
   next if File.directory?(filename)
-  # puts 'Checking ' + filename
+  puts "Start!\n" if counter < 1
 
   begin
     key = Digest::MD5.file(filename).to_s
     if hash.has_key? key
-      # puts "same file #{filename}"
       hash[key].push filename
     else
       hash[key] = [filename]
@@ -36,19 +35,18 @@
 
   counter = counter + 1
   sleep(0.005 * rand)
-  if 0 == counter % 1000
-    puts "#{counter} calculated (#{filename})."
-    sleep(0.5 * rand)
-  end
+  puts "#{counter} calculated (#{filename})." if 0 == counter % 1000
 end
 
-puts 'Exploring finished.'
+puts "\nWriting of output file..."
 
 File.open(output,"w") do |f|
   hash.each_value do |filename_array|
     if filename_array.length > 1
       f.puts "=== Identical Files ===\n"
-      filename_array.each { |filename| puts '  '+filename }
+      filename_array.each { |filename| f.puts '  '+filename }
     end
   end
 end
+
+puts "Done.\n"
diff --git a/find-duplicate-files.rb b/find-duplicate-files.rb
@@ -35,7 +35,7 @@
   end
 
   counter = counter + 1
-  sleep(0.01 * rand)
+  sleep(0.005 * rand)
   if 0 == counter % 1000
     puts "#{counter} calculated (#{filename})."
     sleep(0.5 * rand)

diff --git a/find-duplicate-files.rb b/find-duplicate-files.rb
@@ -1,23 +1,54 @@
 require 'digest/md5'
 
+# Usage:
+# 1. Locate a folder in console.
+# 2. Run the script without any arguments.
+# 3. Watch the progress.
+# 4. Get your dublicates.txt file.
+# 
+# Forked from https://gist.github.com/mattdipasquale/571405
+# Dot (unix hidden) files and folders are ignored.
+# Warning: This script is *very* IO intensive. It can freeze your PC down.
+
 hash = {}
 
-Dir.glob("**/*", File::FNM_DOTMATCH).each do |filename|
+output = 'dublicates.txt'
+raise "#{output} already exists" if File.exists?(output)
+
+puts 'Exploring subdirectories, calculating MD5...'
+counter = 0
+
+Dir.glob("**/*").each do |filename|
   next if File.directory?(filename)
   # puts 'Checking ' + filename
 
-  key = Digest::MD5.hexdigest(IO.read(filename)).to_sym
-  if hash.has_key? key
-    # puts "same file #{filename}"
-    hash[key].push filename
-  else
-    hash[key] = [filename]
+  begin
+    key = Digest::MD5.file(filename).to_s
+    if hash.has_key? key
+      # puts "same file #{filename}"
+      hash[key].push filename
+    else
+      hash[key] = [filename]
+    end
+  rescue  
+    puts "Error processing #{filename}"
+  end
+
+  counter = counter + 1
+  sleep(0.01 * rand)
+  if 0 == counter % 1000
+    puts "#{counter} calculated (#{filename})."
+    sleep(0.5 * rand)
   end
 end
 
-hash.each_value do |filename_array|
-  if filename_array.length > 1
-    puts "=== Identical Files ===\n"
-    filename_array.each { |filename| puts '  '+filename }
+puts 'Exploring finished.'
+
+File.open(output,"w") do |f|
+  hash.each_value do |filename_array|
+    if filename_array.length > 1
+      f.puts "=== Identical Files ===\n"
+      filename_array.each { |filename| puts '  '+filename }
+    end
   end
 end
diff --git a/find-duplicate-files.rb b/find-duplicate-files.rb
@@ -0,0 +1,23 @@
+require 'digest/md5'
+
+hash = {}
+
+Dir.glob("**/*", File::FNM_DOTMATCH).each do |filename|
+  next if File.directory?(filename)
+  # puts 'Checking ' + filename
+
+  key = Digest::MD5.hexdigest(IO.read(filename)).to_sym
+  if hash.has_key? key
+    # puts "same file #{filename}"
+    hash[key].push filename
+  else
+    hash[key] = [filename]
+  end
+end
+
+hash.each_value do |filename_array|
+  if filename_array.length > 1
+    puts "=== Identical Files ===\n"
+    filename_array.each { |filename| puts '  '+filename }
+  end
+end
No results found