Forked from dylanmckay/facebook-contact-info-summary.rb
          
        
    
          Created
          April 3, 2018 15:14 
        
      - 
      
- 
        Save sgwd/c8eeafe764473d1c9370933b82fbe947 to your computer and use it in GitHub Desktop. 
Revisions
- 
        dylanmckay revised this gist Mar 27, 2018 . 1 changed file with 8 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -108,7 +108,13 @@ def print_metadata(metadata, metadata_title:) end def print_timestamps(metadata, metadata_name:) timestamps = metadata.map { |r| r[1].to_s.chomp }.select { |s| s.size > 0 }.map do |t| begin Time.parse(t) rescue ArgumentError # do not parse timestamp if unparseable t end end if timestamps.size > 0 puts "The oldest #{metadata_name} is from #{timestamps.min.to_date}, the most recent at #{timestamps.max.to_date}" @@ -193,3 +199,4 @@ def print_metadata_human(html_doc) hr $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv" dump_metadata_csv(html_doc) 
- 
        dylanmckay revised this gist Mar 25, 2018 . 1 changed file with 24 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -14,6 +14,30 @@ # alongside the 'html' folder. # # This script requires Ruby and the Nokogiri library to be installed. # # Open source licensing # --------------------- # # Dual-licensed under the MIT and Apache 2.0 open source licenses. Either license can be chosen # by any user of the program. # # The MIT license is duplicated here, the Apache 2.0 license can be found here # https://opensource.org/licenses/Apache-2.0 # # The MIT License (MIT) # Copyright (c) 2018 Dylan McKay # # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # documentation files (the "Software"), to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, # and to permit persons to whom the Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS # OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. require 'nokogiri' require 'time' @@ -169,4 +193,3 @@ def print_metadata_human(html_doc) hr $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv" dump_metadata_csv(html_doc) 
- 
        dylanmckay revised this gist Mar 24, 2018 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,7 @@ #! /usr/bin/env ruby # NOTE: Requires Ruby 2.1 or greater. # This script can be used to parse and dump the information from # the 'html/contact_info.htm' file in a Facebook user data ZIP download. # @@ -167,3 +169,4 @@ def print_metadata_human(html_doc) hr $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv" dump_metadata_csv(html_doc) 
- 
        dylanmckay revised this gist Mar 23, 2018 . 1 changed file with 10 additions and 8 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -39,10 +39,11 @@ def section(title, level: 1) # Returns nil if there is no metadata in this table. # Returns a 2d list of row/colums def extract_table_metadata(metadata_table) headings = metadata_table.css('tr').first.css('th').map(&:text).map(&:chomp) records = metadata_table.css('tr')[1..-1] return nil if records.size <= 1 # many tables are empty (excluding headings). [headings] + records.map do |call_record| call_record.css('td').map(&:text).map(&:chomp).map do |field| if field.include? ' at ' # some fields are dates/times # Time example: "Wednesday, 14 June 2017 at 19:02 UTC+12" @@ -64,13 +65,16 @@ def dig_out_metadata(container:) contact_tables.map do |contact_table| metadata_table = contact_table.css('table')[0] extract_table_metadata(metadata_table) end.compact.select { |t| t.size > 1 } # must include non-header rows end def print_metadata(metadata, metadata_title:) section(metadata_title) do metadata.each do |phone_records| puts indent(2) and puts "Another phone number" puts phone_records.each do |record| indent(2) and puts record.join(", ") end end @@ -135,11 +139,10 @@ def print_metadata_human(html_doc) section("The full list of phone numbers that have stored data") do phone_numbers.each_slice(8).to_a.map { |g| g.join(", ") }.each do |line| indent(2) and $stdout.puts line end end $stdout.puts "A brief summary of phone records" hr $stdout.puts "There are phone records for #{phone_numbers.size} distinct phone numbers" @@ -164,4 +167,3 @@ def print_metadata_human(html_doc) hr $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv" dump_metadata_csv(html_doc) 
- 
        dylanmckay revised this gist Mar 23, 2018 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -46,7 +46,7 @@ def extract_table_metadata(metadata_table) call_record.css('td').map(&:text).map(&:chomp).map do |field| if field.include? ' at ' # some fields are dates/times # Time example: "Wednesday, 14 June 2017 at 19:02 UTC+12" Time.strptime(field, "%A, %e %B %Y at %R UTC%z") rescue field else field # no special processing end 
- 
        dylanmckay revised this gist Mar 23, 2018 . 1 changed file with 9 additions and 1 deletion.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -43,7 +43,14 @@ def extract_table_metadata(metadata_table) return nil if records.size == 0 # many tables are empty. records.map do |call_record| call_record.css('td').map(&:text).map(&:chomp).map do |field| if field.include? ' at ' # some fields are dates/times # Time example: "Wednesday, 14 June 2017 at 19:02 UTC+12" Time.strptime(field, "%A, %e %B %Y at %R UTC%z") else field # no special processing end end end end @@ -157,3 +164,4 @@ def print_metadata_human(html_doc) hr $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv" dump_metadata_csv(html_doc) 
- 
        dylanmckay revised this gist Mar 23, 2018 . 1 changed file with 1 addition and 2 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -87,7 +87,7 @@ def print_status_breakdown(metadata, metadata_name:) end def metadata_to_csv(metadata) metadata.flatten(1).map { |record| record.join(',') }.join("\n") end def dump_metadata_csv(html_doc) @@ -157,4 +157,3 @@ def print_metadata_human(html_doc) hr $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv" dump_metadata_csv(html_doc) 
- 
        dylanmckay revised this gist Mar 23, 2018 . 1 changed file with 31 additions and 5 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,16 +3,19 @@ # This script can be used to parse and dump the information from # the 'html/contact_info.htm' file in a Facebook user data ZIP download. # # It prints all cell phone call + SMS message + MMS records, plus a summary of each. # # It also dumps all of the records into CSV files inside a 'CSV' folder, that is created # in whatever the working directory of the program is when executed. # # Place this script inside the extracted Facebook data download folder # alongside the 'html' folder. # # This script requires Ruby and the Nokogiri library to be installed. require 'nokogiri' require 'time' require 'fileutils' def hr $stdout.puts "-" * 24 @@ -59,7 +62,6 @@ def dig_out_metadata(container:) def print_metadata(metadata, metadata_title:) section(metadata_title) do metadata.each do |record| section("Another Phone Number", :level => 2) do indent(2) and puts record.join(", ") @@ -84,7 +86,27 @@ def print_status_breakdown(metadata, metadata_name:) end end def metadata_to_csv(metadata) metadata.map { |record| record.join(',') }.join("\n") end def dump_metadata_csv(html_doc) call_history_container = html_doc.xpath("//h2[text()='Call History']/following-sibling::div")[0] sms_history_container = html_doc.xpath("//h2[text()='SMS History']/following-sibling::div")[0] mms_history_container = html_doc.xpath("//h2[text()='MMS History']/following-sibling::div")[0] FileUtils.mkdir_p("csv") call_metadata = dig_out_metadata(:container => call_history_container) sms_metadata = dig_out_metadata(:container => sms_history_container) mms_metadata = dig_out_metadata(:container => mms_history_container) File.write(File.join("csv", "call.csv"), metadata_to_csv(call_metadata)) File.write(File.join("csv", "sms.csv"), metadata_to_csv(sms_metadata)) File.write(File.join("csv", "mms.csv"), metadata_to_csv(mms_metadata)) end def print_metadata_human(html_doc) call_history_container = html_doc.xpath("//h2[text()='Call History']/following-sibling::div")[0] sms_history_container = html_doc.xpath("//h2[text()='SMS History']/following-sibling::div")[0] mms_history_container = html_doc.xpath("//h2[text()='MMS History']/following-sibling::div")[0] @@ -129,6 +151,10 @@ def print_call_history(html_doc) html_text = File.read('html/contact_info.htm') html_doc = Nokogiri::HTML(html_text) print_metadata_human(html_doc) $stdout.puts hr $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv" dump_metadata_csv(html_doc) 
- 
        dylanmckay revised this gist Mar 22, 2018 . 1 changed file with 16 additions and 4 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -45,6 +45,10 @@ def extract_table_metadata(metadata_table) end def dig_out_metadata(container:) # If a specific type of metadata is missing (calls, texts, ..), the # container div will simply not be present. return [] if container.nil? contact_tables = container.children.select { |c| c.name == "table" } contact_tables.map do |contact_table| @@ -67,13 +71,17 @@ def print_metadata(metadata, metadata_title:) def print_timestamps(metadata, metadata_name:) timestamps = metadata.map { |r| r[1].to_s.chomp }.select { |s| s.size > 0 }.map { |t| Time.parse(t) } if timestamps.size > 0 puts "The oldest #{metadata_name} is from #{timestamps.min.to_date}, the most recent at #{timestamps.max.to_date}" end end def print_status_breakdown(metadata, metadata_name:) grouped_statuses = metadata.flatten(1).group_by(&:first) if grouped_statuses.size > 0 puts "This includes " + grouped_statuses.map { |status,records| "#{records.size} #{status.downcase} #{metadata_name}"}.join(", ") end end def print_call_history(html_doc) @@ -85,8 +93,12 @@ def print_call_history(html_doc) sms_metadata = dig_out_metadata(:container => sms_history_container) mms_metadata = dig_out_metadata(:container => mms_history_container) if call_history_container phone_numbers = call_history_container.xpath("//b[text()='Number:']/following-sibling::text()") .map(&:text).sort.uniq else phone_numbers = [] end print_metadata(call_metadata, :metadata_title => "Call History") print_metadata(sms_metadata, :metadata_title => "SMS History") 
- 
        dylanmckay revised this gist Mar 22, 2018 . 1 changed file with 10 additions and 0 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,15 @@ #! /usr/bin/env ruby # This script can be used to parse and dump the information from # the 'html/contact_info.htm' file in a Facebook user data ZIP download. # # It dumps all cell phone call + SMS message + MMS records, plus a summary of each. # # Place this script inside the extracted Facebook data download folder # alongside the 'html' folder. # # This script requires Ruby and the Nokogiri library to be installed. require 'nokogiri' require 'byebug' require 'time' 
- 
        dylanmckay revised this gist Mar 22, 2018 . 1 changed file with 4 additions and 13 deletions.There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,16 +1,7 @@ #! /usr/bin/env ruby require 'nokogiri' require 'byebug' require 'time' def hr @@ -101,13 +92,13 @@ def print_call_history(html_doc) $stdout.puts "A brief summary of phone records" hr $stdout.puts "There are phone records for #{phone_numbers.size} distinct phone numbers" $stdout.puts "There are records of #{call_metadata.flatten(1).size} distinct cell phone calls" indent(2) and print_timestamps(call_metadata, :metadata_name => "cell phone call") indent(2) and print_status_breakdown(call_metadata, :metadata_name => "cell phone calls") $stdout.puts "There are records of #{sms_metadata.flatten(1).size} distinct SMS messages" indent(2) and print_timestamps(sms_metadata, :metadata_name => "SMS message") indent(2) and print_status_breakdown(sms_metadata, :metadata_name => "SMS messages") $stdout.puts "There are records of #{mms_metadata.flatten(1).size} distinct MMS messages" indent(2) and print_timestamps(mms_metadata, :metadata_name => "MMS message") indent(2) and print_status_breakdown(mms_metadata, :metadata_name => "MMS messages") hr 
- 
        dylanmckay created this gist Mar 22, 2018 .There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,121 @@ #! /usr/bin/env ruby # This script can be used to parse and dump the information from # the 'html/contact_info.htm' file in a Facebook user data ZIP download. # # It dumps all cell phone call + SMS message + MMS records, plus a summary of each. # # Place this script inside the extracted Facebook data download folder # alongside the 'html' folder. # # This script requires Ruby and the Nokogiri library to be installed. require 'nokogiri' require 'time' def hr $stdout.puts "-" * 24 end def indent(level = 1) $stdout.print " " * (level - 1) $stdout.flush end def section(title, level: 1) indent(level) and hr indent(level) and $stdout.puts title indent(level) and $stdout.puts yield indent(level) and hr indent(level) and $stdout.puts end # Extracts metadata from a call/text/sms/mms table # Returns nil if there is no metadata in this table. # Returns a 2d list of row/colums def extract_table_metadata(metadata_table) records = metadata_table.css('tr')[1..-1] return nil if records.size == 0 # many tables are empty. records.map do |call_record| call_record.css('td').map(&:text).map(&:chomp) end end def dig_out_metadata(container:) contact_tables = container.children.select { |c| c.name == "table" } contact_tables.map do |contact_table| metadata_table = contact_table.css('table')[0] extract_table_metadata(metadata_table) end.compact end def print_metadata(metadata, metadata_title:) section(metadata_title) do byebug if metadata.include? nil metadata.each do |record| section("Another Phone Number", :level => 2) do indent(2) and puts record.join(", ") end end end end def print_timestamps(metadata, metadata_name:) timestamps = metadata.map { |r| r[1].to_s.chomp }.select { |s| s.size > 0 }.map { |t| Time.parse(t) } puts "The oldest #{metadata_name} is from #{timestamps.min.to_date}, the most recent at #{timestamps.max.to_date}" end def print_status_breakdown(metadata, metadata_name:) grouped_statuses = metadata.flatten(1).group_by(&:first) puts "This includes " + grouped_statuses.map { |status,records| "#{records.size} #{status.downcase} #{metadata_name}"}.join(", ") end def print_call_history(html_doc) call_history_container = html_doc.xpath("//h2[text()='Call History']/following-sibling::div")[0] sms_history_container = html_doc.xpath("//h2[text()='SMS History']/following-sibling::div")[0] mms_history_container = html_doc.xpath("//h2[text()='MMS History']/following-sibling::div")[0] call_metadata = dig_out_metadata(:container => call_history_container) sms_metadata = dig_out_metadata(:container => sms_history_container) mms_metadata = dig_out_metadata(:container => mms_history_container) phone_numbers = call_history_container.xpath("//b[text()='Number:']/following-sibling::text()") .map(&:text).sort.uniq print_metadata(call_metadata, :metadata_title => "Call History") print_metadata(sms_metadata, :metadata_title => "SMS History") print_metadata(mms_metadata, :metadata_title => "MMS History") section("The full list of phone numbers that have stored data") do phone_numbers.each_slice(8).to_a.map { |g| g.join(", ") }.each do |line| indent(2) and puts line end end $stdout.puts "A brief summary of phone records" hr $stdout.puts "There are phone records for #{phone_numbers.size} distinct phone numbers" $stdout.puts "There are records of #{call_metadata.size} distinct cell phone calls" indent(2) and print_timestamps(call_metadata, :metadata_name => "cell phone call") indent(2) and print_status_breakdown(call_metadata, :metadata_name => "cell phone calls") $stdout.puts "There are records of #{sms_metadata.size} distinct SMS messages" indent(2) and print_timestamps(sms_metadata, :metadata_name => "SMS message") indent(2) and print_status_breakdown(sms_metadata, :metadata_name => "SMS messages") $stdout.puts "There are records of #{mms_metadata.size} distinct MMS messages" indent(2) and print_timestamps(mms_metadata, :metadata_name => "MMS message") indent(2) and print_status_breakdown(mms_metadata, :metadata_name => "MMS messages") hr end html_text = File.read('html/contact_info.htm') html_doc = Nokogiri::HTML(html_text) print_call_history(html_doc)