Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save sgwd/c8eeafe764473d1c9370933b82fbe947 to your computer and use it in GitHub Desktop.
Save sgwd/c8eeafe764473d1c9370933b82fbe947 to your computer and use it in GitHub Desktop.

Revisions

  1. @dylanmckay dylanmckay revised this gist Mar 27, 2018. 1 changed file with 8 additions and 1 deletion.
    9 changes: 8 additions & 1 deletion facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -108,7 +108,13 @@ def print_metadata(metadata, metadata_title:)
    end

    def print_timestamps(metadata, metadata_name:)
    timestamps = metadata.map { |r| r[1].to_s.chomp }.select { |s| s.size > 0 }.map { |t| Time.parse(t) }
    timestamps = metadata.map { |r| r[1].to_s.chomp }.select { |s| s.size > 0 }.map do |t|
    begin
    Time.parse(t)
    rescue ArgumentError # do not parse timestamp if unparseable
    t
    end
    end

    if timestamps.size > 0
    puts "The oldest #{metadata_name} is from #{timestamps.min.to_date}, the most recent at #{timestamps.max.to_date}"
    @@ -193,3 +199,4 @@ def print_metadata_human(html_doc)
    hr
    $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv"
    dump_metadata_csv(html_doc)

  2. @dylanmckay dylanmckay revised this gist Mar 25, 2018. 1 changed file with 24 additions and 1 deletion.
    25 changes: 24 additions & 1 deletion facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -14,6 +14,30 @@
    # alongside the 'html' folder.
    #
    # This script requires Ruby and the Nokogiri library to be installed.
    #
    # Open source licensing
    # ---------------------
    #
    # Dual-licensed under the MIT and Apache 2.0 open source licenses. Either license can be chosen
    # by any user of the program.
    #
    # The MIT license is duplicated here, the Apache 2.0 license can be found here
    # https://opensource.org/licenses/Apache-2.0
    #
    # The MIT License (MIT)
    # Copyright (c) 2018 Dylan McKay
    #
    # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
    # documentation files (the "Software"), to deal in the Software without restriction, including without limitation
    # the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    # and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
    #
    # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
    # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
    # OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
    # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

    require 'nokogiri'
    require 'time'
    @@ -169,4 +193,3 @@ def print_metadata_human(html_doc)
    hr
    $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv"
    dump_metadata_csv(html_doc)

  3. @dylanmckay dylanmckay revised this gist Mar 24, 2018. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,7 @@
    #! /usr/bin/env ruby

    # NOTE: Requires Ruby 2.1 or greater.

    # This script can be used to parse and dump the information from
    # the 'html/contact_info.htm' file in a Facebook user data ZIP download.
    #
    @@ -167,3 +169,4 @@ def print_metadata_human(html_doc)
    hr
    $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv"
    dump_metadata_csv(html_doc)

  4. @dylanmckay dylanmckay revised this gist Mar 23, 2018. 1 changed file with 10 additions and 8 deletions.
    18 changes: 10 additions & 8 deletions facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -39,10 +39,11 @@ def section(title, level: 1)
    # Returns nil if there is no metadata in this table.
    # Returns a 2d list of rows/columns
    def extract_table_metadata(metadata_table)
    headings = metadata_table.css('tr').first.css('th').map(&:text).map(&:chomp)
    records = metadata_table.css('tr')[1..-1]
    return nil if records.size == 0 # many tables are empty.
    return nil if records.size <= 1 # many tables are empty (excluding headings).

    records.map do |call_record|
    [headings] + records.map do |call_record|
    call_record.css('td').map(&:text).map(&:chomp).map do |field|
    if field.include? ' at ' # some fields are dates/times
    # Time example: "Wednesday, 14 June 2017 at 19:02 UTC+12"
    @@ -64,13 +65,16 @@ def dig_out_metadata(container:)
    contact_tables.map do |contact_table|
    metadata_table = contact_table.css('table')[0]
    extract_table_metadata(metadata_table)
    end.compact
    end.compact.select { |t| t.size > 1 } # must include non-header rows
    end

    def print_metadata(metadata, metadata_title:)
    section(metadata_title) do
    metadata.each do |record|
    section("Another Phone Number", :level => 2) do
    metadata.each do |phone_records|
    puts
    indent(2) and puts "Another phone number"
    puts
    phone_records.each do |record|
    indent(2) and puts record.join(", ")
    end
    end
    @@ -135,11 +139,10 @@ def print_metadata_human(html_doc)

    section("The full list of phone numbers that have stored data") do
    phone_numbers.each_slice(8).to_a.map { |g| g.join(", ") }.each do |line|
    indent(2) and puts line
    indent(2) and $stdout.puts line
    end
    end


    $stdout.puts "A brief summary of phone records"
    hr
    $stdout.puts "There are phone records for #{phone_numbers.size} distinct phone numbers"
    @@ -164,4 +167,3 @@ def print_metadata_human(html_doc)
    hr
    $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv"
    dump_metadata_csv(html_doc)

  5. @dylanmckay dylanmckay revised this gist Mar 23, 2018. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -46,7 +46,7 @@ def extract_table_metadata(metadata_table)
    call_record.css('td').map(&:text).map(&:chomp).map do |field|
    if field.include? ' at ' # some fields are dates/times
    # Time example: "Wednesday, 14 June 2017 at 19:02 UTC+12"
    Time.strptime(field, "%A, %e %B %Y at %R UTC%z")
    Time.strptime(field, "%A, %e %B %Y at %R UTC%z") rescue field
    else
    field # no special processing
    end
  6. @dylanmckay dylanmckay revised this gist Mar 23, 2018. 1 changed file with 9 additions and 1 deletion.
    10 changes: 9 additions & 1 deletion facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -43,7 +43,14 @@ def extract_table_metadata(metadata_table)
    return nil if records.size == 0 # many tables are empty.

    records.map do |call_record|
    call_record.css('td').map(&:text).map(&:chomp)
    call_record.css('td').map(&:text).map(&:chomp).map do |field|
    if field.include? ' at ' # some fields are dates/times
    # Time example: "Wednesday, 14 June 2017 at 19:02 UTC+12"
    Time.strptime(field, "%A, %e %B %Y at %R UTC%z")
    else
    field # no special processing
    end
    end
    end
    end

    @@ -157,3 +164,4 @@ def print_metadata_human(html_doc)
    hr
    $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv"
    dump_metadata_csv(html_doc)

  7. @dylanmckay dylanmckay revised this gist Mar 23, 2018. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -87,7 +87,7 @@ def print_status_breakdown(metadata, metadata_name:)
    end

    def metadata_to_csv(metadata)
    metadata.map { |record| record.join(',') }.join("\n")
    metadata.flatten(1).map { |record| record.join(',') }.join("\n")
    end

    def dump_metadata_csv(html_doc)
    @@ -157,4 +157,3 @@ def print_metadata_human(html_doc)
    hr
    $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv"
    dump_metadata_csv(html_doc)

  8. @dylanmckay dylanmckay revised this gist Mar 23, 2018. 1 changed file with 31 additions and 5 deletions.
    36 changes: 31 additions & 5 deletions facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -3,16 +3,19 @@
    # This script can be used to parse and dump the information from
    # the 'html/contact_info.htm' file in a Facebook user data ZIP download.
    #
    # It dumps all cell phone call + SMS message + MMS records, plus a summary of each.
    # It prints all cell phone call + SMS message + MMS records, plus a summary of each.
    #
    # It also dumps all of the records into CSV files inside a 'CSV' folder, that is created
    # in whatever the working directory of the program is when executed.
    #
    # Place this script inside the extracted Facebook data download folder
    # alongside the 'html' folder.
    #
    # This script requires Ruby and the Nokogiri library to be installed.

    require 'nokogiri'
    require 'byebug'
    require 'time'
    require 'fileutils'

    def hr
    $stdout.puts "-" * 24
    @@ -59,7 +62,6 @@ def dig_out_metadata(container:)

    def print_metadata(metadata, metadata_title:)
    section(metadata_title) do
    byebug if metadata.include? nil
    metadata.each do |record|
    section("Another Phone Number", :level => 2) do
    indent(2) and puts record.join(", ")
    @@ -84,7 +86,27 @@ def print_status_breakdown(metadata, metadata_name:)
    end
    end

    def print_call_history(html_doc)
    # Serializes extracted metadata as CSV text.
    #
    # metadata - the list produced by dig_out_metadata: one entry per table,
    #            each entry being a list of rows, each row a list of fields.
    #
    # Every row becomes one CSV line. Fields are quoted/escaped by the standard
    # CSV library, which matters here because the timestamp fields contain
    # commas (e.g. "Wednesday, 14 June 2017 at 19:02 UTC+12").
    #
    # Returns a String with lines separated by "\n" and no trailing newline;
    # returns "" for empty metadata.
    def metadata_to_csv(metadata)
      # Loaded here because the file's top-level requires do not include csv.
      require 'csv'

      # flatten(1) turns the per-table grouping into a single list of rows;
      # without it, Array#join would merge all rows of a table into one line.
      rows = metadata.flatten(1)
      rows.map { |row| CSV.generate_line(row).chomp }.join("\n")
    end

    # Writes the call, SMS and MMS metadata found in html_doc to
    # csv/call.csv, csv/sms.csv and csv/mms.csv, relative to the current
    # working directory. The csv directory is created if it does not exist.
    #
    # html_doc - the parsed document; queried with #xpath for the div that
    #            follows each "... History" h2 heading.
    #
    # Returns the result of the last File.write call.
    def dump_metadata_csv(html_doc)
      calls_div = html_doc.xpath("//h2[text()='Call History']/following-sibling::div")[0]
      texts_div = html_doc.xpath("//h2[text()='SMS History']/following-sibling::div")[0]
      media_div = html_doc.xpath("//h2[text()='MMS History']/following-sibling::div")[0]

      FileUtils.mkdir_p("csv")

      call_rows = dig_out_metadata(container: calls_div)
      sms_rows = dig_out_metadata(container: texts_div)
      mms_rows = dig_out_metadata(container: media_div)

      # Serialize and write one history at a time, in call/sms/mms order.
      write_csv = lambda do |basename, metadata|
        File.write(File.join("csv", basename), metadata_to_csv(metadata))
      end

      write_csv.call("call.csv", call_rows)
      write_csv.call("sms.csv", sms_rows)
      write_csv.call("mms.csv", mms_rows)
    end

    def print_metadata_human(html_doc)
    call_history_container = html_doc.xpath("//h2[text()='Call History']/following-sibling::div")[0]
    sms_history_container = html_doc.xpath("//h2[text()='SMS History']/following-sibling::div")[0]
    mms_history_container = html_doc.xpath("//h2[text()='MMS History']/following-sibling::div")[0]
    @@ -129,6 +151,10 @@ def print_call_history(html_doc)
    html_text = File.read('html/contact_info.htm')
    html_doc = Nokogiri::HTML(html_text)

    print_call_history(html_doc)
    print_metadata_human(html_doc)

    $stdout.puts
    hr
    $stdout.puts "dumped metadata to CSV files at #{Dir.pwd}/csv"
    dump_metadata_csv(html_doc)

  9. @dylanmckay dylanmckay revised this gist Mar 22, 2018. 1 changed file with 16 additions and 4 deletions.
    20 changes: 16 additions & 4 deletions facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -45,6 +45,10 @@ def extract_table_metadata(metadata_table)
    end

    def dig_out_metadata(container:)
    # If a specific type of metadata is missing (calls, texts, ..), the
    # container div will simply not be present.
    return [] if container.nil?

    contact_tables = container.children.select { |c| c.name == "table" }

    contact_tables.map do |contact_table|
    @@ -67,13 +71,17 @@ def print_metadata(metadata, metadata_title:)
    def print_timestamps(metadata, metadata_name:)
    timestamps = metadata.map { |r| r[1].to_s.chomp }.select { |s| s.size > 0 }.map { |t| Time.parse(t) }

    puts "The oldest #{metadata_name} is from #{timestamps.min.to_date}, the most recent at #{timestamps.max.to_date}"
    if timestamps.size > 0
    puts "The oldest #{metadata_name} is from #{timestamps.min.to_date}, the most recent at #{timestamps.max.to_date}"
    end
    end

    def print_status_breakdown(metadata, metadata_name:)
    grouped_statuses = metadata.flatten(1).group_by(&:first)

    puts "This includes " + grouped_statuses.map { |status,records| "#{records.size} #{status.downcase} #{metadata_name}"}.join(", ")
    if grouped_statuses.size > 0
    puts "This includes " + grouped_statuses.map { |status,records| "#{records.size} #{status.downcase} #{metadata_name}"}.join(", ")
    end
    end

    def print_call_history(html_doc)
    @@ -85,8 +93,12 @@ def print_call_history(html_doc)
    sms_metadata = dig_out_metadata(:container => sms_history_container)
    mms_metadata = dig_out_metadata(:container => mms_history_container)

    phone_numbers = call_history_container.xpath("//b[text()='Number:']/following-sibling::text()")
    .map(&:text).sort.uniq
    if call_history_container
    phone_numbers = call_history_container.xpath("//b[text()='Number:']/following-sibling::text()")
    .map(&:text).sort.uniq
    else
    phone_numbers = []
    end

    print_metadata(call_metadata, :metadata_title => "Call History")
    print_metadata(sms_metadata, :metadata_title => "SMS History")
  10. @dylanmckay dylanmckay revised this gist Mar 22, 2018. 1 changed file with 10 additions and 0 deletions.
    10 changes: 10 additions & 0 deletions facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,15 @@
    #! /usr/bin/env ruby

    # This script can be used to parse and dump the information from
    # the 'html/contact_info.htm' file in a Facebook user data ZIP download.
    #
    # It dumps all cell phone call + SMS message + MMS records, plus a summary of each.
    #
    # Place this script inside the extracted Facebook data download folder
    # alongside the 'html' folder.
    #
    # This script requires Ruby and the Nokogiri library to be installed.

    require 'nokogiri'
    require 'byebug'
    require 'time'
  11. @dylanmckay dylanmckay revised this gist Mar 22, 2018. 1 changed file with 4 additions and 13 deletions.
    17 changes: 4 additions & 13 deletions facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -1,16 +1,7 @@
    #! /usr/bin/env ruby

    # This script can be used to parse and dump the information from
    # the 'html/contact_info.htm' file in a Facebook user data ZIP download.
    #
    # It dumps all cell phone call + SMS message + MMS records, plus a summary of each.
    #
    # Place this script inside the extracted Facebook data download folder
    # alongside the 'html' folder.
    #
    # This script requires Ruby and the Nokogiri library to be installed.

    require 'nokogiri'
    require 'byebug'
    require 'time'

    def hr
    @@ -101,13 +92,13 @@ def print_call_history(html_doc)
    $stdout.puts "A brief summary of phone records"
    hr
    $stdout.puts "There are phone records for #{phone_numbers.size} distinct phone numbers"
    $stdout.puts "There are records of #{call_metadata.size} distinct cell phone calls"
    $stdout.puts "There are records of #{call_metadata.flatten(1).size} distinct cell phone calls"
    indent(2) and print_timestamps(call_metadata, :metadata_name => "cell phone call")
    indent(2) and print_status_breakdown(call_metadata, :metadata_name => "cell phone calls")
    $stdout.puts "There are records of #{sms_metadata.size} distinct SMS messages"
    $stdout.puts "There are records of #{sms_metadata.flatten(1).size} distinct SMS messages"
    indent(2) and print_timestamps(sms_metadata, :metadata_name => "SMS message")
    indent(2) and print_status_breakdown(sms_metadata, :metadata_name => "SMS messages")
    $stdout.puts "There are records of #{mms_metadata.size} distinct MMS messages"
    $stdout.puts "There are records of #{mms_metadata.flatten(1).size} distinct MMS messages"
    indent(2) and print_timestamps(mms_metadata, :metadata_name => "MMS message")
    indent(2) and print_status_breakdown(mms_metadata, :metadata_name => "MMS messages")
    hr
  12. @dylanmckay dylanmckay created this gist Mar 22, 2018.
    121 changes: 121 additions & 0 deletions facebook-contact-info-summary.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,121 @@
    #! /usr/bin/env ruby

    # This script can be used to parse and dump the information from
    # the 'html/contact_info.htm' file in a Facebook user data ZIP download.
    #
    # It dumps all cell phone call + SMS message + MMS records, plus a summary of each.
    #
    # Place this script inside the extracted Facebook data download folder
    # alongside the 'html' folder.
    #
    # This script requires Ruby and the Nokogiri library to be installed.

    require 'nokogiri'
    require 'time'

    # Prints a horizontal rule (24 dashes and a newline) to standard output.
    def hr
      rule = "-" * 24
      $stdout.puts(rule)
    end

    # Writes the leading padding for an output line and flushes standard output.
    #
    # level - nesting level; level 1 writes nothing and each further level
    #         adds one space.
    #
    # Returns the result of $stdout.flush (the stream itself), which is truthy;
    # callers rely on that to chain `indent(...) and <print something>`.
    def indent(level = 1)
      padding = " " * (level - 1)
      $stdout.print(padding)
      $stdout.flush
    end

    # Prints a titled section: a rule, the title and a blank line, then the
    # output of the given block, then a closing rule and a blank line. Every
    # header/footer line is prefixed with the padding for `level`.
    #
    # title - text printed under the opening rule.
    # level - nesting level passed to indent (default 1).
    #
    # A block is required; it is yielded to once, between header and footer.
    # Returns nil.
    def section(title, level: 1)
      # indent returns a truthy value, so the right-hand side always runs
      # after the padding has been written.
      indent(level) && hr
      indent(level) && $stdout.puts(title)
      indent(level) && $stdout.puts
      yield
      indent(level) && hr
      indent(level) && $stdout.puts
    end

    # Extracts metadata from a call/text/sms/mms table.
    #
    # The first <tr> is skipped (treated as the heading row); every remaining
    # row becomes an array holding the chomped text of its <td> cells.
    #
    # metadata_table - an object responding to #css('tr'), or nil when the
    #                  caller found no table.
    #
    # Returns nil if there is no metadata in this table (nil table, no rows at
    # all, or nothing after the first row).
    # Otherwise returns a 2d list of rows/columns.
    def extract_table_metadata(metadata_table)
      return nil if metadata_table.nil?

      # drop(1) instead of [1..-1]: slicing an empty row list from index 1
      # yields nil rather than an empty list, which would crash on the
      # emptiness check below.
      data_rows = metadata_table.css('tr').to_a.drop(1)
      return nil if data_rows.empty? # many tables are empty.

      data_rows.map do |row|
        row.css('td').map { |cell| cell.text.chomp }
      end
    end

    # Collects the metadata of every table that is a direct child of a
    # history container.
    #
    # container - the node holding the per-contact tables, or nil. Callers in
    #             this file pass the first match of an xpath query, which is
    #             nil when that history section is absent from the document.
    #
    # For each direct child named "table", the first nested table is handed to
    # extract_table_metadata; tables that yield no metadata (nil) are dropped.
    #
    # Returns a list with one 2d row/column list per non-empty table, or []
    # when container is nil.
    def dig_out_metadata(container:)
      return [] if container.nil?

      contact_tables = container.children.select { |child| child.name == "table" }

      extracted = contact_tables.map do |contact_table|
        extract_table_metadata(contact_table.css('table')[0])
      end
      extracted.compact
    end

    # Prints every metadata entry inside a section titled metadata_title.
    #
    # metadata       - list of entries; each entry is printed in its own
    #                  level-2 "Another Phone Number" section as a single
    #                  comma-separated line (Array#join flattens nested rows
    #                  into that one line).
    # metadata_title - heading of the enclosing section.
    #
    # nil entries are skipped. The previous version invoked the byebug
    # debugger for them, but byebug is not required by this file, so that path
    # could only raise NameError.
    def print_metadata(metadata, metadata_title:)
      section(metadata_title) do
        metadata.compact.each do |record|
          section("Another Phone Number", level: 2) do
            indent(2)
            puts record.join(", ")
          end
        end
      end
    end

    # Prints the dates of the oldest and the most recent timestamp in metadata.
    #
    # metadata      - list of entries; element [1] of each entry is converted
    #                 to a string and parsed with Time.parse.
    # metadata_name - singular noun used in the message, e.g. "SMS message".
    #
    # Blank values and values Time.parse rejects are ignored. Nothing is
    # printed when no usable timestamp remains (previously this crashed on
    # nil.to_date, or on ArgumentError from Time.parse).
    #
    # NOTE(review): callers in this file pass the output of dig_out_metadata,
    # whose entries are whole tables, so r[1] is the second row of each table
    # rather than a timestamp column. Confirm this is intended.
    def print_timestamps(metadata, metadata_name:)
      raw_values = metadata.map { |r| r[1].to_s.chomp }.reject(&:empty?)

      timestamps = raw_values.map do |text|
        begin
          Time.parse(text)
        rescue ArgumentError
          nil # unparseable value: leave it out of the min/max comparison
        end
      end.compact

      return if timestamps.empty?

      oldest, newest = timestamps.minmax
      puts "The oldest #{metadata_name} is from #{oldest.to_date}, the most recent at #{newest.to_date}"
    end

    # Prints how many records there are per status (the first field of a row).
    #
    # metadata      - list of tables, each a list of rows; flattened one level
    #                 so that every row is counted individually.
    # metadata_name - plural noun appended to each count, e.g. "SMS messages".
    #
    # Nothing is printed when there are no records, instead of a dangling
    # "This includes " line.
    def print_status_breakdown(metadata, metadata_name:)
      records_by_status = metadata.flatten(1).group_by(&:first)
      return if records_by_status.empty?

      breakdown = records_by_status.map do |status, records|
        "#{records.size} #{status.downcase} #{metadata_name}"
      end
      puts "This includes #{breakdown.join(', ')}"
    end

    # Prints the call, SMS and MMS records found in html_doc, the list of
    # phone numbers, and a brief summary of each record type.
    #
    # html_doc - the parsed document; queried with #xpath for the div that
    #            follows each "... History" h2 heading. A heading that is not
    #            present yields a nil container.
    #
    # Fixes over the previous version:
    # * a missing Call History section no longer crashes the phone number
    #   lookup (nil.xpath); the number list is simply empty;
    # * the "distinct ..." counts count individual records (rows) rather than
    #   the number of per-contact tables.
    def print_call_history(html_doc)
      call_history_container = html_doc.xpath("//h2[text()='Call History']/following-sibling::div")[0]
      sms_history_container = html_doc.xpath("//h2[text()='SMS History']/following-sibling::div")[0]
      mms_history_container = html_doc.xpath("//h2[text()='MMS History']/following-sibling::div")[0]

      call_metadata = dig_out_metadata(container: call_history_container)
      sms_metadata = dig_out_metadata(container: sms_history_container)
      mms_metadata = dig_out_metadata(container: mms_history_container)

      # NOTE(review): the leading "//" makes this query document-wide rather
      # than scoped to the call history container. Confirm that is intended.
      phone_numbers =
        if call_history_container
          call_history_container.xpath("//b[text()='Number:']/following-sibling::text()")
                                .map(&:text).sort.uniq
        else
          []
        end

      print_metadata(call_metadata, metadata_title: "Call History")
      print_metadata(sms_metadata, metadata_title: "SMS History")
      print_metadata(mms_metadata, metadata_title: "MMS History")

      section("The full list of phone numbers that have stored data") do
        # Eight numbers per output line.
        phone_numbers.each_slice(8) do |group|
          indent(2)
          puts group.join(", ")
        end
      end

      $stdout.puts "A brief summary of phone records"
      hr
      $stdout.puts "There are phone records for #{phone_numbers.size} distinct phone numbers"
      print_history_summary(call_metadata, singular: "cell phone call", plural: "cell phone calls")
      print_history_summary(sms_metadata, singular: "SMS message", plural: "SMS messages")
      print_history_summary(mms_metadata, singular: "MMS message", plural: "MMS messages")
      hr
    end

    # Prints the record count, timestamp range and status breakdown for one
    # kind of history (helper for print_call_history).
    def print_history_summary(metadata, singular:, plural:)
      # flatten(1) merges the per-table lists so that rows are counted.
      $stdout.puts "There are records of #{metadata.flatten(1).size} distinct #{plural}"
      indent(2)
      print_timestamps(metadata, metadata_name: singular)
      indent(2)
      print_status_breakdown(metadata, metadata_name: plural)
    end

    # Script entry point: reads html/contact_info.htm (relative to the current
    # working directory), parses it with Nokogiri and prints the report.
    contact_info_html = File.read('html/contact_info.htm')
    print_call_history(Nokogiri::HTML(contact_info_html))