Skip to content

Instantly share code, notes, and snippets.

@juan-fdz-hawa
Created June 28, 2022 01:16
Show Gist options
  • Save juan-fdz-hawa/a3eb1cf33f149f7473a37469ecb9feda to your computer and use it in GitHub Desktop.
Save juan-fdz-hawa/a3eb1cf33f149f7473a37469ecb9feda to your computer and use it in GitHub Desktop.

Revisions

  1. juan-fdz-hawa created this gist Jun 28, 2022.
    161 changes: 161 additions & 0 deletions stats.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,161 @@
    import json
    import csv
    from collections import defaultdict
    from difflib import SequenceMatcher
    from functools import lru_cache


    # Load the NVD extract. The rest of the script tests membership on `nvd`
    # and indexes it by vendor term, so the top level is presumably a JSON
    # object keyed by vendor name -- confirm against the extract.
    # JSON is UTF-8 by specification, so decode it explicitly instead of
    # relying on the platform's locale-dependent default encoding.
    with open('extract.json', 'r', encoding='utf-8') as f_handler:
        nvd = json.load(f_handler)


    # Group installed apps by vendor id:
    #   vendor id -> {(executable name, bundle name), ...}
    bundles = defaultdict(set)
    # The csv module requires files to be opened with newline='' so that
    # newlines embedded inside quoted fields are parsed correctly.
    with open('bundles.csv', 'r', newline='') as f_handler:
        reader = csv.reader(f_handler)
        # Skip header
        next(reader, None)
        for row in reader:
            # csv.reader yields an empty list for a blank line; skip it rather
            # than failing on row[1]. Truncated rows still raise loudly.
            if not row:
                continue
            exec_name = row[0]
            bundle_name = row[2]
            # Take everything up to the last part, this is how iOS 7 computes the 'vendor id'
            # https://developer.apple.com/documentation/uikit/uidevice/1620059-identifierforvendor
            # (a bundle id without any '.' yields an empty vendor string)
            vendor = row[1].rpartition('.')[0]
            bundles[vendor].add((exec_name, bundle_name))

    # ---- Pass 1: map each bundle vendor id onto a vendor key present in `nvd` ----
    # Report lines for this pass, plus the vendor id -> NVD vendor key mapping
    # consumed by the application pass further down.
    software_found = []
    software_not_found = []
    matched_vendors = {}

    # Bundle-id components that never identify a vendor.
    # TODO: should blacklist all top-level domains, not just this sample.
    vendor_terms_blacklist = {'com', 'app', 'us', 'net', 'id', 'ui', 'ru'}

    # Bundle-id component -> vendor term to look up instead. The lookup is
    # done on the raw (not yet lower-cased) component.
    vendor_terms_overrides = {
        'postmanlabs': 'getpostman',
        'tinyspeck': 'slack',
        'getdropbox': 'dropbox',
        'keepcoder': 'telegram',
        'virtualbox': 'oracle',
        'Cisco-Systems': 'cisco',
        'kovidgoyal': 'calibre-ebook',
    }

    for vendor_id in bundles:
        # The first component that is not blacklisted and is a key of `nvd` wins.
        for raw_term in vendor_id.split('.'):
            term = vendor_terms_overrides.get(raw_term, raw_term).lower()
            if term not in vendor_terms_blacklist and term in nvd:
                software_found.append(f"Found: {term} - {vendor_id}")
                matched_vendors[vendor_id] = term
                break
        else:
            # Loop ended without a break: no component matched.
            software_not_found.append(f"Not found: {vendor_id}")

    for banner_line in ("----------------------------", "VENDORS", "----------------------------"):
        print(banner_line)
    # Matched vendors first, then the unmatched ones, in insertion order.
    for report_line in software_found + software_not_found:
        print(report_line)
    print(len(software_found), len(bundles))


    # ---- Pass 2: for every matched vendor, look its apps up among that vendor's NVD entries ----
    print("\n\n----------------------------")
    print("Apps")
    print("----------------------------")

    # Fresh report lists for the application pass.
    software_found = []
    software_not_found = []

    # NVD vendor key -> {executable or bundle name -> name to look up instead}
    apps_override = {
        'oracle': {
            'VirtualBox': 'vm_virtualbox',
        },
        'agilebits': {
            '1Password 7': '1password',
        },
        'zoom': {
            'zoom.us': 'zoom',
        },
        'microsoft': {
            'Microsoft AutoUpdate': 'autoupdate',
            'Microsoft Edge': 'edge',
            'Code': 'visual_studio_code',
        },
        'osquery': {
            'osqueryd': 'osquery',
        },
    }

    # Normalisation applied after lower-casing: spaces and hyphens become underscores.
    separators_to_underscore = str.maketrans(' -', '__')

    for bundle_vendor, nvd_vendor in matched_vendors.items():
        name_overrides = apps_override.get(nvd_vendor, {})
        nvd_entries = nvd[nvd_vendor]

        for exec_name, bundle_name in bundles[bundle_vendor]:
            n_exec_name = (
                name_overrides.get(exec_name, exec_name)
                .lower()
                .translate(separators_to_underscore)
            )
            n_bundle_name = (
                name_overrides.get(bundle_name, bundle_name)
                .lower()
                .translate(separators_to_underscore)
            )

            # The first NVD entry equal to either normalised name wins; within
            # one entry the executable name is checked before the bundle name.
            for nvd_e in nvd_entries:
                if nvd_e == n_exec_name:
                    software_found.append(f"Vendor: {nvd_vendor}, Exec name: {exec_name}, NVD entry: {nvd_e}")
                    break
                if nvd_e == n_bundle_name:
                    software_found.append(f"Vendor: {nvd_vendor}, Bundle name: {bundle_name}, NVD entry: {nvd_e}")
                    break
            else:
                # NOTE: an approximate fallback was prototyped at this point and
                # is disabled: it accepted an entry when the SequenceMatcher
                # ratio against the raw exec or bundle name exceeded 0.8.
                software_not_found.append(f"Vendor: {nvd_vendor}, Exec name: {exec_name}, Bundle name: {bundle_name}")

    software_found.sort()
    for report_line in software_found:
        print(f"Found: {report_line}")

    software_not_found.sort()
    for report_line in software_not_found:
        print(f"Not found: {report_line}")

    print(f"Found: {len(software_found)} Not found: {len(software_not_found)}")