Created
June 28, 2022 01:16
-
-
Save juan-fdz-hawa/a3eb1cf33f149f7473a37469ecb9feda to your computer and use it in GitHub Desktop.
Revisions
-
juan-fdz-hawa created this gist
Jun 28, 2022. There are no files selected for viewing
"""Match installed application bundles against a vendor/product extract.

Reads two files from the working directory:

* ``extract.json`` - a mapping of vendor name -> iterable of product names
  (called "NVD" in the report output).
* ``bundles.csv`` - one header row, then rows whose first three columns are
  executable name, bundle identifier, bundle name.

The script prints a vendor-level report followed by an app-level report.
"""
import json
import csv
from collections import defaultdict
from difflib import SequenceMatcher
from functools import lru_cache

# TODO: should black list all top level domains.
vendor_terms_blacklist = {
    'com',
    'app',
    'us',
    'net',
    'id',
    'ui',
    'ru',
}

# Bundle-identifier term -> vendor key to look up instead.
# The lookup is case-sensitive and happens BEFORE lower-casing.
vendor_terms_overrides = {
    'postmanlabs': 'getpostman',
    'tinyspeck': 'slack',
    'getdropbox': 'dropbox',
    'keepcoder': 'telegram',
    'virtualbox': 'oracle',
    'Cisco-Systems': 'cisco',
    'kovidgoyal': 'calibre-ebook',
}

# Matched vendor -> {exec or bundle name as installed: product name to use}.
apps_override = {
    'oracle': {
        'VirtualBox': 'vm_virtualbox',
    },
    'agilebits': {
        '1Password 7': '1password',
    },
    'zoom': {
        'zoom.us': 'zoom',
    },
    'microsoft': {
        'Microsoft AutoUpdate': 'autoupdate',
        'Microsoft Edge': 'edge',
        'Code': 'visual_studio_code',
    },
    'osquery': {
        'osqueryd': 'osquery',
    },
}


def vendor_id(bundle_id):
    """Return *bundle_id* without its last dot-separated component.

    Per the original author's note this mirrors how iOS 7 derives the
    'vendor id' from a bundle identifier:
    https://developer.apple.com/documentation/uikit/uidevice/1620059-identifierforvendor

    An identifier without any dot yields the empty string.
    """
    return bundle_id.rpartition('.')[0]


def group_bundles(rows):
    """Group CSV data rows into ``{vendor id: {(exec_name, bundle_name)}}``.

    *rows* is any iterable of sequences laid out as
    ``[exec_name, bundle_id, bundle_name, ...]``. Rows with fewer than three
    columns are skipped: ``csv.reader`` yields ``[]`` for blank lines, which
    would otherwise raise ``IndexError``.
    """
    bundles = defaultdict(set)
    for row in rows:
        if len(row) < 3:
            continue
        exec_name, bundle_id, bundle_name = row[0], row[1], row[2]
        bundles[vendor_id(bundle_id)].add((exec_name, bundle_name))
    return bundles


def load_nvd(path='extract.json'):
    """Load and return the JSON document stored at *path*."""
    with open(path, 'r', encoding='utf-8') as f_handler:
        return json.load(f_handler)


def load_bundles(path='bundles.csv'):
    """Read the CSV at *path*, skip its header row and group the rest.

    See :func:`group_bundles` for the shape of the returned mapping.
    """
    # newline='' is what the csv module documentation asks for so that
    # newlines embedded in quoted fields are handled correctly.
    with open(path, 'r', encoding='utf-8', newline='') as f_handler:
        reader = csv.reader(f_handler)
        next(reader, None)  # Skip header
        return group_bundles(reader)


def match_vendors(bundles, nvd):
    """Resolve each vendor id in *bundles* to a key of *nvd*.

    Every dot-separated term of the vendor id is passed through
    ``vendor_terms_overrides``, lower-cased, ignored when listed in
    ``vendor_terms_blacklist`` and then looked up in *nvd*; the first term
    that is present wins.

    Returns ``(matched, found, not_found)`` where *matched* maps vendor id ->
    nvd key and the other two are lists of report lines in iteration order
    of *bundles*.
    """
    matched = {}
    found = []
    not_found = []
    for bundle_vendor in bundles:
        for term in bundle_vendor.split('.'):
            term = vendor_terms_overrides.get(term, term).lower()
            if term in vendor_terms_blacklist:
                continue
            if term in nvd:
                found.append(f"Found: {term} - {bundle_vendor}")
                matched[bundle_vendor] = term
                break
        else:
            # No term of this vendor id was a key of nvd.
            not_found.append(f"Not found: {bundle_vendor}")
    return matched, found, not_found


def normalize_name(name):
    """Lower-case *name* and turn spaces and hyphens into underscores."""
    return name.lower().replace(' ', '_').replace('-', '_')


def match_apps(matched_vendors, bundles, nvd):
    """Match every installed app of a matched vendor to one of its products.

    For each ``(exec_name, bundle_name)`` pair, both names go through the
    vendor's ``apps_override`` table (if any) and :func:`normalize_name`.
    The vendor's entries in *nvd* are scanned in order; for each entry the
    executable name is compared first, then the bundle name, and the scan
    stops at the first equality.

    Returns ``(found, not_found)``: two sorted lists of report lines.
    """
    found = []
    not_found = []
    for bundle_vendor, nvd_vendor in matched_vendors.items():
        overrides = apps_override.get(nvd_vendor, {})
        nvd_entries = nvd[nvd_vendor]
        for exec_name, bundle_name in bundles[bundle_vendor]:
            n_exec_name = normalize_name(overrides.get(exec_name, exec_name))
            n_bundle_name = normalize_name(
                overrides.get(bundle_name, bundle_name)
            )
            for nvd_e in nvd_entries:
                if nvd_e == n_exec_name:
                    found.append(
                        f"Vendor: {nvd_vendor}, Exec name: {exec_name}, "
                        f"NVD entry: {nvd_e}"
                    )
                    break
                if nvd_e == n_bundle_name:
                    found.append(
                        f"Vendor: {nvd_vendor}, Bundle name: {bundle_name}, "
                        f"NVD entry: {nvd_e}"
                    )
                    break
            else:
                # NOTE: an approximate fallback (difflib.SequenceMatcher
                # ratio above 0.8) was prototyped by the original author but
                # left disabled; only exact matches count.
                not_found.append(
                    f"Vendor: {nvd_vendor}, Exec name: {exec_name}, "
                    f"Bundle name: {bundle_name}"
                )
    # The installed apps live in sets, so sort for a stable report.
    found.sort()
    not_found.sort()
    return found, not_found


def main(nvd_path='extract.json', bundles_path='bundles.csv'):
    """Load both input files and print the vendor and app reports."""
    nvd = load_nvd(nvd_path)
    bundles = load_bundles(bundles_path)

    matched_vendors, software_found, software_not_found = match_vendors(
        bundles, nvd
    )

    print("----------------------------")
    print("VENDORS")
    print("----------------------------")
    for line in software_found:
        print(line)
    for line in software_not_found:
        print(line)
    print(len(software_found), len(bundles))

    print("\n\n----------------------------")
    print("Apps")
    print("----------------------------")
    software_found, software_not_found = match_apps(
        matched_vendors, bundles, nvd
    )
    for line in software_found:
        print(f"Found: {line}")
    for line in software_not_found:
        print(f"Not found: {line}")
    print(
        f"Found: {len(software_found)} Not found: {len(software_not_found)}"
    )


if __name__ == "__main__":
    main()