// To compile: clang++ -arch x86_64 -arch arm64 -std=c++20 library_injector.cpp -lbsm -lEndpointSecurity -o library_injector, // then codesign with com.apple.developer.endpoint-security.client and run the // program as root. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __arm64__ #include #elif __x86_64__ #include #else #error "Only arm64 and x86_64 are currently supported" #endif #if __has_feature(ptrauth_calls) #include #endif #include #include #include #include #include #include #include #include #define ensure(condition) \ do { \ if (!(condition)) { \ throw std::runtime_error(std::string("") + "Check \"" + #condition "\" failed at " + \ __FILE__ + ":" + std::to_string(__LINE__) + " in function " + __FUNCTION__); \ } \ } while (0) #define CS_OPS_STATUS 0 #define CS_ENFORCEMENT 0x00001000 extern "C" { int csops(pid_t pid, unsigned int ops, void *useraddr, size_t usersize); }; auto is_translated(pid_t pid) { auto name = std::array{CTL_KERN, KERN_PROC, KERN_PROC_PID, pid}; kinfo_proc proc; size_t size = sizeof(proc); ensure(!sysctl(name.data(), name.size(), &proc, &size, nullptr, 0) && size == sizeof(proc)); return !!(proc.kp_proc.p_flag & P_TRANSLATED); } auto is_cs_enforced(pid_t pid) { int flags; ensure(!csops(pid, CS_OPS_STATUS, &flags, sizeof(flags))); return !!(flags & CS_ENFORCEMENT); } template T scan(task_port_t task, std::uintptr_t &address) { T t; vm_size_t count; ensure(vm_read_overwrite(task, address, sizeof(t), reinterpret_cast(&t), &count) == KERN_SUCCESS && count == sizeof(t)); address += sizeof(t); return t; } std::vector read_string_array(task_port_t task, std::uintptr_t &base) { auto strings = std::vector{}; std::uintptr_t string; do { string = scan(task, base); strings.push_back(string); } while (string); strings.pop_back(); return strings; } std::string read_string(task_port_t task, std::uintptr_t address) { auto string = std::string{}; char c; do { c = scan(task, address); string.push_back(c); } while (c); string.pop_back(); return string; } std::uintptr_t rearrange_stack(task_port_t task, const std::string &library, std::uintptr_t sp) { auto loadAddress = scan(task, sp); auto argc = scan(task, sp); auto argvAddresses = read_string_array(task, sp); auto envpAddresses = read_string_array(task, sp); auto appleAddresses = read_string_array(task, sp); auto stringReader = std::bind(read_string, task, std::placeholders::_1); auto argv = std::vector{}; std::transform(argvAddresses.begin(), argvAddresses.end(), std::back_inserter(argv), stringReader); auto envp = std::vector{}; std::transform(envpAddresses.begin(), envpAddresses.end(), std::back_inserter(envp), stringReader); auto apple = std::vector{}; std::transform(appleAddresses.begin(), appleAddresses.end(), std::back_inserter(apple), stringReader); auto dyld_insert_libraries = std::find_if(envp.begin(), envp.end(), [](const auto &string) { return string.starts_with("DYLD_INSERT_LIBRARIES="); }); if (dyld_insert_libraries != envp.end()) { *dyld_insert_libraries += ":" + library; } else { auto variable = "DYLD_INSERT_LIBRARIES=" + library; envp.push_back(variable); } envp.push_back("DYLD_SHARED_REGION=1"); argvAddresses.clear(); envpAddresses.clear(); appleAddresses.clear(); auto strings = std::vector{}; auto arrayGenerator = [&strings](auto &addresses, const auto &string) { addresses.push_back(strings.size()); std::copy(string.begin(), string.end(), std::back_inserter(strings)); strings.push_back('\0'); }; std::for_each(argv.begin(), argv.end(), std::bind(arrayGenerator, std::ref(argvAddresses), std::placeholders::_1)); std::for_each(envp.begin(), envp.end(), std::bind(arrayGenerator, std::ref(envpAddresses), std::placeholders::_1)); std::for_each(apple.begin(), apple.end(), std::bind(arrayGenerator, std::ref(appleAddresses), std::placeholders::_1)); sp -= strings.size(); sp = sp / sizeof(std::uintptr_t) * sizeof(std::uintptr_t); ensure(vm_write(task, sp, reinterpret_cast(strings.data()), strings.size()) == KERN_SUCCESS); auto rebaser = [sp](auto &&address) { address += sp; }; std::for_each(argvAddresses.begin(), argvAddresses.end(), rebaser); std::for_each(envpAddresses.begin(), envpAddresses.end(), rebaser); std::for_each(appleAddresses.begin(), appleAddresses.end(), rebaser); auto addresses = std::vector{}; std::copy(argvAddresses.begin(), argvAddresses.end(), std::back_inserter(addresses)); addresses.push_back(0); std::copy(envpAddresses.begin(), envpAddresses.end(), std::back_inserter(addresses)); addresses.push_back(0); std::copy(appleAddresses.begin(), appleAddresses.end(), std::back_inserter(addresses)); addresses.push_back(0); sp -= addresses.size() * sizeof(std::uintptr_t); ensure(vm_write(task, sp, reinterpret_cast(addresses.data()), addresses.size() * sizeof(std::uintptr_t)) == KERN_SUCCESS); sp -= sizeof(std::uintptr_t); ensure(vm_write(task, sp, reinterpret_cast(&argc), sizeof(std::uintptr_t)) == KERN_SUCCESS); sp -= sizeof(std::uintptr_t); ensure(vm_write(task, sp, reinterpret_cast(&loadAddress), sizeof(std::uintptr_t)) == KERN_SUCCESS); return sp; } __asm__( ".globl _amfi_flags_patch_start\n" ".globl _amfi_flags_patch_end\n" "_amfi_flags_patch_start:\n" #if __arm64__ "\tmov x2, #0x5f\n" "\tstr x2, [x1]\n" "\tmov x0, #0\n" "\tret\n" #elif __x86_64__ ".intel_syntax noprefix\n" "\tmov QWORD PTR [rsi], 0x5f\n" "\txor rax, rax\n" "\tret\n" #endif "_amfi_flags_patch_end:\n"); extern char amfi_flags_patch_start; extern char amfi_flags_patch_end; #if __arm64__ // This is a clever but incredibly lazy patch. On arm64, the first five // instructions of _dyld_start are as follows: // // mov x0, sp // and sp, x0, #~15 // mov fp, #0 // mov lr, #0 // b start // // We need to bump sp down a bit due to injecting DYLD_INSERT_LIBRARIES, but // because of thread_set_state_allowed we can't set it directly. So we inject // instructions to do it in here. At process startup fp and lr happen to be set // to 0 by the kernel already, which gives us the space to sneak in two extra // instructions. (If we wanted to be slightly less lazy, we could take advantage // of the kernel's laziness and align sp ourselves when writing the initial // stack. This would let us overwrite the instruction aligning sp.) __asm__( ".globl _dyld_start_patch_start\n" ".globl _dyld_start_patch_end\n" ".globl _dyld_start_check_start\n" ".globl _dyld_start_check_end\n" "\n" "_dyld_start_patch_start:\n" "_dyld_start_check_start:\n" /* sub sp, sp, [offset & 0xfff] */ // Added dynamically /* sub sp, sp, [offset & ~0xfff], lsl 12 */ // Added dynamically "mov x0, sp\n" "and sp, x0, #~15\n" "_dyld_start_patch_end:\n" // Used as a sanity check "mov fp, #0\n" "mov lr, #0\n" "_dyld_start_check_end:\n"); #elif __x86_64__ // A similar patch for x86_64. The initial sequence is this: // // mov rdi, rsp // and rsp, -16 // mov rbp, 0 // push 0 // jmp start // // We can golf it down with code that is equivalent (save for xor ebp, ebp, // which sets flags-but in this case it doesn't adjust them from what the // kernel sets already, and there isn't any code that relies on its value // anyway). __asm__( ".intel_syntax noprefix\n" ".globl _dyld_start_patch_start\n" ".globl _dyld_start_patch_end\n" ".globl _dyld_start_check_start\n" ".globl _dyld_start_check_end\n" "\n" "_dyld_start_patch_start:\n" /* sub rsp, [offset] */ // Added dynamically "push rsp\n" "pop rdi\n" "and rsp, -16\n" "xor ebp, ebp\n" "push rbp\n" "_dyld_start_patch_end:\n" "_dyld_start_check_start:\n" "mov rdi, rsp\n" "and rsp, -16\n" "mov rbp, 0\n" "push 0\n" "_dyld_start_check_end:\n"); #endif extern char dyld_start_patch_start; extern char dyld_start_patch_end; extern char dyld_start_check_start; extern char dyld_start_check_end; void write_patch(task_t task, std::uintptr_t address, void *patch_start, void *patch_end) { ensure(vm_protect(task, address / PAGE_SIZE * PAGE_SIZE, PAGE_SIZE, false, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY) == KERN_SUCCESS); ensure(vm_write(task, address, reinterpret_cast(patch_start), reinterpret_cast(patch_end) - reinterpret_cast(patch_start)) == KERN_SUCCESS); ensure(vm_protect(task, address / PAGE_SIZE * PAGE_SIZE, PAGE_SIZE, false, VM_PROT_READ | VM_PROT_EXECUTE) == KERN_SUCCESS); } void patch_restrictions(task_t task, std::uintptr_t pc) { task_dyld_info_data_t dyldInfo; mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT; ensure(task_info(mach_task_self(), TASK_DYLD_INFO, reinterpret_cast(&dyldInfo), &count) == KERN_SUCCESS); auto all_image_infos = reinterpret_cast(dyldInfo.all_image_info_addr); const auto header = reinterpret_cast(all_image_infos->dyldImageLoadAddress); auto location = reinterpret_cast(header + 1); auto base = reinterpret_cast(header); for (unsigned i = 0; i < header->ncmds; ++i) { auto command = reinterpret_cast(location); if (command->cmd == LC_SYMTAB) { auto command = reinterpret_cast(location); auto symbols = std::span{reinterpret_cast(base + command->symoff), command->nsyms}; auto _dyld_start = std::find_if(symbols.begin(), symbols.end(), [base, command](const auto &symbol) { return !std::strcmp(reinterpret_cast(base + command->stroff) + symbol.n_un.n_strx, "__dyld_start"); }); auto amfi_check_dyld_policy_self = std::find_if(symbols.begin(), symbols.end(), [base, command](const auto &symbol) { return !std::strcmp(reinterpret_cast(base + command->stroff) + symbol.n_un.n_strx, "_amfi_check_dyld_policy_self"); }); write_patch(task, pc + amfi_check_dyld_policy_self->n_value - _dyld_start->n_value, &amfi_flags_patch_start, &amfi_flags_patch_end); return; } location += command->cmdsize; } ensure(false); } void inject(pid_t pid, const std::string &library) { task_port_t task; ensure(task_for_pid(mach_task_self(), pid, &task) == KERN_SUCCESS); thread_act_array_t threads; mach_msg_type_number_t count; ensure(task_threads(task, &threads, &count) == KERN_SUCCESS); ensure(count == 1); #if __arm64__ arm_thread_state64_t state; count = ARM_THREAD_STATE64_COUNT; thread_state_flavor_t flavor = ARM_THREAD_STATE64; #elif __x86_64__ x86_thread_state64_t state; count = x86_THREAD_STATE64_COUNT; thread_state_flavor_t flavor = x86_THREAD_STATE64; #endif ensure(thread_get_state(*threads, flavor, reinterpret_cast(&state), &count) == KERN_SUCCESS); #if __arm64__ ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_TO_SELF, flavor, reinterpret_cast(&state), count, reinterpret_cast(&state), &count) == KERN_SUCCESS); auto sp = rearrange_stack(task, library, arm_thread_state64_get_sp(state)); patch_restrictions(task, arm_thread_state64_get_pc(state)); if (__builtin_available(macOS 14.4, *)) { } else { arm_thread_state64_set_sp(state, sp); ensure(thread_convert_thread_state(*threads, THREAD_CONVERT_THREAD_STATE_FROM_SELF, flavor, reinterpret_cast(&state), count, reinterpret_cast(&state), &count) == KERN_SUCCESS); } #elif __x86_64__ auto sp = rearrange_stack(task, library, static_cast(state.__rsp)); state.__rsp = sp; patch_restrictions(task, state.__rip); #endif if (__builtin_available(macOS 14.4, *)) { #if __arm64__ auto address = arm_thread_state64_get_pc(state); #elif __x86_64__ auto address = state.__rip; #endif auto expected = std::span{&dyld_start_check_start, &dyld_start_check_end}; auto actual = std::vector(expected.begin(), expected.end()); vm_size_t count; ensure(vm_read_overwrite(task, address, actual.size(), reinterpret_cast(actual.data()), &count) == KERN_SUCCESS && count == expected.size()); ensure(std::equal(expected.begin(), expected.end(), actual.begin(), actual.end())); #if __arm64__ auto difference = arm_thread_state64_get_sp(state) - sp; auto stack_adjustment = std::array{ // sub sp, sp, difference & 0xfff std::byte{0xff}, static_cast(0x03 | (difference & 0x3f) << 2), static_cast(0x00 | (difference & 0xfc0) >> 6), std::byte{0xd1}, // sub sp, sp, difference & ~0xfff, lsl #12 std::byte{0xff}, static_cast(0x03 | ((difference >> 12) & 0x3f) << 2), static_cast(0x40 | ((difference >> 12) & 0xfc0) >> 6), std::byte{0xd1}, }; #elif __x86_64__ auto difference = state.__rsp - sp; auto stack_adjustment = std::array{ // sub rsp, difference std::byte{0x48}, std::byte{0x81}, std::byte{0xec}, static_cast((difference >> 0) & 0xff), static_cast((difference >> 8) & 0xff), static_cast((difference >> 16) & 0xff), static_cast((difference >> 24) & 0xff), }; #endif write_patch(task, address, stack_adjustment.begin(), stack_adjustment.end()); write_patch(task, address + stack_adjustment.size(), &dyld_start_patch_start, &dyld_start_patch_end); } else { ensure(thread_set_state(*threads, flavor, reinterpret_cast(&state), count) == KERN_SUCCESS); } mach_port_deallocate(mach_task_self(), *threads); vm_deallocate(mach_task_self(), (vm_address_t)threads, sizeof(*threads)); } int main(int argc, char **argv, char **envp) { if (!getenv("DYLD_SHARED_REGION")) { uint32_t length = 0; std::string path; _NSGetExecutablePath(path.data(), &length); path = std::string('0', length); ensure(!_NSGetExecutablePath(path.data(), &length)); std::vector environment; while (*envp) { environment.push_back(*envp++); } // This happens to disable dyld-in-cache. environment.push_back("DYLD_SHARED_REGION=1"); environment.push_back(nullptr); execve(path.c_str(), argv, const_cast(environment.data())); ensure(false); } if (argc < 3) { std::cerr << "Usage: " << *argv << " " << std::endl; std::exit(EXIT_FAILURE); } auto library = *++argv; std::vector processes; for (auto process : std::span(++argv, argc - 2)) { processes.push_back(std::regex(process)); } es_client_t *client = NULL; ensure(es_new_client(&client, ^(es_client_t *client, const es_message_t *message) { switch (message->event_type) { case ES_EVENT_TYPE_AUTH_EXEC: { const char *name = message->event.exec.target->executable->path.data; for (const auto &process : processes) { pid_t pid = audit_token_to_pid(message->process->audit_token); if (std::regex_search(name, process) && is_translated(getpid()) == is_translated(pid)) { if (is_cs_enforced(pid)) { ensure(!ptrace(PT_ATTACHEXC, pid, nullptr, 0)); // Work around FB9786809 dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 1'000'000'000), dispatch_get_main_queue(), ^{ ensure(!ptrace(PT_DETACH, pid, nullptr, 0)); }); } inject(pid, library); } } es_respond_auth_result(client, message, ES_AUTH_RESULT_ALLOW, false); break; } default: ensure(false && "Unexpected event type!"); } }) == ES_NEW_CLIENT_RESULT_SUCCESS); es_event_type_t events[] = {ES_EVENT_TYPE_AUTH_EXEC}; ensure(es_subscribe(client, events, sizeof(events) / sizeof(*events)) == ES_RETURN_SUCCESS); dispatch_main(); }