Last active
January 19, 2025 06:12
-
-
Save define-private-public/07945570cbfe49b98bd88f0f8ea66e19 to your computer and use it in GitHub Desktop.
Revisions
-
define-private-public revised this gist
Jan 19, 2025 . 2 changed files with 108 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,61 @@ .L44: divsd xmm6, QWORD PTR [rsp+8] comisd xmm6, QWORD PTR .LC1[rip] jnb .L30 pxor xmm0, xmm0 addsd xmm6, xmm0 ucomisd xmm0, xmm6 ja .L42 sqrtsd xmm6, xmm6 .L16: mov rax, QWORD PTR .LC1[rip] mov ecx, 2 pxor xmm7, xmm7 mov QWORD PTR [rsp+8], rax cmp r12, 624 je .L17 .L46: mov rdx, QWORD PTR [rsp+32+r12*8] add r12, 1 .L18: mov rax, rdx shr rax, 11 mov eax, eax xor rax, rdx mov rdx, rax sal rdx, 7 and edx, 2636928640 xor rax, rdx mov rdx, rax sal rdx, 15 and edx, 4022730752 xor rax, rdx mov rdx, rax shr rdx, 18 xor rax, rdx js .L21 pxor xmm0, xmm0 cvtsi2sd xmm0, rax .L22: mulsd xmm0, QWORD PTR [rsp+8] fld DWORD PTR .LC7[rip] fmul QWORD PTR [rsp+8] addsd xmm7, xmm0 fstp QWORD PTR [rsp+8] cmp ecx, 1 jne .L31 divsd xmm7, QWORD PTR [rsp+8] comisd xmm7, QWORD PTR .LC1[rip] jnb .L32 pxor xmm0, xmm0 lea rdi, [rsp+24] lea rsi, [rsp+16] movsd QWORD PTR [rsp+8], xmm6 addsd xmm7, xmm0 mulsd xmm7, QWORD PTR .LC8[rip] addsd xmm0, xmm7 call sincos movsd xmm6, QWORD PTR [rsp+8] movsd xmm0, QWORD PTR [rsp+16] movsd xmm1, QWORD PTR [rsp+24] mulsd xmm0, xmm6 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,47 @@ .L16: movdqa xmm1, XMMWORD PTR [rax+8] movdqu xmm0, XMMWORD PTR [rax] add rax, 16 pand xmm0, xmm5 pand xmm1, xmm3 por xmm1, xmm0 movdqa xmm2, xmm1 pand xmm1, xmm4 movdqa xmm0, xmm1 psrlq xmm2, 1 pxor xmm2, XMMWORD PTR [rax-1832] psllq xmm0, 3 paddq xmm0, xmm1 psllq xmm0, 9 paddq xmm0, xmm1 psllq xmm0, 5 paddq xmm0, xmm1 psllq xmm0, 2 psubq xmm0, xmm1 psllq xmm0, 3 psubq xmm0, xmm1 movdqa xmm10, xmm0 psllq xmm10, 4 paddq xmm0, xmm10 psllq xmm0, 5 psubq xmm0, xmm1 pxor xmm2, xmm0 movups XMMWORD PTR [rax-16], xmm2 cmp rax, rsi jne .L16 mov rsi, QWORD PTR [rsp+16] mov rax, QWORD PTR [rsp+5000] mov r12d, 1 mov rdi, rsi and rax, -2147483648 and edi, 2147483647 or rax, rdi mov rdi, rax and eax, 1 neg rax shr rdi xor rdi, QWORD PTR [rsp+3184] and eax, 2567483615 xor rax, rdi mov QWORD PTR [rsp+5000], rax jmp .L14 -
define-private-public revised this gist
Jan 19, 2025 . 1 changed file with 0 additions and 92 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,95 +1,3 @@ RNG::analytical_in_unit_disk(): ; 49 instructions push rbp mov rbp, rsp -
define-private-public revised this gist
Jan 19, 2025 . 2 changed files with 59 additions and 72 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,60 @@ RNG::analytical_in_unit_disk(): ; 49 instructions push rbp mov rbp, rsp sub rsp, 64 mov QWORD PTR [rbp-56], rdi movsd xmm0, QWORD PTR .LC1[rip] mov rax, QWORD PTR [rbp-56] movapd xmm1, xmm0 mov rdx, QWORD PTR .LC2[rip] movq xmm0, rdx mov rdi, rax call RNG::num(double, double) ; 19 instructions movq rax, xmm0 movq xmm0, rax call sqrt movq rax, xmm0 mov QWORD PTR [rbp-8], rax movsd xmm0, QWORD PTR .LC3[rip] mov rax, QWORD PTR [rbp-56] movapd xmm1, xmm0 mov rdx, QWORD PTR .LC2[rip] movq xmm0, rdx mov rdi, rax call RNG::num(double, double) ; 19 instructions movq rax, xmm0 mov QWORD PTR [rbp-16], rax mov rax, QWORD PTR [rbp-16] movq xmm0, rax call cos movsd xmm1, QWORD PTR [rbp-8] mulsd xmm0, xmm1 movsd QWORD PTR [rbp-24], xmm0 mov rax, QWORD PTR [rbp-16] movq xmm0, rax call sin movsd xmm1, QWORD PTR [rbp-8] mulsd xmm0, xmm1 movsd QWORD PTR [rbp-32], xmm0 movsd xmm0, QWORD PTR [rbp-32] mov rdx, QWORD PTR [rbp-24] lea rax, [rbp-48] movapd xmm1, xmm0 movq xmm0, rdx mov rdi, rax call Vec2::Vec2(double, double) [complete object constructor] ; 14 instructions mov rax, QWORD PTR [rbp-48] mov rdx, QWORD PTR [rbp-40] movq xmm0, rax movq xmm1, rdx leave ret -------- The above method: (49 - 3) + 19 + 19 + 14: 98 instrunctions (albiet 6 calls)ec2::Vec2(double, double) [base object constructor]: ; 14 instructions push rbp mov rbp, rsp mov QWORD PTR [rbp-8], rdi @@ -84,10 +140,4 @@ RNG::analytical_in_unit_disk(): ; 49 instructions movq xmm0, rax movq xmm1, rdx leave ret This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. 
To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,56 +1,3 @@ RNG::rejection_in_unit_disk(): ; 41 instructions push rbp mov rbp, rsp @@ -93,14 +40,4 @@ RNG::rejection_in_unit_disk(): ; 41 instructions movq xmm1, rdx mov rbx, QWORD PTR [rbp-8] leave ret -
define-private-public created this gist
Jan 19, 2025 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,93 @@ Vec2::Vec2(double, double) [base object constructor]: ; 14 instructions push rbp mov rbp, rsp mov QWORD PTR [rbp-8], rdi movsd QWORD PTR [rbp-16], xmm0 movsd QWORD PTR [rbp-24], xmm1 mov rax, QWORD PTR [rbp-8] movsd xmm0, QWORD PTR [rbp-16] movsd QWORD PTR [rax], xmm0 mov rax, QWORD PTR [rbp-8] movsd xmm0, QWORD PTR [rbp-24] movsd QWORD PTR [rax+8], xmm0 nop pop rbp ret RNG::num(double, double): ; 19 instructions push rbp mov rbp, rsp sub rsp, 48 mov QWORD PTR [rbp-24], rdi movsd QWORD PTR [rbp-32], xmm0 movsd QWORD PTR [rbp-40], xmm1 mov rax, QWORD PTR [rbp-24] lea rdx, [rax+5000] mov rax, QWORD PTR [rbp-24] mov rsi, rax mov rdi, rdx call double std::uniform_real_distribution<double>::operator()<std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul> >(std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>&) movq rax, xmm0 mov QWORD PTR [rbp-8], rax movsd xmm0, QWORD PTR [rbp-40] subsd xmm0, QWORD PTR [rbp-32] mulsd xmm0, QWORD PTR [rbp-8] addsd xmm0, QWORD PTR [rbp-32] leave ret RNG::analytical_in_unit_disk(): ; 49 instructions push rbp mov rbp, rsp sub rsp, 64 mov QWORD PTR [rbp-56], rdi movsd xmm0, QWORD PTR .LC1[rip] mov rax, QWORD PTR [rbp-56] movapd xmm1, xmm0 mov rdx, QWORD PTR .LC2[rip] movq xmm0, rdx mov rdi, rax call RNG::num(double, double) ; 19 instructions movq rax, xmm0 movq xmm0, rax call sqrt movq rax, xmm0 mov QWORD PTR [rbp-8], rax movsd xmm0, QWORD PTR .LC3[rip] mov rax, QWORD PTR [rbp-56] movapd xmm1, xmm0 
mov rdx, QWORD PTR .LC2[rip] movq xmm0, rdx mov rdi, rax call RNG::num(double, double) ; 19 instructions movq rax, xmm0 mov QWORD PTR [rbp-16], rax mov rax, QWORD PTR [rbp-16] movq xmm0, rax call cos movsd xmm1, QWORD PTR [rbp-8] mulsd xmm0, xmm1 movsd QWORD PTR [rbp-24], xmm0 mov rax, QWORD PTR [rbp-16] movq xmm0, rax call sin movsd xmm1, QWORD PTR [rbp-8] mulsd xmm0, xmm1 movsd QWORD PTR [rbp-32], xmm0 movsd xmm0, QWORD PTR [rbp-32] mov rdx, QWORD PTR [rbp-24] lea rax, [rbp-48] movapd xmm1, xmm0 movq xmm0, rdx mov rdi, rax call Vec2::Vec2(double, double) [complete object constructor] ; 14 instructions mov rax, QWORD PTR [rbp-48] mov rdx, QWORD PTR [rbp-40] movq xmm0, rax movq xmm1, rdx leave ret -------- The above method: (49 - 3) + 19 + 19 + 14: 98 instrunctions (albiet 6 calls) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,106 @@ Vec2::Vec2(double, double) [base object constructor]: ; 14 instructions push rbp mov rbp, rsp mov QWORD PTR [rbp-8], rdi movsd QWORD PTR [rbp-16], xmm0 movsd QWORD PTR [rbp-24], xmm1 mov rax, QWORD PTR [rbp-8] movsd xmm0, QWORD PTR [rbp-16] movsd QWORD PTR [rax], xmm0 mov rax, QWORD PTR [rbp-8] movsd xmm0, QWORD PTR [rbp-24] movsd QWORD PTR [rax+8], xmm0 nop pop rbp ret Vec2::length_squared() const: ; 16 instructions push rbp mov rbp, rsp mov QWORD PTR [rbp-8], rdi mov rax, QWORD PTR [rbp-8] movsd xmm1, QWORD PTR [rax] mov rax, QWORD PTR [rbp-8] movsd xmm0, QWORD PTR [rax] mulsd xmm1, xmm0 mov rax, QWORD PTR [rbp-8] movsd xmm2, QWORD PTR [rax+8] mov rax, QWORD PTR [rbp-8] movsd xmm0, QWORD PTR [rax+8] mulsd xmm0, xmm2 addsd xmm0, xmm1 pop rbp ret RNG::num(double, double): ; 19 instructions push rbp mov rbp, rsp sub rsp, 48 mov QWORD PTR [rbp-24], rdi movsd 
QWORD PTR [rbp-32], xmm0 movsd QWORD PTR [rbp-40], xmm1 mov rax, QWORD PTR [rbp-24] lea rdx, [rax+5000] mov rax, QWORD PTR [rbp-24] mov rsi, rax mov rdi, rdx call double std::uniform_real_distribution<double>::operator()<std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul> >(std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>&) movq rax, xmm0 mov QWORD PTR [rbp-8], rax movsd xmm0, QWORD PTR [rbp-40] subsd xmm0, QWORD PTR [rbp-32] mulsd xmm0, QWORD PTR [rbp-8] addsd xmm0, QWORD PTR [rbp-32] leave ret RNG::rejection_in_unit_disk(): ; 41 instructions push rbp mov rbp, rsp push rbx sub rsp, 40 mov QWORD PTR [rbp-40], rdi .L12: movsd xmm0, QWORD PTR .LC1[rip] mov rdx, QWORD PTR .LC3[rip] mov rax, QWORD PTR [rbp-40] movapd xmm1, xmm0 movq xmm0, rdx mov rdi, rax call RNG::num(double, double) ; 19 instructions movq rbx, xmm0 movsd xmm0, QWORD PTR .LC1[rip] mov rdx, QWORD PTR .LC3[rip] mov rax, QWORD PTR [rbp-40] movapd xmm1, xmm0 movq xmm0, rdx mov rdi, rax call RNG::num(double, double) ; 19 instructions movq rax, xmm0 lea rdx, [rbp-32] movq xmm1, rbx movq xmm0, rax mov rdi, rdx call Vec2::Vec2(double, double) [complete object constructor] ; 14 instructions lea rax, [rbp-32] mov rdi, rax call Vec2::length_squared() const ; 16 instructions movsd xmm1, QWORD PTR .LC1[rip] comisd xmm1, xmm0 seta al test al, al je .L12 ; Possible jump to repeat the above (29 instructions) mov rax, QWORD PTR [rbp-32] mov rdx, QWORD PTR [rbp-24] movq xmm0, rax movq xmm1, rdx mov rbx, QWORD PTR [rbp-8] leave ret -------- This needs to traverse quite a few instructions, perform calls (and can loop infinately). 
Let's say we had success on our first go: 41 + 19 + 19 + 14 + 16 = 109 instructions (w/ 4 calls). If we had one failure but then a success, so the loop is run twice: 5 + 2 * (29 + 19 + 19 + 14 + 16) + 7 = 206 instructions (w/ 8 calls). I may be a little off on the math.