/* ---------------------------------------------- */
/* atomic86_64.S */

#ifdef __leading_underscore
# define _(s) _##s
#else
# define _(s) s
#endif

/* Argument registers for the two calling conventions.  P1..P4 are the
   first four integer parameters; Pn_8..Pn_64 are the 8/16/32/64-bit
   views of the same register. */
#ifdef _WIN32
# define P1 %rcx
# define P2 %rdx
# define P2_8 %dl
# define P2_16 %dx
# define P2_32 %edx
# define P2_64 %rdx
# define P3 %r8
# define P3_8 %r8b
# define P3_16 %r8w
# define P3_32 %r8d
# define P3_64 %r8
# define P4 %r9
#else
# define P1 %rdi
# define P2 %rsi
# define P2_8 %sil
# define P2_16 %si
# define P2_32 %esi
# define P2_64 %rsi
# define P3 %rdx
# define P3_8 %dl
# define P3_16 %dx
# define P3_32 %edx
# define P3_64 %rdx
# define P4 %rcx
#endif

#define RAX8 %al
#define RAX16 %ax
#define RAX32 %eax
#define RAX64 %rax

// On x86-64 a plain mov is a valid atomic load at every ordering up to
// seq_cst for naturally aligned operands of at most 8 bytes.
#define mkloader(size) \
_(__atomic_load_n ## size): \
	mov (P1), RAX ## size; \
	ret; \
_(__atomic_load ## size): \
	mov (P1), RAX ## size; \
	mov RAX ## size, (P2); \
	ret

mkloader(8)
mkloader(16)
mkloader(32)
mkloader(64)
#undef mkloader

// Relaxed and release stores are plain movs; the seq_cst store uses xchg,
// whose implicit lock prefix makes the store a full barrier.
#define mkstorer(size) \
_(__atomic_store_relaxed ## size): \
_(__atomic_store_release ## size): \
	mov (P2), P2_ ## size; \
_(__atomic_store_relaxed_n ## size): \
_(__atomic_store_release_n ## size): \
	mov P2_ ## size, (P1); \
	ret; \
\
_(__atomic_store_seq_cst ## size): \
	mov (P2), P2_ ## size; \
_(__atomic_store_seq_cst_n ## size): \
	xchg P2_ ## size, (P1); \
	ret;

mkstorer(8)
mkstorer(16)
mkstorer(32)
mkstorer(64)
#undef mkstorer

#define mkexchanger(size) \
_(__atomic_exchange_n ## size): \
	mov P2_ ## size, RAX ## size; \
	xchg (P1), RAX ## size; \
	ret; \
_(__atomic_exchange ## size): \
	mov (P2), RAX ## size; \
	xchg (P1), RAX ## size; \
	mov RAX ## size, (P3); \
	ret;

mkexchanger(8)
mkexchanger(16)
mkexchanger(32)
mkexchanger(64)
#undef mkexchanger

// nskip: how many bytes to skip (two or three)?
// tcc's assembler doesn't know to produce a more compact encoding for
// small, local jumps, so the jz is hand-encoded as a 2-byte rel8 jump.
// nskip is the encoded length of the "mov RAX ## size, (P2)" it jumps
// over: 2 bytes for the 8/32-bit stores, 3 for the 16/64-bit ones (the
// operand-size or REX.W prefix); P2 is %rsi or %rdx, so the address
// itself never needs a REX prefix.  On failure, cmpxchg leaves the
// observed value in RAX, which is written back to *expected (P2), per
// the __atomic_compare_exchange contract.
#define mkcmpxchg(size, nskip) \
_(__atomic_compare_exchange ## size): \
	mov (P3), P3_ ## size; \
_(__atomic_compare_exchange_n ## size): \
	mov (P2), RAX ## size; \
	lock cmpxchg P3_ ## size, (P1); \
	.byte 0x74, nskip; /*jz skip*/ \
	mov RAX ## size, (P2); \
/*skip:*/ \
	setz %al; \
	ret;

mkcmpxchg(8, 2)
mkcmpxchg(16, 3)
mkcmpxchg(32, 2)
mkcmpxchg(64, 3)
#undef mkcmpxchg

#define arithmetic_op(name, op, size) \
name: \
	lock op P2_ ## size, (P1); \
	ret;

// Fetch-op via a cmpxchg retry loop.  On failure cmpxchg reloads the
// current value into RAX, so the jnz re-enters at the "mov RAX, P3"
// rather than at the initial load.  off is the rel8 displacement back to
// that mov, computed from the SysV register encodings (mov 2/3/2/3 bytes,
// op 3/3/2/3, lock cmpxchg 4/5/4/5, jnz 2 for sizes 8/16/32/64).  NB:
// the _WIN32 assignment (P3_8 = %r8b etc.) would add REX prefixes and
// need different displacements.
#define arithmetic_op_fetch(name, op, size, off) \
name: \
	mov (P1), RAX ## size; \
	mov RAX ## size, P3_ ## size; \
	op P2_ ## size, P3_ ## size; \
	lock cmpxchg P3_ ## size, (P1); \
	.byte 0x75, off; /*jnz retry*/ \
	mov P3_ ## size, RAX ## size; \
	ret;

#define arithmetic_op_all(op, size, fetch_off) \
arithmetic_op(_(__atomic_ ## op ## size), op, size) \
arithmetic_op_fetch(_(__atomic_ ## op ## _fetch ## size), op, size, fetch_off)

#define arithmetic_ops(op) \
arithmetic_op_all(op, 8, -11) \
arithmetic_op_all(op, 16, -13) \
arithmetic_op_all(op, 32, -10) \
arithmetic_op_all(op, 64, -13)

arithmetic_ops(and)
arithmetic_ops(xor)
arithmetic_ops(or)
arithmetic_op(_(__atomic_sub8), sub, 8)
arithmetic_op(_(__atomic_sub16), sub, 16)
arithmetic_op(_(__atomic_sub32), sub, 32)
arithmetic_op(_(__atomic_sub64), sub, 64)
arithmetic_op(_(__atomic_add8), add, 8)
arithmetic_op(_(__atomic_add16), add, 16)
arithmetic_op(_(__atomic_add32), add, 32)
arithmetic_op(_(__atomic_add64), add, 64)
#undef arithmetic_ops
#undef arithmetic_op_all
#undef arithmetic_op_fetch
#undef arithmetic_op
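// For reference, a sketch of what __atomic_and_fetch32 above expands to,
// written with a symbolic label instead of the hand-encoded jump (SysV
// registers assumed; the "retry" label is invented here for illustration,
// the real code encodes it as .byte 0x75, -10):
//
//	mov (%rdi), %eax
//retry:
//	mov %eax, %edx
//	and %esi, %edx
//	lock cmpxchg %edx, (%rdi)
//	jnz retry		// on failure cmpxchg refreshed %eax
//	mov %edx, %eax		// fetch-op returns the new value
//	ret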
// add_fetch must return the new value: xadd leaves the old value in RAX,
// so the addend is applied once more on the way out.  sub_fetch negates
// the addend and falls through to add_fetch.
#define arithmetic_add(size) \
_(__atomic_sub_fetch ## size): \
	neg P2_ ## size; \
_(__atomic_add_fetch ## size): \
	mov P2_ ## size, RAX ## size; \
	lock xadd RAX ## size, (P1); \
	add P2_ ## size, RAX ## size; \
	ret; \
_(__atomic_fetch_add ## size): \
	mov P2_ ## size, RAX ## size; \
	lock xadd RAX ## size, (P1); \
	ret

arithmetic_add(8)
arithmetic_add(16)
arithmetic_add(32)
arithmetic_add(64)
#undef arithmetic_add

// fetch_sub returns the old value: xadd of the negated addend.
#define arithmetic_sub(size) \
_(__atomic_fetch_sub ## size): \
	mov P2_ ## size, RAX ## size; \
	neg RAX ## size; \
	lock xadd RAX ## size, (P1); \
	ret

arithmetic_sub(8)
arithmetic_sub(16)
arithmetic_sub(32)
arithmetic_sub(64)
#undef arithmetic_sub

//arithmetic_ops(_(__atomic_add), add, -11, -13, -10, -13)
//arithmetic_ops(_(__atomic_sub), sub, -11, -13, -10, -13)

//.globl _(TODO)
//_(TODO):
//
//	pop %rdx
//#ifdef _WIN32
//	mov %rcx,%rax
//#else
//	mov %rdi,%rax
//#endif
//	add $15,%rax
//	and $-16,%rax
//	jz p3
//
//#ifdef _WIN32
//p1:
//	cmp $4096,%rax
//	jbe p2
//	test %rax,-4096(%rsp)
//	sub $4096,%rsp
//	sub $4096,%rax
//	jmp p1
//p2:
//#endif
//	sub %rax,%rsp
//	mov %rsp,%rax
//p3:
//	push %rdx
//	ret
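// Likewise, a sketch of __atomic_compare_exchange_n32 from the mkcmpxchg
// macro earlier in this file, with a symbolic label in place of the
// .byte 0x74, 2 (SysV registers assumed; the "skip" label is invented
// here for illustration):
//
//	mov (%rsi), %eax	// load *expected
//	lock cmpxchg %edx, (%rdi)
//	jz skip			// success: *ptr was swapped to desired
//	mov %eax, (%rsi)	// failure: report the observed value
//skip:
//	setz %al		// mov preserves ZF from the cmpxchg
//	ret

/* ---------------------------------------------- */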