1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| cp.async.ca.shared{::cta}.global{.level::cache_hint}{.level::prefetch_size} [dst], [src], cp-size{, src-size}{, cache-policy} ; cp.async.cg.shared{::cta}.global{.level::cache_hint}{.level::prefetch_size} [dst], [src], 16{, src-size}{, cache-policy} ; cp.async.ca.shared{::cta}.global{.level::cache_hint}{.level::prefetch_size} [dst], [src], cp-size{, ignore-src}{, cache-policy} ; cp.async.cg.shared{::cta}.global{.level::cache_hint}{.level::prefetch_size} [dst], [src], 16{, ignore-src}{, cache-policy} ; .level::cache_hint = { .L2::cache_hint } .level::prefetch_size = { .L2::64B, .L2::128B, .L2::256B } cp-size = { 4, 8, 16 }
TS const* gmem_ptr = &gmem_src; uint32_t smem_int_ptr = cast_smem_ptr_to_uint(&smem_dst); asm volatile("cp.async.ca.shared.global.L2::128B [%0], [%1], %2;\n" :: "r"(smem_int_ptr), "l"(gmem_ptr), "n"(sizeof(TS)));
|