Mojo struct
TensorCoreAsync
struct TensorCoreAsync[c_type: DType, a_type: DType, b_type: DType, mma_shape: IndexList[3], transpose_b: Bool = False]
Aliases
lhs_operand_type = LayoutTensor[a_type, _lhs_layout[mma_shape](), _lhs_layout[mma_shape]().rank(), address_space=3]
rhs_operand_type = LayoutTensor[b_type, _rhs_layout[mma_shape](), _rhs_layout[mma_shape]().rank(), address_space=3]
result_operand_type = SIMD[c_type, _output_register_size[mma_shape]()]
Implemented traits
AnyType, UnknownDestructibility
Methods
__init__
__init__(out self)
__call__
static __call__(lhs: LayoutTensor[a_type, _lhs_layout[mma_shape](), _lhs_layout[mma_shape]().rank(), address_space=3], rhs: LayoutTensor[b_type, _rhs_layout[mma_shape](), _rhs_layout[mma_shape]().rank(), address_space=3], c_reg: SIMD[c_type, _output_register_size[mma_shape]()]) -> SIMD[c_type, _output_register_size[mma_shape]()]
wgmma
static wgmma(a_smem_tile: LayoutTensor[a_type, layout, rank, address_space=3, element_layout=element_layout, layout_bitwidth=layout_bitwidth, masked=masked, alignment=alignment], b_smem_tile: LayoutTensor[b_type, layout, rank, address_space=3, element_layout=element_layout, layout_bitwidth=layout_bitwidth, masked=masked, alignment=alignment], c_reg_tile: LayoutTensor[c_type, layout, rank, address_space=5, element_layout=element_layout, layout_bitwidth=layout_bitwidth, masked=masked, alignment=alignment])
arrive
static arrive()
commit_group
static commit_group()
wait_for_all
static wait_for_all()
store_result
static store_result(warp_group_tile: LayoutTensor[c_type, layout, rank, address_space=address_space, element_layout=element_layout, layout_bitwidth=layout_bitwidth, masked=masked, alignment=alignment], res_reg_tile: SIMD[c_type, _output_register_size[mma_shape]()])
allocate_lhs
static allocate_lhs() -> LayoutTensor[a_type, _lhs_layout[mma_shape](), _lhs_layout[mma_shape]().rank(), address_space=3]
allocate_rhs
static allocate_rhs() -> LayoutTensor[b_type, _rhs_layout[mma_shape](), _rhs_layout[mma_shape]().rank(), address_space=3]
allocate_result
static allocate_result(initial_val: SIMD[c_type, 1]) -> SIMD[c_type, _output_register_size[mma_shape]()]
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!