Skip to main content
Log in

Mojo struct

TensorCoreAsync

struct TensorCoreAsync[c_type: DType, a_type: DType, b_type: DType, mma_shape: IndexList[3], transpose_b: Bool = False]

Aliases

  • lhs_operand_type = LayoutTensor[a_type, _lhs_layout[mma_shape](), _lhs_layout[mma_shape]().rank(), address_space=3]
  • rhs_operand_type = LayoutTensor[b_type, _rhs_layout[mma_shape](), _rhs_layout[mma_shape]().rank(), address_space=3]
  • result_operand_type = SIMD[c_type, _output_register_size[mma_shape]()]

Implemented traits

AnyType, UnknownDestructibility

Methods

__init__

__init__(out self)

__call__

static __call__(lhs: LayoutTensor[a_type, _lhs_layout[mma_shape](), _lhs_layout[mma_shape]().rank(), address_space=3], rhs: LayoutTensor[b_type, _rhs_layout[mma_shape](), _rhs_layout[mma_shape]().rank(), address_space=3], c_reg: SIMD[c_type, _output_register_size[mma_shape]()]) -> SIMD[c_type, _output_register_size[mma_shape]()]

wgmma

static wgmma(a_smem_tile: LayoutTensor[a_type, layout, rank, address_space=3, element_layout=element_layout, layout_bitwidth=layout_bitwidth, masked=masked, alignment=alignment], b_smem_tile: LayoutTensor[b_type, layout, rank, address_space=3, element_layout=element_layout, layout_bitwidth=layout_bitwidth, masked=masked, alignment=alignment], mut c_reg: SIMD[c_type, size])

arrive

static arrive()

commit_group

static commit_group()

wait_for_all

static wait_for_all()

store_result

static store_result(warp_group_tile: LayoutTensor[c_type, layout, rank, address_space=address_space, element_layout=element_layout, layout_bitwidth=layout_bitwidth, masked=masked, alignment=alignment], res_reg_tile: SIMD[c_type, _output_register_size[mma_shape]()])

allocate_lhs

static allocate_lhs() -> LayoutTensor[a_type, _lhs_layout[mma_shape](), _lhs_layout[mma_shape]().rank(), address_space=3]

allocate_rhs

static allocate_rhs() -> LayoutTensor[b_type, _rhs_layout[mma_shape](), _rhs_layout[mma_shape]().rank(), address_space=3]

allocate_result

static allocate_result(initial_val: SIMD[c_type, 1]) -> SIMD[c_type, _output_register_size[mma_shape]()]