Mojo struct
TMemAccumulator
@register_passable(trivial)
struct TMemAccumulator[dtype_: DType, MMA_M: Int, MMA_N: Int, num_m_mmas: Int, num_n_mmas: Int, num_softmax_threads: Int]
Fields
- tmem_addr (UInt32): The tensor memory (TMEM) address that this accumulator tile wraps.
Implemented traits
AccumulatorTile, AnyType, Copyable, ImplicitlyCopyable, Movable, UnknownDestructibility
Aliases
__copyinit__is_trivial
alias __copyinit__is_trivial = True
__del__is_trivial
alias __del__is_trivial = True
__moveinit__is_trivial
alias __moveinit__is_trivial = True
dtype
alias dtype = dtype_
element_layout
alias element_layout = Layout.row_major(1, 2)
frag_size
alias frag_size = 0 if num_softmax_threads == 0 else (MMA_M * MMA_N) // num_softmax_threads
The number of accumulator elements per softmax thread (floor division, guarded to 0 when num_softmax_threads is 0).
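For intuition, here is a minimal, self-contained sketch of the same computation; the parameter values are hypothetical, not taken from this page:

```mojo
# Hypothetical parameter values, chosen for illustration only.
alias MMA_M = 64
alias MMA_N = 256
alias num_softmax_threads = 128

# Same formula as the alias above: zero when there are no softmax threads,
# otherwise the MMA tile's elements divided evenly across those threads.
alias frag_size = 0 if num_softmax_threads == 0 else (MMA_M * MMA_N) // num_softmax_threads


def main():
    print(frag_size)  # (64 * 256) // 128 = 128 elements per thread
```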
layout_t
alias layout_t = RegisterAccumulatorLayout[MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads]
rows_of_frags_layout
alias rows_of_frags_layout = Layout.row_major(num_m_mmas * num_n_mmas, Self.frag_size)
vec_output_layout
alias vec_output_layout = Layout(
    IntTuple(IntTuple(2, num_m_mmas), IntTuple(Self.frag_size // 4, num_n_mmas)),
    IntTuple(IntTuple(2, Self.frag_size), IntTuple(4, Self.frag_size * num_m_mmas)),
)
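To see the nesting concretely, a minimal sketch that builds the same (shape, stride) structure with hypothetical sizes, assuming `Layout` and `IntTuple` are importable from the `layout` package:

```mojo
from layout import Layout, IntTuple

# Hypothetical sizes for illustration only.
alias num_m_mmas = 1
alias num_n_mmas = 1
alias frag_size = 128


def main():
    # Shape ((2, num_m_mmas), (frag_size // 4, num_n_mmas)) with strides
    # ((2, frag_size), (4, frag_size * num_m_mmas)): pairs of elements,
    # grouped into vectors of four, laid out per MMA tile.
    var vec_output_layout = Layout(
        IntTuple(IntTuple(2, num_m_mmas), IntTuple(frag_size // 4, num_n_mmas)),
        IntTuple(IntTuple(2, frag_size), IntTuple(4, frag_size * num_m_mmas)),
    )
    print(vec_output_layout)  # e.g. ((2, 1), (32, 1)):((2, 128), (4, 128))
```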
Methods
__init__
__init__(tmem_addr: UInt32) -> Self
__getitem__
__getitem__(self, i: UInt32) -> Self
check_constraints
static check_constraints()
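Taken together, a minimal construction sketch. The parameter values are hypothetical, the import of `TMemAccumulator` is omitted because its module path is not shown on this page, and a real kernel would obtain `tmem_addr` from a device tensor-memory allocation:

```mojo
# dtype_, MMA_M, MMA_N, num_m_mmas, num_n_mmas, num_softmax_threads
fn example(tmem_addr: UInt32):
    alias Acc = TMemAccumulator[DType.float32, 128, 128, 1, 1, 128]
    Acc.check_constraints()   # static sanity checks on the parameter combination
    var acc = Acc(tmem_addr)  # wrap an already-allocated tensor-memory address
    var sub = acc[1]          # __getitem__ yields another Self, presumably the i-th sub-tile
```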
offset
rows_of_frags
static rows_of_frags(src: LayoutTensor[dtype_, Self.vec_output_layout, MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)]) -> LayoutTensor[dtype_, Self.rows_of_frags_layout, MutableAnyOrigin, address_space=AddressSpace(5)]
Returns: `src` viewed with `rows_of_frags_layout`: one row of `frag_size` fragment elements per MMA tile.
allocate_register_tile
static allocate_register_tile() -> LayoutTensor[dtype_, Self.vec_output_layout, MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)]
Returns: A freshly allocated register tile with layout `vec_output_layout` in local address space (`AddressSpace(5)`).
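A hedged sketch of how the two static tile helpers compose, reusing the hypothetical parameters from above:

```mojo
fn tiles_example():
    alias Acc = TMemAccumulator[DType.float32, 128, 128, 1, 1, 128]
    # Stack-allocate a register tile shaped as vec_output_layout...
    var tile = Acc.allocate_register_tile()
    # ...and view it as one row of frag_size elements per MMA tile.
    var frags = Acc.rows_of_frags(tile)
```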
copy_from
copy_from(self, src: LayoutTensor[dtype_, Self.vec_output_layout, MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)])
copy_to
copy_to(self, dst: LayoutTensor[dtype_, Self.vec_output_layout, MutableAnyOrigin, address_space=AddressSpace(5), element_layout=Layout.row_major(1, 2)])
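Putting the two copies together, a hedged round-trip sketch; the directions (registers into tensor memory for `copy_from`, back out for `copy_to`) are inferred from the `src`/`dst` parameter names:

```mojo
fn roundtrip_example(tmem_addr: UInt32):
    alias Acc = TMemAccumulator[DType.float32, 128, 128, 1, 1, 128]
    var acc = Acc(tmem_addr)
    var regs = Acc.allocate_register_tile()
    acc.copy_from(regs)  # registers -> tensor memory (inferred direction)
    acc.copy_to(regs)    # tensor memory -> registers (inferred direction)
```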