Mojo module
matmul_dispatch_sm90
Aliases
DISPATCH_HIT
alias DISPATCH_HIT = 1
DISPATCH_MISS
alias DISPATCH_MISS = 0
llama_405b_fp8_list
alias llama_405b_fp8_list = List(TuningConfigSM90(64, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(64, 128, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({128}), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(128, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(256, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(512, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(1024, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({8}), store_to_mem(cond(and(lt(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 0), ne(rem_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), 0)), {value = add(div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), -1)}, {value = div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8)}))), 0}), MatmulSchedule(1)), TuningConfigSM90(64, 2304, 16384, IndexList(64, 48, 32, Tuple()), Index(64, 48, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(128, 2304, 16384, IndexList(64, 48, 32, Tuple()), Index(64, 48, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(256, 2304, 16384, IndexList(64, 96, 32, Tuple()), Index(64, 96, 128), UInt(4), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(512, 2304, 16384, IndexList(64, 144, 32, Tuple()), Index(128, 144, 128), UInt(4), Index(1, 1, 1), UInt(2), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(1024, 2304, 16384, IndexList(64, 144, 32, Tuple()), Index(128, 144, 128), UInt(4), Index(1, 1, 1), UInt(2), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(2048, 2304, 16384, IndexList(64, 144, 32, Tuple()), Index(128, 144, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({16}), store_to_mem({8})), 0}), MatmulSchedule(1)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), 2304, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(1)), TuningConfigSM90(64, 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(64, 128, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({128}), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(128, 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(256, 13312, 16384, IndexList(64, 208, 32, Tuple()), Index(128, 208, 128), UInt(4), Index(1, 2, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(512, 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(1024, 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({8}), store_to_mem(cond(and(lt(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 0), ne(rem_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), 0)), {value = add(div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), -1)}, {value = div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8)}))), 0}), MatmulSchedule(1)), TuningConfigSM90(64, 16384, 6656, IndexList(64, 128, 32, Tuple()), Index(64, 128, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({128}), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(1024, 16384, 6656, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), 16384, 6656, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({8}), store_to_mem(cond(and(lt(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 0), ne(rem_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), 0)), {value = add(div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), -1)}, {value = div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8)}))), 0}), MatmulSchedule(1)), Tuple())
llama_405b_fp8_table
alias llama_405b_fp8_table = Table(List(TuningConfigSM90(64, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(64, 128, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({128}), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(128, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(256, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(512, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(1024, 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), 16384, 2048, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({8}), store_to_mem(cond(and(lt(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 0), ne(rem_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), 0)), {value = add(div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), -1)}, {value = div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8)}))), 0}), MatmulSchedule(1)), TuningConfigSM90(64, 2304, 16384, IndexList(64, 48, 32, Tuple()), Index(64, 48, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(128, 2304, 16384, IndexList(64, 48, 32, Tuple()), Index(64, 48, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(256, 2304, 16384, IndexList(64, 96, 32, Tuple()), Index(64, 96, 128), UInt(4), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(512, 2304, 16384, IndexList(64, 144, 32, Tuple()), Index(128, 144, 128), UInt(4), Index(1, 1, 1), UInt(2), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(1024, 2304, 16384, IndexList(64, 144, 32, Tuple()), Index(128, 144, 128), UInt(4), Index(1, 1, 1), UInt(2), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem(#lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(2048, 2304, 16384, IndexList(64, 144, 32, Tuple()), Index(128, 144, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({16}), store_to_mem({8})), 0}), MatmulSchedule(1)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), 2304, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(1)), TuningConfigSM90(64, 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(64, 128, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({128}), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(128, 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(256, 13312, 16384, IndexList(64, 208, 32, Tuple()), Index(128, 208, 128), UInt(4), Index(1, 2, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(512, 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(1024, 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), 13312, 16384, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({8}), store_to_mem(cond(and(lt(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 0), ne(rem_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), 0)), {value = add(div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), -1)}, {value = div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8)}))), 0}), MatmulSchedule(1)), TuningConfigSM90(64, 16384, 6656, IndexList(64, 128, 32, Tuple()), Index(64, 128, 128), UInt(8), Index(1, 1, 1), UInt(1), False, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({128}), store_to_mem({1})), 0}), MatmulSchedule(2)), TuningConfigSM90(1024, 16384, 6656, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), 16384, 6656, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(4), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({8}), store_to_mem(cond(and(lt(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 0), ne(rem_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), 0)), {value = add(div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), -1)}, {value = div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8)}))), 0}), MatmulSchedule(1)), Tuple()), "llama_405b_fp8")
llama_8b_fp8_list
alias llama_8b_fp8_list = List(TuningConfigSM90(128, -1, -1, IndexList(64, 128, 32, Tuple()), Index(64, 128, 128), UInt(8), Index(1, 1, 1), UInt(1), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(1024, -1, -1, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(6), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), -1, -1, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(6), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({8}), store_to_mem(cond(and(lt(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 0), ne(rem_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), 0)), {value = add(div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), -1)}, {value = div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8)}))), 0}), MatmulSchedule(1)), Tuple())
llama_8b_fp8_table
alias llama_8b_fp8_table = Table(List(TuningConfigSM90(128, -1, -1, IndexList(64, 128, 32, Tuple()), Index(64, 128, 128), UInt(8), Index(1, 1, 1), UInt(1), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(1024, -1, -1, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(6), Index(1, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:i1 0, 1}), MatmulSchedule(-1)), TuningConfigSM90(Int(SIMD(max_or_inf[::DType]())), -1, -1, IndexList(64, 128, 32, Tuple()), Index(128, 128, 128), UInt(6), Index(2, 1, 1), UInt(2), True, OptionalReg[IndexList[2]]({:@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}> apply(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm #lit.comptime.origin> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> rebind(:!lit.generator<[2]("x": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,0]> read_mem, "y": !lit.ref<@stdlib::@builtin::@int::@Int, imm *[0,1]> read_mem) -> !lit.struct<@stdlib::@utils::@index::@IndexList<:@stdlib::@builtin::@int::@Int {2}, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>>> @stdlib::@utils::@index::@"Index[::Intable,::Intable,::DType]($0,$1)"<:trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :trait<@stdlib::@builtin::@int::@Intable> @stdlib::@builtin::@int::@Int, :@stdlib::@builtin::@dtype::@DType {:dtype si64}>), store_to_mem({8}), store_to_mem(cond(and(lt(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 0), ne(rem_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), 0)), {value = add(div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8), -1)}, {value = div_s(#lit.struct.extract<:@stdlib::@builtin::@int::@Int #lit.struct.extract<:@stdlib::@gpu::@host::@info::@GPUInfo apply(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut #lit.comptime.origin> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> rebind(:!lit.generator<[1]("name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "vendor": !lit.ref<@stdlib::@gpu::@host::@info::@Vendor, mut *[0,0]> owned_in_mem, "api": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "arch_name": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "compute": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>, "version": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "sm_count": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_sm": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_warp": !lit.struct<@stdlib::@builtin::@int::@Int>, "warps_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "threads_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "thread_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_file_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "register_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "allocation_granularity": !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>, "max_registers_per_thread": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_registers_per_block": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_blocks_per_multiprocessor": !lit.struct<@stdlib::@builtin::@int::@Int>, "shared_memory_allocation_unit_size": !lit.struct<@stdlib::@builtin::@int::@Int>, "warp_allocation_granularity": !lit.struct<@stdlib::@builtin::@int::@Int>, "max_thread_block_size": !lit.struct<@stdlib::@builtin::@int::@Int>) -> !lit.struct<@stdlib::@gpu::@host::@info::@GPUInfo>> @stdlib::@gpu::@host::@info::@GPUInfo::@"__init__(::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Vendor,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::SIMD[::DType(float32), ::Int(1)],::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::Int,::StringSlice[::Bool(False), ::Origin[::Bool(False)](StaticConstantOrigin)],::Int,::Int,::Int,::Int,::Int,::Int)"), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "H100">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "H100">, *?), rebind(:!lit.ref<@stdlib::@gpu::@host::@info::@Vendor, imm #lit.comptime.origin> store_to_mem(apply(:!lit.generator<("_value": !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>) -> !lit.struct<@stdlib::@gpu::@host::@info::@Vendor>> @stdlib::@gpu::@host::@info::@Vendor::@"__init__(::SIMD[::DType(int8), ::Int(1)])", apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@int_literal::@IntLiteral<:!pop.int_literal 2>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.int_literal](::IntLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype si8}, :@stdlib::@builtin::@int::@Int {1}, :!pop.int_literal 2>, *?)))), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "cuda">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "cuda">, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "hopper">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "hopper">, *?), apply(:!lit.generator<("value": !lit.struct<@stdlib::@builtin::@float_literal::@FloatLiteral<:!pop.float_literal #pop.float_literal<9|1>>>, |) -> !lit.struct<@stdlib::@builtin::@simd::@SIMD<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}>>> @stdlib::@builtin::@simd::@SIMD::@"__init__[__mlir_type.!pop.float_literal](::FloatLiteral[$2])"<:@stdlib::@builtin::@dtype::@DType {:dtype f32}, :@stdlib::@builtin::@int::@Int {1}, :!pop.float_literal #pop.float_literal<9|1>>, *?), apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "sm_90a">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "sm_90a">, *?), {132}, {32}, {2048}, {32}, {64}, {2048}, {32}, {233472}, {65536}, {256}, apply(:!lit.generator<("lit": !lit.struct<@stdlib::@builtin::@string_literal::@StringLiteral<:string "warp">>) -> !lit.struct<@stdlib::@collections::@string::@string_slice::@StringSlice<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}>>> @stdlib::@collections::@string::@string_slice::@StringSlice::@"__init__[__mlir_type.!kgen.string](::StringLiteral[$2])"<:@stdlib::@builtin::@bool::@Bool {:i1 0}, :@stdlib::@builtin::@type_aliases::@Origin<:@stdlib::@builtin::@bool::@Bool {:i1 0}> {_mlir_origin: origin<0> = #lit.origin.field<#lit.static.origin : !lit.origin<0>, "__constants__">}, :string "warp">, *?), {255}, {65536}, {32}, {128}, {4}, {1024}), "sm_count">, "value">, 8)}))), 0}), MatmulSchedule(1)), Tuple()), "llama_8b_fp8")
MAX_M
alias MAX_M = Int(SIMD(max_or_inf[::DType]()))
Structs
Functions
Was this page helpful?
Thank you! We'll create more content like this.
Thank you for helping us improve!