# Build manifest for the "layer_norm" kernel package.
# NOTE(review): the key layout ([general] / [torch] / [kernel.<name>]) looks
# like a kernel-builder style build.toml — confirm against the consuming tool.

[general]
name = "layer_norm"
# NOTE(review): presumably marks whether the package is backend-independent;
# confirm the exact meaning with the build tool's documentation.
universal = false

[torch]
# Sources for the Torch binding layer (paths as written; presumably relative
# to this file — confirm).
src = [
    "torch-ext/torch_binding.cpp",
    "torch-ext/torch_binding.h",
]

[kernel.layer_norm]
depends = ["torch"]
backend = "cuda"
# CUDA capability versions, given as strings rather than floats.
cuda-capabilities = [
    "8.0",
    "8.9",
    "9.0",
    "10.0",
    "12.0",
]
include = ["."]
# Element order is kept exactly as in the original list. The comments below
# only group entries by file-name prefix; the numeric suffixes run over the
# same fourteen values (256 ... 8192) in every group.
src = [
    # API entry point and its header.
    "layer_norm/ln.h",
    "layer_norm/ln_api.cpp",

    # ln_bwd_<N>.cu plus the matching kernels header.
    "layer_norm/ln_bwd_1024.cu",
    "layer_norm/ln_bwd_1280.cu",
    "layer_norm/ln_bwd_1536.cu",
    "layer_norm/ln_bwd_2048.cu",
    "layer_norm/ln_bwd_256.cu",
    "layer_norm/ln_bwd_2560.cu",
    "layer_norm/ln_bwd_3072.cu",
    "layer_norm/ln_bwd_4096.cu",
    "layer_norm/ln_bwd_512.cu",
    "layer_norm/ln_bwd_5120.cu",
    "layer_norm/ln_bwd_6144.cu",
    "layer_norm/ln_bwd_7168.cu",
    "layer_norm/ln_bwd_768.cu",
    "layer_norm/ln_bwd_8192.cu",
    "layer_norm/ln_bwd_kernels.cuh",

    # ln_fwd_<N>.cu plus the matching kernels header.
    "layer_norm/ln_fwd_1024.cu",
    "layer_norm/ln_fwd_1280.cu",
    "layer_norm/ln_fwd_1536.cu",
    "layer_norm/ln_fwd_2048.cu",
    "layer_norm/ln_fwd_256.cu",
    "layer_norm/ln_fwd_2560.cu",
    "layer_norm/ln_fwd_3072.cu",
    "layer_norm/ln_fwd_4096.cu",
    "layer_norm/ln_fwd_512.cu",
    "layer_norm/ln_fwd_5120.cu",
    "layer_norm/ln_fwd_6144.cu",
    "layer_norm/ln_fwd_7168.cu",
    "layer_norm/ln_fwd_768.cu",
    "layer_norm/ln_fwd_8192.cu",
    "layer_norm/ln_fwd_kernels.cuh",

    "layer_norm/ln_kernel_traits.h",

    # ln_parallel_bwd_<N>.cu.
    "layer_norm/ln_parallel_bwd_1024.cu",
    "layer_norm/ln_parallel_bwd_1280.cu",
    "layer_norm/ln_parallel_bwd_1536.cu",
    "layer_norm/ln_parallel_bwd_2048.cu",
    "layer_norm/ln_parallel_bwd_256.cu",
    "layer_norm/ln_parallel_bwd_2560.cu",
    "layer_norm/ln_parallel_bwd_3072.cu",
    "layer_norm/ln_parallel_bwd_4096.cu",
    "layer_norm/ln_parallel_bwd_512.cu",
    "layer_norm/ln_parallel_bwd_5120.cu",
    "layer_norm/ln_parallel_bwd_6144.cu",
    "layer_norm/ln_parallel_bwd_7168.cu",
    "layer_norm/ln_parallel_bwd_768.cu",
    "layer_norm/ln_parallel_bwd_8192.cu",

    # ln_parallel_fwd_<N>.cu.
    "layer_norm/ln_parallel_fwd_1024.cu",
    "layer_norm/ln_parallel_fwd_1280.cu",
    "layer_norm/ln_parallel_fwd_1536.cu",
    "layer_norm/ln_parallel_fwd_2048.cu",
    "layer_norm/ln_parallel_fwd_256.cu",
    "layer_norm/ln_parallel_fwd_2560.cu",
    "layer_norm/ln_parallel_fwd_3072.cu",
    "layer_norm/ln_parallel_fwd_4096.cu",
    "layer_norm/ln_parallel_fwd_512.cu",
    "layer_norm/ln_parallel_fwd_5120.cu",
    "layer_norm/ln_parallel_fwd_6144.cu",
    "layer_norm/ln_parallel_fwd_7168.cu",
    "layer_norm/ln_parallel_fwd_768.cu",
    "layer_norm/ln_parallel_fwd_8192.cu",

    # Remaining shared headers.
    "layer_norm/ln_parallel_residual_bwd_kernels.cuh",
    "layer_norm/ln_parallel_residual_fwd_kernels.cuh",
    "layer_norm/ln_utils.cuh",
    "layer_norm/static_switch.h",
]
# NOTE(review): presumably applied to the host C++ compiler — confirm.
cxx-flags = [
    "-DFLASHATTENTION_DISABLE_PYBIND",
    "-mcmodel=large",
]
# NOTE(review): presumably passed to the CUDA compiler — confirm.
# The -U entries undefine the __CUDA_NO_* half / bfloat16 macros in case they
# are predefined elsewhere in the build.
cuda-flags = [
    "-O3",
    "-U__CUDA_NO_HALF_OPERATORS__",
    "-U__CUDA_NO_HALF_CONVERSIONS__",
    "-U__CUDA_NO_BFLOAT16_OPERATORS__",
    "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
    "-U__CUDA_NO_BFLOAT162_OPERATORS__",
    "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
    "--expt-relaxed-constexpr",
    "--expt-extended-lambda",
    "--use_fast_math",
]