ergo
|
Template for matrix-matrix multiplication that wraps around a kernel given as template argument. More...
#include <mm_kernel_outer_A.h>
Classes | |
class | Pack |
Template for translations between unpacked and packed matrix storage. More... | |
Public Types | |
typedef T_gemm_kernel::real | real |
Real number type (usually float or double) More... | |
typedef Ordering_col_wise | Ordering_block_A |
typedef Ordering_col_wise | Ordering_block_B |
typedef Ordering_col_wise | Ordering_block_C |
typedef Pack< M_block, K_block, Ordering_block_A, typename T_gemm_kernel::Pack_type_A > | Pack_type_A |
typedef Pack< K_block, N_block, Ordering_block_B, typename T_gemm_kernel::Pack_type_B > | Pack_type_B |
typedef Pack< M_block, N_block, Ordering_block_C, typename T_gemm_kernel::Pack_type_C > | Pack_type_C |
Static Public Member Functions | |
static void | exec (real const *const *const A, real const *const *const B, real *const C, int const i=1) |
Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored using the packing types of this class. More... | |
Static Public Attributes | |
static int const | M_kernel = T_gemm_kernel::M |
Number of rows of A and C kernels. More... | |
static int const | N_kernel = T_gemm_kernel::N |
Number of columns of B and C kernels. More... | |
static int const | K_kernel = T_gemm_kernel::K |
Number of columns of A kernels and rows of B kernels. More... | |
static int const | M_block = T_M_block |
Number of rows of A and C (blocks). More... | |
static int const | N_block = T_N_block |
Number of columns of B and C (blocks). More... | |
static int const | K_block = 1 |
Number of columns of A and rows of B (blocks). More... | |
static int const | M = M_kernel * M_block |
Number of rows of A and C. More... | |
static int const | N = N_kernel * N_block |
Number of columns of B and C. More... | |
static int const | K = K_kernel * K_block |
Number of columns of A and rows of B. More... | |
Template for matrix-matrix multiplication that wraps around a kernel given as template argument.
The idea is that the inner kernel should be fully unrolled and should block for registers.
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_A |
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_B |
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_C |
typedef Pack< M_block, K_block, Ordering_block_A, typename T_gemm_kernel::Pack_type_A > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_A |
typedef Pack< K_block, N_block, Ordering_block_B, typename T_gemm_kernel::Pack_type_B > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_B |
typedef Pack< M_block, N_block, Ordering_block_C, typename T_gemm_kernel::Pack_type_C > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_C |
typedef T_gemm_kernel::real MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::real |
Real number type (usually float or double)
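void MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec (real const *const *const A, real const *const *const B, real *const C, int const i=1) | static |
Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored using the packing types of this class.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K = K_kernel * K_block | static |
Number of columns of A and rows of B.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_block = 1 | static |
Number of columns of A and rows of B (blocks).
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_kernel = T_gemm_kernel::K | static |
Number of columns of A kernels and rows of B kernels.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M = M_kernel * M_block | static |
Number of rows of A and C.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_block = T_M_block | static |
Number of rows of A and C (blocks).
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_kernel = T_gemm_kernel::M | static |
Number of rows of A and C kernels.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N = N_kernel * N_block | static |
Number of columns of B and C.
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_block = T_N_block | static |
Number of columns of B and C (blocks).
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_kernel = T_gemm_kernel::N | static |
Number of columns of B and C kernels.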