Generalized matrix-matrix multiplication using SSE intrinsics.
More...
#include <stdexcept>
#include "mm_kernel_inner_sse2_A.h"
#include "mm_kernel_outer_A.h"
Go to the source code of this file.
|
template<typename real , typename regType , int m_kernel, int n_kernel, int k_kernel, int m_block, int n_block> |
static void | gemm_sse (real const *const A, real const *const B, real *C, size_t const m, size_t const n, size_t const k, real *A_packed, real *B_packed, real *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size) |
|
template<typename real > |
static void | gemm_sse (real const *const A, real const *const B, real *C, size_t const m, size_t const n, size_t const k, real *A_packed, real *B_packed, real *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size) |
|
template<> |
void | gemm_sse (double const *const A, double const *const B, double *C, size_t const m, size_t const n, size_t const k, double *A_packed, double *B_packed, double *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size) |
|
template<> |
void | gemm_sse (float const *const A, float const *const B, float *C, size_t const m, size_t const n, size_t const k, float *A_packed, float *B_packed, float *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size) |
|
Generalized matrix-matrix multiplication using SSE intrinsics.
- Author: Emanuel H. Rubensson
- Date: 2009
◆ gemm_sse() [1/4]
template<>
void gemm_sse (double const *const A, double const *const B, double *C, size_t const m, size_t const n, size_t const k, double *A_packed, double *B_packed, double *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size)
◆ gemm_sse() [2/4]
template<>
void gemm_sse (float const *const A, float const *const B, float *C, size_t const m, size_t const n, size_t const k, float *A_packed, float *B_packed, float *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size)
◆ gemm_sse() [3/4]
template<typename real , typename regType , int m_kernel, int n_kernel, int k_kernel, int m_block, int n_block>
static void gemm_sse (real const *const A, real const *const B, real *C, size_t const m, size_t const n, size_t const k, real *A_packed, real *B_packed, real *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size)
|
static |
◆ gemm_sse() [4/4]
template<typename real >
static void gemm_sse (real const *const A, real const *const B, real *C, size_t const m, size_t const n, size_t const k, real *A_packed, real *B_packed, real *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size)
|
static |