ergo
MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions > Class Template Reference

Template for packing of matrix elements. More...

#include <mm_kernel_inner_sse2_A.h>

Classes

struct  Assign_to_packed
 
struct  Extract_from_packed
 

Public Types

typedef real real
 Real number type (usually float or double)
 
typedef Pack< M, K, Ordering_col_wise, 1 > Pack_type_A
 Type that can (should) be used to pack A.
 
typedef Pack< K, N, Ordering_row_wise, floats_per_register > Pack_type_B
 Type that can (should) be used to pack B.
 
typedef Pack< M, N, Ordering_col_wise, 1 > Pack_type_C
 Type that can (should) be used to pack C.
 

Static Public Member Functions

template<template< typename T_ordering > class T_assign, typename T_ordering_matrix>
static void exec (typename T_assign< T_ordering_matrix >::PtrType X, typename T_assign< T_ordering_matrix >::PtrTypePacked X_packed, int const rows_total_matrix, int const cols_total_matrix)
 
template<typename T_ordering_matrix>
static void pack (real const *const X, real *X_packed, int const rows_total_matrix, int const cols_total_matrix)
 Convenience function for assignments to packed matrix.
 
template<typename T_ordering_matrix>
static void unpack (real *X, real const *const X_packed, int const rows_total_matrix, int const cols_total_matrix)
 Convenience function for extracting matrix from packed matrix.
 
static void exec (real const *const *const A, real const *const *const B, real *const C, int const i=1, int const offset_A=0, int const offset_B=0, int const offset_C=0)
 Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored according to the static members and typedefs of this class.
 
static void exec (real const *const *const A, real const *const *const B, real *const C, int const i=1)
 

Static Public Attributes

static int const size_packed = T_rows * T_cols * T_repetitions
 
static int const rows = T_rows
 
static int const cols = T_cols
 
static int const M
 Number of rows of A and C.
 
static int const N
 Number of columns of B and C.
 
static int const K
 Number of columns of A and rows of B.
 

Static Protected Attributes

static int const floats_per_register
 Number of real numbers that fit in one register.
 

Detailed Description

template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
class MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >

Template for packing of matrix elements.

Class template for packing of matrix elements prior to matrix-matrix multiply.

Member Typedef Documentation

◆ Pack_type_A

typedef Pack< M, K, Ordering_col_wise, 1 > MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::Pack_type_A

Type that can (should) be used to pack A.

◆ Pack_type_B

typedef Pack< K, N, Ordering_row_wise, floats_per_register > MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::Pack_type_B

Type that can (should) be used to pack B.

◆ Pack_type_C

typedef Pack< M, N, Ordering_col_wise, 1 > MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::Pack_type_C

Type that can (should) be used to pack C.

◆ real

typedef real MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::real

Real number type (usually float or double)

Member Function Documentation

◆ exec() [1/3]

void MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::exec ( real const *const *const A,
real const *const *const B,
real *const C,
int const i = 1 )
static

◆ exec() [2/3]

void MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::exec ( real const *const *const A,
real const *const *const B,
real *const C,
int const i = 1,
int const offset_A = 0,
int const offset_B = 0,
int const offset_C = 0 )
static

Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored according to the static members and typedefs of this class.

◆ exec() [3/3]

template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
template<template< typename T_ordering > class T_assign, typename T_ordering_matrix>
static void MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::exec ( typename T_assign< T_ordering_matrix >::PtrType X,
typename T_assign< T_ordering_matrix >::PtrTypePacked X_packed,
int const rows_total_matrix,
int const cols_total_matrix )
inline static

◆ pack()

template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
template<typename T_ordering_matrix>
static void MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::pack ( real const *const X,
real * X_packed,
int const rows_total_matrix,
int const cols_total_matrix )
inline static

Convenience function for assignments to packed matrix.

The template argument specifies how the original (unpacked) matrix is stored (e.g. column-wise or row-wise).

◆ unpack()

template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
template<typename T_ordering_matrix>
static void MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::unpack ( real * X,
real const *const X_packed,
int const rows_total_matrix,
int const cols_total_matrix )
inline static

Convenience function for extracting matrix from packed matrix.

The template argument specifies how the unpacked matrix is stored (e.g. column-wise or row-wise).

Member Data Documentation

◆ cols

template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
int const MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::cols = T_cols
static

◆ floats_per_register

int const MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::floats_per_register
static protected

Number of real numbers that fit in one register.

◆ K

int const MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::K
static

Number of columns of A and rows of B.

◆ M

int const MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::M
static

Number of rows of A and C.

◆ N

int const MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::N
static

Number of columns of B and C.

◆ rows

template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
int const MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::rows = T_rows
static

◆ size_packed

template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
int const MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::size_packed = T_rows * T_cols * T_repetitions
static

The documentation for this class was generated from the following file: