Lesson of 16 February 2010

Some simple C/C++ programs
Simple macro examples
Example of FORTRAN linkage
/*
  I am a comment
 */
 
// include the standard definition for I/O
#include <iostream>
// include the definitions of the I/O manipulators such as setw, oct, etc.
#include <iomanip>

// load the standard namespace
using namespace std ;

// Build the Fortran external symbol from the C name by token-pasting a
// trailing underscore, e.g. F77NAME(dgemm) expands to dgemm_.
// NOTE(review): the trailing-underscore naming is assumed for the Fortran
// compiler in use -- confirm for your toolchain.
#define F77NAME(A) A##_
    
// scalar type of the matrices built in main
typedef double valueType ;

/* Prototype of the BLAS dgemm routine (double precision, Fortran linkage).
   Per the reference BLAS documentation it computes
     C := ALPHA * op(A) * op(B) + BETA * C
   Every argument, scalars included, is passed by address. */

extern "C" // tell the compiler that dgemm_ uses the C linkage convention (no C++ name mangling)
void
F77NAME(dgemm)( char   const   TRANSA[], // pass a string address (selects op(A); main passes "N")
                char   const   TRANSB[], // pass a string address (selects op(B); main passes "N")
                int    const * M,        // pass the address of the integer M
                int    const * N,        // pass the address of the integer N
                int    const * K,        // pass the address of the integer K
                double const * ALPHA,    // pass the address of the double ALPHA
                double const   A[],      // pass the address of the vector/matrix A
                int    const * LDA,      // pass the address of the row (leading) dimension of A
                double const   B[],      // pass the address of the vector/matrix B
                int    const * LDB,      // pass the address of the row (leading) dimension of B
                double const * BETA,     // pass the address of the double BETA
                double         C[],      // pass the address of the vector/matrix C (the only non-const array: it receives the result)
                int    const * LDC ) ;   // pass the address of the row (leading) dimension of C

/* Prototype of the BLAS sgemm routine (single precision, Fortran linkage).
   Per the reference BLAS documentation it computes
     C := ALPHA * op(A) * op(B) + BETA * C
   Every argument, scalars included, is passed by address. */
extern "C" // tell the compiler that sgemm_ uses the C linkage convention (no C++ name mangling)
void
F77NAME(sgemm)( char   const   TRANSA[], // pass a string address (selects op(A))
                char   const   TRANSB[], // pass a string address (selects op(B))
                int    const * M,        // pass the address of the integer M
                int    const * N,        // pass the address of the integer N
                int    const * K,        // pass the address of the integer K
                float  const * ALPHA,    // pass the address of the float ALPHA
                float  const   A[],      // pass the address of the vector/matrix A
                int    const * LDA,      // pass the address of the row (leading) dimension of A
                float  const   B[],      // pass the address of the vector/matrix B
                int    const * LDB,      // pass the address of the row (leading) dimension of B
                float  const * BETA,     // pass the address of the float BETA
                float         C[],       // pass the address of the vector/matrix C (the only non-const array: it receives the result)
                int    const * LDC ) ;   // pass the address of the row (leading) dimension of C
/*
  FORTRAN does not support overloading.
  We use inline + C++ overloading to simulate it.
  In particular we define two routines named "gemm", taking
  float and double arguments respectively.
  The use of inline results in a DIRECT call to the Fortran
  routine: there is no penalty in calling the Fortran routine!
 */

/*
  Single precision overload of gemm.
  Sizes and coefficients are received by value, so the caller can write
  plain literals; their addresses are then handed to the Fortran routine,
  whose prototype takes every scalar through a pointer.
 */
inline void
gemm( const char  * TRANSA,
      const char  * TRANSB,
      const int     M,
      const int     N,
      const int     K,
      const float   ALPHA,
      const float * A,
      const int     LDA,
      const float * B,
      const int     LDB,
      const float   BETA,
      float       * C,
      const int     LDC )
{
  // forward everything unchanged to sgemm_
  F77NAME(sgemm)( TRANSA, TRANSB,
                  &M, &N, &K,
                  &ALPHA,
                  A, &LDA,
                  B, &LDB,
                  &BETA,
                  C, &LDC ) ;
}

/*
  Double precision overload of gemm.
  Sizes and coefficients are received by value, so the caller can write
  plain literals; their addresses are then handed to the Fortran routine,
  whose prototype takes every scalar through a pointer.
 */
inline void
gemm( const char   * TRANSA,
      const char   * TRANSB,
      const int      M,
      const int      N,
      const int      K,
      const double   ALPHA,
      const double * A,
      const int      LDA,
      const double * B,
      const int      LDB,
      const double   BETA,
      double       * C,
      const int      LDC )
{
  // forward everything unchanged to dgemm_
  F77NAME(dgemm)( TRANSA, TRANSB,
                  &M, &N, &K,
                  &ALPHA,
                  A, &LDA,
                  B, &LDB,
                  &BETA,
                  C, &LDC ) ;
}

int // the type of return value
main() { // brackets contains the bosy of the program
  
  // define matrix A and initialize  
  valueType A[3][3] = { {1, 2, 3},
                        {4, 5, 6},
                        {7, 8, 9} } ;

  // define matrix B and initialize  
  valueType B[3][2] = { {1, 2},
                        {3, 4},
                        {5, 6} } ;

  // define matrix C and DO NOT initialize  
  valueType C[10][10] ;

  /* Due to the fact that C matrix are store by ROW and not by COLUMN
     they are "seen" as transposed by the fortran routine.
     Moreover (AB)^T = B^T A^T so that the 
     product of AB in C row major order is the 
     product BA in column major order!. */

  // alpha * A * B + beta * C --> C
  gemm( "N",
        "N",
        2,
        3,
        3,
        1, 
        (valueType*)B, 2, // perform casting of the pointer to make compiler happy
        (valueType*)A, 3, // perform casting of the pointer to make compiler happy
        0,
        (valueType*)C, // perform casting of the pointer to make compiler happy
        10 ) ;

  // display the result
  for ( int i = 0 ; i < 3 ; ++i ) {
    for ( int j = 0 ; j < 2 ; ++j ) {
      cout << setw(10) << C[i][j] << " " ;
    }
    cout << '\n' ;
  }

  return 0 ; // return 0 to the OS
}

/*

  Assembly generated for the gemm call in main (it ends in a direct call to _dgemm_)
  
  icc -S -fsource-asm -fcode-asm

  ;;;   gemm( "N",
  ;;;         "N",
  ;;;         2,
  ;;;         3,
  ;;;         3,
  ;;;         1, 
  ;;;         (valueType*)B, 2, // perform casting of the pointer to make compiler happy
  ;;;         (valueType*)A, 3, // perform casting of the pointer to make compiler happy
  ;;;         0,
  ;;;         (valueType*)C, // perform casting of the pointer to make compiler happy

        movl      $2, %ecx                                      #example7.cc:119.3
        movl      $3, %r8d                                      #example7.cc:119.3
        lea       928(%rsp), %r9                                #example7.cc:125.21
        movq      %r11, 952(%rsp)                               #example7.cc:105.21
        lea       1088(%rsp), %rax                              #example7.cc:119.3
        lea       976(%rsp), %r10                               #example7.cc:126.21
        lea       1096(%rsp), %r11                              #example7.cc:119.3
        movq      %rdi, 960(%rsp)                               #example7.cc:105.21
        lea       1056(%rsp), %rdi                              #example7.cc:119.3
        movq      %rsi, 968(%rsp)                               #example7.cc:105.21
        lea       128(%rsp), %rsi                               #example7.cc:128.21
        movl      %ecx, 1064(%rsp)                              #example7.cc:119.3
        movl      %r8d, 1072(%rsp)                              #example7.cc:119.3
        movl      %r8d, 1080(%rsp)                              #example7.cc:119.3
        movq      %rdx, 1048(%rsp)                              #example7.cc:119.3
        lea       1104(%rsp), %rdx                              #example7.cc:119.3
        movl      %ecx, 1088(%rsp)                              #example7.cc:119.3
        lea       1072(%rsp), %rcx                              #example7.cc:119.3
        movl      %r8d, 1096(%rsp)                              #example7.cc:119.3
        lea       1080(%rsp), %r8                               #example7.cc:119.3
        movq      $0, 1056(%rsp)                                #example7.cc:119.3
        movl      $10, 1104(%rsp)                               #example7.cc:119.3
        movq      %r9, (%rsp)                                   #example7.cc:125.21
        lea       1048(%rsp), %r9                               #example7.cc:119.3
        movq      %rax, 8(%rsp)                                 #example7.cc:119.3
        movq      %r10, 16(%rsp)                                #example7.cc:126.21
        movq      %r11, 24(%rsp)                                #example7.cc:119.3
        movq      %rdi, 32(%rsp)                                #example7.cc:119.3
        lea       L_2__STRING.0(%rip), %rdi                     #example7.cc:119.3
        movq      %rsi, 40(%rsp)                                #example7.cc:128.21
        movq      %rdi, %rsi                                    #example7.cc:119.3
        movq      %rdx, 48(%rsp)                                #example7.cc:119.3
        lea       1064(%rsp), %rdx                              #example7.cc:119.3
        call      _dgemm_                                       #example7.cc:119.3

*/
Some free numerical linear algebra libraries
The LAPACK homepage
The BLAS homepage
The ATLAS homepage