Lesson of 17 February 2010¶
Lesson file in a zip file
Alglin interface with Lapack/Atlas
File alglin.hh
1/*--------------------------------------------------------------------------*\
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; either version 2, or (at your option) |
6 | any later version. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | |
17 | Copyright (C) 2008 |
18 | |
19 | , __ , __ |
20 | /|/ \ /|/ \ |
21 | | __/ _ ,_ | __/ _ ,_ |
22 | | \|/ / | | | | \|/ / | | | |
23 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
24 | /| /| |
25 | \| \| |
26 | |
27 | Enrico Bertolazzi |
28 | Dipartimento di Ingegneria Meccanica e Strutturale |
29 | Universita` degli Studi di Trento |
30 | Via Mesiano 77, I-38050 Trento, Italy |
31 | email: enrico.bertolazzi@unitn.it |
32 | |
33\*--------------------------------------------------------------------------*/
34
35///
36/// file: alglin.hh
37///
38
39#ifndef ALGLIN_HH
40#define ALGLIN_HH
41
42#ifndef BLASNAME
43 #define BLASNAME(A) A##_
44#endif
45
46#ifndef LAPACKNAME
47 #define LAPACKNAME(A) A##_
48#endif
49
50#ifndef ATLASNAME
51 #define ATLASNAME(A) ATL_##A
52#endif
53
54#include <stdexcept>
55#include <cmath>
56
57namespace alglin {
58
59 using namespace std ;
60
61 typedef char character ;
62 typedef int integer ;
63 typedef float single_precision ;
64 typedef double double_precision ;
65
66 enum ATLAS_ORDER { AtlasRowMajor = 101, AtlasColMajor = 102 };
67 enum ATLAS_TRANS { AtlasNoTrans = 111, AtlasTrans = 112,
68 AtlasConjTrans = 113, AtlasConj = 114 };
69 enum ATLAS_UPLO { AtlasUpper = 121, AtlasLower = 122 };
70 enum ATLAS_DIAG { AtlasNonUnit = 131, AtlasUnit = 132 };
71
72 enum ATLAS_SIDE { AtlasLeft=141, AtlasRight=142 } ;
73
74 inline
75 ATLAS_TRANS
76 trans_atlas(character const TRANS ) {
77 switch ( TRANS ) {
78 case 'N': case 'n': return AtlasNoTrans ;
79 case 'T': case 't': return AtlasTrans ;
80 case 'C': case 'c': return AtlasConjTrans ;
81 default: throw runtime_error("bad traspose mode selection\n") ;
82 }
83 }
84
85 inline
86 ATLAS_UPLO
87 uplo_atlas(character const UPLO ) {
88 switch ( UPLO ) {
89 case 'U': case 'u': return AtlasUpper ;
90 case 'L': case 'l': return AtlasLower ;
91 default: throw runtime_error("bad UP/LOW mode selection\n") ;
92 }
93 }
94
95 inline
96 ATLAS_DIAG
97 diag_atlas(character const DIAG ) {
98 switch ( DIAG ) {
99 case 'U': case 'u': return AtlasUnit ;
100 case 'N': case 'n': return AtlasNonUnit ;
101 default: throw runtime_error("bad diagonal mode selection\n") ;
102 }
103 }
104
105 inline
106 ATLAS_SIDE
107 side_atlas(character const SIDE ) {
108 switch ( SIDE ) {
109 case 'L': case 'l': return AtlasLeft ;
110 case 'R': case 'r': return AtlasRight ;
111 default: throw runtime_error("bad side mode selection\n") ;
112 }
113 }
114
115 /*
116 // ___ ___ _ __ _ _
117 // / __/ _ \| '_ \| | | |
118 // | (_| (_) | |_) | |_| |
119 // \___\___/| .__/ \__, |
120 // |_| |___/
121 */
122 extern "C" {
123 void
124 BLASNAME(scopy)( integer const * N,
125 single_precision const X[],
126 integer const * INCX,
127 single_precision Y[],
128 integer const * INCY ) ;
129 void
130 BLASNAME(dcopy)( integer const * N,
131 double_precision const X[],
132 integer const * INCX,
133 double_precision Y[],
134 integer const * INCY ) ;
135 void
136 ATLASNAME(scopy)( const int N,
137 const float *X, const int incX,
138 float *Y, const int incY );
139 void
140 ATLASNAME(dcopy)( const int N,
141 const double *X, const int incX,
142 double *Y, const int incY );
143 }
144
145 inline
146 void
147 copy( integer const N,
148 single_precision const X[],
149 integer const INCX,
150 single_precision Y[],
151 integer const INCY )
152 #ifdef USE_ATLAS
153 { ATLASNAME(scopy)( N, X, INCX, Y, INCY ) ; }
154 #else
155 { BLASNAME(scopy)( &N, X, &INCX, Y, &INCY ) ; }
156 #endif
157
158 inline
159 void
160 copy( integer const N,
161 double_precision const X[],
162 integer const INCX,
163 double_precision Y[],
164 integer const INCY )
165 #ifdef USE_ATLAS
166 { ATLASNAME(dcopy)( N, X, INCX, Y, INCY ) ; }
167 #else
168 { BLASNAME(dcopy)( &N, X, &INCX, Y, &INCY ) ; }
169 #endif
170
171 /*
172 // _____ ____ _ _ __
173 // / __\ \ /\ / / _` | '_ \
174 // \__ \\ V V / (_| | |_) |
175 // |___/ \_/\_/ \__,_| .__/
176 // |_|
177 */
178 extern "C" {
179 void
180 BLASNAME(sswap)( integer const * N,
181 single_precision X[],
182 integer const * INCX,
183 single_precision Y[],
184 integer const * INCY ) ;
185 void
186 BLASNAME(dswap)( integer const * N,
187 double_precision X[],
188 integer const * INCX,
189 double_precision Y[],
190 integer const * INCY ) ;
191 void
192 ATLASNAME(sswap)( const int N,
193 float *X, const int incX,
194 float *Y, const int incY );
195 void
196 ATLASNAME(dswap)( const int N,
197 double *X, const int incX,
198 double *Y, const int incY );
199 void
200 BLASNAME(slaswp)( integer const * NCOL,
201 single_precision A[],
202 integer const * LDA,
203 integer const * K1,
204 integer const * K2,
205 integer const IPIV[],
206 integer const * INC ) ;
207 void
208 BLASNAME(dlaswp)( integer const * NCOL,
209 double_precision A[],
210 integer const * LDA,
211 integer const * K1,
212 integer const * K2,
213 integer const IPIV[],
214 integer const * INC ) ;
215 }
216
217 inline
218 void
219 swap( integer const N,
220 single_precision X[],
221 integer const INCX,
222 single_precision Y[],
223 integer const INCY )
224 #ifdef USE_ATLAS
225 { ATLASNAME(sswap)( N, X, INCX, Y, INCY ) ; }
226 #else
227 { BLASNAME(sswap)( &N, X, &INCX, Y, &INCY ) ; }
228 #endif
229
230 inline
231 void
232 swap( integer const N,
233 double_precision X[],
234 integer const INCX,
235 double_precision Y[],
236 integer const INCY )
237 #ifdef USE_ATLAS
238 { ATLASNAME(dswap)( N, X, INCX, Y, INCY ) ; }
239 #else
240 { BLASNAME(dswap)( &N, X, &INCX, Y, &INCY ) ; }
241 #endif
242
243 inline
244 void
245 swaps( integer const NCOL,
246 single_precision A[],
247 integer const LDA,
248 integer const I1,
249 integer const I2,
250 integer const IPIV[],
251 integer const INC ) {
252 integer K1 = I1+1 ;
253 integer K2 = I2+1 ;
254 BLASNAME(slaswp)( &NCOL, A, &LDA, &K1, &K2, IPIV, &INC ) ;
255 }
256
257 inline
258 void
259 swaps( integer const NCOL,
260 double_precision A[],
261 integer const LDA,
262 integer const I1,
263 integer const I2,
264 integer const IPIV[],
265 integer const INC ) {
266 integer K1 = I1+1 ;
267 integer K2 = I2+1 ;
268 BLASNAME(dlaswp)( &NCOL, A, &LDA, &K1, &K2, IPIV, &INC ) ;
269 }
270
271 /*
272 // _
273 // ___ ___ __ _| |
274 // / __|/ __/ _` | |
275 // \__ \ (_| (_| | |
276 // |___/\___\__,_|_|
277 */
278 extern "C" {
279 void
280 BLASNAME(sscal)( integer const * N,
281 single_precision const * S,
282 single_precision X[],
283 integer const * INCX ) ;
284 void
285 BLASNAME(dscal)( integer const * N,
286 double_precision const * S,
287 double_precision X[],
288 integer const * INCX ) ;
289 void
290 ATLASNAME(sscal)( const int N,
291 const float alpha,
292 float *X, const int incX );
293 void
294 ATLASNAME(dscal)( const int N,
295 const double alpha,
296 double *X, const int incX );
297 }
298
299 inline
300 void
301 scal( integer const N,
302 single_precision const S,
303 single_precision X[],
304 integer const INCX )
305 #ifdef USE_ATLAS
306 { ATLASNAME(sscal)(N, S, X, INCX) ; }
307 #else
308 { BLASNAME(sscal)(&N, &S, X, &INCX) ; }
309 #endif
310
311 inline
312 void
313 scal( integer const N,
314 double_precision const S,
315 double_precision X[],
316 integer const INCX )
317 #ifdef USE_ATLAS
318 { ATLASNAME(dscal)(N, S, X, INCX) ; }
319 #else
320 { BLASNAME(dscal)(&N, &S, X, &INCX) ; }
321 #endif
322
323 /*
324 // __ ___ ___ __ _ _
325 // / _` \ \/ / '_ \| | | |
326 // | (_| |> <| |_) | |_| |
327 // \__,_/_/\_\ .__/ \__, |
328 // |_| |___/
329 */
330 extern "C" {
331 void
332 BLASNAME(saxpy)( integer const * N,
333 single_precision const * A,
334 single_precision const X[],
335 integer const * INCX,
336 single_precision Y[],
337 integer const * INCY ) ;
338 void
339 BLASNAME(daxpy)( integer const * N,
340 double_precision const * A,
341 double_precision const X[],
342 integer const * INCX,
343 double_precision Y[],
344 integer const * INCY ) ;
345 void
346 ATLASNAME(saxpy)( const int N,
347 const float alpha,
348 const float *X, const int incX,
349 float *Y, const int incY );
350 void
351 ATLASNAME(daxpy)( const int N,
352 const double alpha,
353 const double *X, const int incX,
354 double *Y, const int incY );
355 }
356
357 inline
358 void
359 axpy( integer const N,
360 single_precision const A,
361 single_precision const X[],
362 integer const INCX,
363 single_precision Y[],
364 integer const INCY )
365 #ifdef USE_ATLAS
366 { ATLASNAME(saxpy)( N, A, X, INCX, Y, INCY ) ; }
367 #else
368 { BLASNAME(saxpy)( &N, &A, X, &INCX, Y, &INCY ) ; }
369 #endif
370
371 inline
372 void
373 axpy( integer const N,
374 double_precision const A,
375 double_precision const X[],
376 integer const INCX,
377 double_precision Y[],
378 integer const INCY )
379 #ifdef USE_ATLAS
380 { ATLASNAME(daxpy)( N, A, X, INCX, Y, INCY ) ; }
381 #else
382 { BLASNAME(daxpy)( &N, &A, X, &INCX, Y, &INCY ) ; }
383 #endif
384
385 /*
386 // _______ _ __ ___
387 // |_ / _ \ '__/ _ \
388 // / / __/ | | (_) |
389 // /___\___|_| \___/
390 */
391 extern "C" {
392 void
393 ATLASNAME(szero)( const int N, float *X, const int incX );
394 void
395 ATLASNAME(dzero)( const int N, double *X, const int incX );
396 }
397
398 inline
399 void
400 zero( integer const N,
401 single_precision X[],
402 integer const INCX )
403 #ifdef USE_ATLAS
404 { ATLASNAME(szero)( N, X, INCX ) ; }
405 #else
406 { single_precision z = 0 ;
407 integer iz = 0 ;
408 BLASNAME(scopy)( &N, &z, &iz, X, &INCX ) ; }
409 #endif
410
411 inline
412 void
413 zero( integer const N,
414 double_precision X[],
415 integer const INCX )
416 #ifdef USE_ATLAS
417 { ATLASNAME(dzero)( N, X, INCX ) ; }
418 #else
419 { double_precision z = 0 ;
420 integer iz = 0 ;
421 BLASNAME(dcopy)( &N, &z, &iz, X, &INCX ) ; }
422 #endif
423
424 /*
425 // __ _ ___ _ __
426 // / _` |/ _ \ '__|
427 // | (_| | __/ |
428 // \__, |\___|_|
429 // |___/
430 */
431 extern "C" {
432 void
433 BLASNAME(sger)( integer const * M,
434 integer const * N,
435 single_precision const * ALPHA,
436 single_precision const X[],
437 integer const * INCX,
438 single_precision const Y[],
439 integer const * INCY,
440 single_precision A[],
441 integer const * LDA ) ;
442 void
443 BLASNAME(dger)( integer const * M,
444 integer const * N,
445 double_precision const * ALPHA,
446 double_precision const X[],
447 integer const * INCX,
448 double_precision const Y[],
449 integer const * INCY,
450 double_precision A[],
451 integer const * LDA ) ;
452 void
453 ATLASNAME(sger)( const int M, const int N,
454 const float alpha,
455 const float *X, const int incX,
456 const float *Y, const int incY,
457 float *A, const int lda );
458 void
459 ATLASNAME(dger)( const int M, const int N,
460 const double alpha,
461 const double *X, const int incX,
462 const double *Y, const int incY,
463 double *A, const int lda );
464 }
465
466 inline
467 void
468 ger( integer const M,
469 integer const N,
470 single_precision const ALPHA,
471 single_precision const X[],
472 integer const INCX,
473 single_precision const Y[],
474 integer const INCY,
475 single_precision A[],
476 integer const LDA )
477 #ifdef USE_ATLAS
478 { ATLASNAME(sger)( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) ; }
479 #else
480 { BLASNAME(sger)( &M, &N, &ALPHA, X, &INCX, Y, &INCY, A, &LDA ) ; }
481 #endif
482
483 inline
484 void
485 ger( integer const M,
486 integer const N,
487 double_precision const ALPHA,
488 double_precision const X[],
489 integer const INCX,
490 double_precision const Y[],
491 integer const INCY,
492 double_precision A[],
493 integer const LDA )
494 #ifdef USE_ATLAS
495 { ATLASNAME(dger)( M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) ; }
496 #else
497 { BLASNAME(dger)( &M, &N, &ALPHA, X, &INCX, Y, &INCY, A, &LDA ) ; }
498 #endif
499
500 /*
501 // __ _ ___ _ __ _____ __
502 // / _` |/ _ \ '_ ` _ \ \ / /
503 // | (_| | __/ | | | | \ V /
504 // \__, |\___|_| |_| |_|\_/
505 // |___/
506 */
507 extern "C" {
508 void
509 BLASNAME(sgemv)( character const TRANS[],
510 integer const * M,
511 integer const * N,
512 single_precision const * ALPHA,
513 single_precision const A[],
514 integer const * LDA,
515 single_precision const X[],
516 integer const * INCX,
517 single_precision const * BETA,
518 single_precision Y[],
519 integer const * INCY ) ;
520 void
521 BLASNAME(dgemv)( character const TRANS[],
522 integer const * M,
523 integer const * N,
524 double_precision const * ALPHA,
525 double_precision const A[],
526 integer const * LDA,
527 double_precision const X[],
528 integer const * INCX,
529 double_precision const * BETA,
530 double_precision Y[],
531 integer const * INCY ) ;
532 void
533 ATLASNAME(sgemv)( const enum ATLAS_TRANS TransA,
534 const int M, const int N,
535 const float alpha,
536 const float *A, const int lda,
537 const float *X, const int incX,
538 const float beta,
539 float *Y, const int incY );
540 void
541 ATLASNAME(dgemv)( const enum ATLAS_TRANS TransA,
542 const int M, const int N,
543 const double alpha,
544 const double *A, const int lda,
545 const double *X, const int incX,
546 const double beta,
547 double *Y, const int incY );
548 }
549
550 inline
551 void
552 gemv( character const TRANS[],
553 integer const M,
554 integer const N,
555 single_precision const ALPHA,
556 single_precision const A[],
557 integer const LDA,
558 single_precision const X[],
559 integer const INCX,
560 single_precision const BETA,
561 single_precision Y[],
562 integer const INCY )
563 #ifdef USE_ATLAS
564 { ATLASNAME(sgemv)( trans_atlas(TRANS[0]), M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) ; }
565 #else
566 { BLASNAME(sgemv)( TRANS, &M, &N, &ALPHA, A, &LDA, X, &INCX, &BETA, Y, &INCY ) ; }
567 #endif
568
569 inline
570 void
571 gemv( character const TRANS[],
572 integer const M,
573 integer const N,
574 double_precision const ALPHA,
575 double_precision const A[],
576 integer const LDA,
577 double_precision const X[],
578 integer const INCX,
579 double_precision const BETA,
580 double_precision Y[],
581 integer const INCY )
582 #ifdef USE_ATLAS
583 { ATLASNAME(dgemv)( trans_atlas(TRANS[0]), M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) ; }
584 #else
585 { BLASNAME(dgemv)( TRANS, &M, &N, &ALPHA, A, &LDA, X, &INCX, &BETA, Y, &INCY ) ; }
586 #endif
587
588 /*
589 // __ _ ___ _ __ ___ _ __ ___
590 // / _` |/ _ \ '_ ` _ \| '_ ` _ \
591 // | (_| | __/ | | | | | | | | | |
592 // \__, |\___|_| |_| |_|_| |_| |_|
593 // |___/
594 */
595 extern "C" {
596 void
597 BLASNAME(sgemm)( character const TRANSA[],
598 character const TRANSB[],
599 integer const * M,
600 integer const * N,
601 integer const * K,
602 single_precision const * ALPHA,
603 single_precision const A[],
604 integer const * LDA,
605 single_precision const B[],
606 integer const * LDB,
607 single_precision const * BETA,
608 single_precision C[],
609 integer const * LDC ) ;
610 void
611 BLASNAME(dgemm)( character const TRANSA[],
612 character const TRANSB[],
613 integer const * M,
614 integer const * N,
615 integer const * K,
616 double_precision const * ALPHA,
617 double_precision const A[],
618 integer const * LDA,
619 double_precision const B[],
620 integer const * LDB,
621 double_precision const * BETA,
622 double_precision C[],
623 integer const * LDC ) ;
624 void
625 ATLASNAME(sgemm)( const enum ATLAS_TRANS TransA,
626 const enum ATLAS_TRANS TransB,
627 const int M, const int N, const int K,
628 const float alpha,
629 const float *A, const int lda,
630 const float *B, const int ldb,
631 const float beta,
632 float *C, const int ldc );
633 void
634 ATLASNAME(dgemm)( const enum ATLAS_TRANS TransA,
635 const enum ATLAS_TRANS TransB,
636 const int M, const int N, const int K,
637 const double alpha,
638 const double *A, const int lda,
639 const double *B, const int ldb,
640 const double beta,
641 double *C, const int ldc );
642 }
643
644 inline
645 void
646 gemm( character const TRANSA[],
647 character const TRANSB[],
648 integer const M,
649 integer const N,
650 integer const K,
651 single_precision const ALPHA,
652 single_precision const A[],
653 integer const LDA,
654 single_precision const B[],
655 integer const LDB,
656 single_precision const BETA,
657 single_precision C[],
658 integer const LDC )
659 #ifdef USE_ATLAS
660 { ATLASNAME(sgemm)( trans_atlas(TRANSA[0]), trans_atlas(TRANSB[0]),
661 M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) ; }
662 #else
663 { BLASNAME(sgemm)( TRANSA, TRANSB,
664 &M, &N, &K,
665 &ALPHA, A, &LDA,
666 B, &LDB,
667 &BETA, C, &LDC ) ; }
668 #endif
669
670 inline
671 void
672 gemm( character const TRANSA[],
673 character const TRANSB[],
674 integer const M,
675 integer const N,
676 integer const K,
677 double_precision const ALPHA,
678 double_precision const A[],
679 integer const LDA,
680 double_precision const B[],
681 integer const LDB,
682 double_precision const BETA,
683 double_precision C[],
684 integer const LDC )
685 #ifdef USE_ATLAS
686 { ATLASNAME(dgemm)( trans_atlas(TRANSA[0]), trans_atlas(TRANSB[0]),
687 M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) ; }
688 #else
689 { BLASNAME(dgemm)( TRANSA, TRANSB,
690 &M, &N, &K,
691 &ALPHA, A, &LDA,
692 B, &LDB,
693 &BETA, C, &LDC ) ; }
694 #endif
695
696 /*
697 // ____
698 // _ __ _ __ _ __ ___ |___ \
699 // | '_ \| '__| '_ ` _ \ __) |
700 // | | | | | | | | | | |/ __/
701 // |_| |_|_| |_| |_| |_|_____|
702 */
703 extern "C" {
704 single_precision
705 BLASNAME(snrm2)( integer const * N, single_precision const X[], integer const * INCX ) ;
706
707 double_precision
708 BLASNAME(dnrm2)( integer const * N, double_precision const X[], integer const * INCX ) ;
709
710 float
711 ATLASNAME(snrm2)( const int N, const float *X, const int incX ) ;
712
713 double
714 ATLASNAME(dnrm2)( const int N, const double *X, const int incX ) ;
715 }
716
717 inline
718 single_precision
719 nrm2( integer const N,
720 single_precision const X[],
721 integer const INCX )
722 #ifdef USE_ATLAS
723 { return ATLASNAME(snrm2)( N, X, INCX ) ; }
724 #else
725 { return BLASNAME(snrm2)( &N, X, &INCX ) ; }
726 #endif
727
728 inline
729 double_precision
730 nrm2( integer const N,
731 double_precision const X[],
732 integer const INCX )
733 #ifdef USE_ATLAS
734 { return ATLASNAME(dnrm2)( N, X, INCX ) ; }
735 #else
736 { return BLASNAME(dnrm2)( &N, X, &INCX ) ; }
737 #endif
738
739 /*
740 // __ _ ___ _ _ _ __ ___
741 // / _` / __| | | | '_ ` _ \
742 // | (_| \__ \ |_| | | | | | |
743 // \__,_|___/\__,_|_| |_| |_|
744 */
745 extern "C" {
746
747 single_precision
748 BLASNAME(sasum)( integer const * N, single_precision const X[], integer const * INCX ) ;
749
750 double_precision
751 BLASNAME(dasum)( integer const * N, double_precision const X[], integer const * INCX ) ;
752
753 float
754 ATLASNAME(sasum)( const int N, const float *X, const int incX );
755
756 double
757 ATLASNAME(dasum)( const int N, const double *X, const int incX );
758 }
759
760 inline
761 single_precision
762 asum(integer const N,
763 single_precision const X[],
764 integer const INCX)
765 #ifdef USE_ATLAS
766 { return ATLASNAME(sasum)( N, X, INCX ) ; }
767 #else
768 { return BLASNAME(sasum)( &N, X, &INCX ) ; }
769 #endif
770
771 inline
772 double_precision
773 asum(integer const N,
774 double_precision const X[],
775 integer const INCX)
776 #ifdef USE_ATLAS
777 { return ATLASNAME(dasum)( N, X, INCX ) ; }
778 #else
779 { return BLASNAME(dasum)( &N, X, &INCX ) ; }
780 #endif
781
782 /*
783 // __ _ _ __ ___ __ ___ __
784 // / _` | '_ ` _ \ / _` \ \/ /
785 // | (_| | | | | | | (_| |> <
786 // \__,_|_| |_| |_|\__,_/_/\_\
787 */
788 extern "C" {
789
790 integer
791 BLASNAME(isamax)( integer const * N, single_precision const X[], integer const * INCX ) ;
792
793 integer
794 BLASNAME(idamax)( integer const * N, double_precision const X[], integer const * INCX ) ;
795
796 int
797 ATLASNAME(isamax)( const int N, const float *X, const int incX );
798
799 int
800 ATLASNAME(idamax)( const int N, const double *X, const int incX );
801 }
802
803 inline
804 integer
805 iamax( integer const N,
806 single_precision const X[],
807 integer const INCX )
808 #ifdef USE_ATLAS
809 { return ATLASNAME(isamax)( N, X, INCX )-1 ; }
810 #else
811 { return BLASNAME(isamax)( &N, X, &INCX )-1 ; }
812 #endif
813
814 inline
815 integer
816 iamax( integer const N,
817 double_precision const X[],
818 integer const INCX )
819 #ifdef USE_ATLAS
820 { return ATLASNAME(idamax)( N, X, INCX )-1 ; }
821 #else
822 { return BLASNAME(idamax)( &N, X, &INCX )-1 ; }
823 #endif
824
825 inline
826 single_precision
827 amax( integer const N,
828 single_precision const X[],
829 integer const INCX )
830 { return abs(X[iamax(N,X,INCX)]) ; }
831
832 inline
833 double_precision
834 amax( integer const N,
835 double_precision const X[],
836 integer const INCX )
837 { return abs(X[iamax(N,X,INCX)]) ; }
838
839 /*
840 // _ _
841 // __| | ___ | |_
842 // / _` |/ _ \| __|
843 // | (_| | (_) | |_
844 // \__,_|\___/ \__|
845 */
846 extern "C" {
847 single_precision
848 BLASNAME(sdot)( integer const * N,
849 single_precision const SX[],
850 integer const * INCX,
851 single_precision const SY[],
852 integer const * INCY ) ;
853
854 double_precision
855 BLASNAME(ddot)( integer const * N,
856 double_precision const SX[],
857 integer const * INCX,
858 double_precision const SY[],
859 integer const * INCY ) ;
860 float
861 ATLASNAME(sdot)( const int N,
862 const float *X, const int incX,
863 const float *Y, const int incY );
864 double
865 ATLASNAME(ddot)( const int N,
866 const double *X, const int incX,
867 const double *Y, const int incY );
868 }
869
870 inline
871 single_precision
872 dot( integer const N,
873 single_precision const SX[],
874 integer const INCX,
875 single_precision const SY[],
876 integer const INCY )
877 #ifdef USE_ATLAS
878 { return ATLASNAME(sdot)( N, SX, INCX, SY, INCY ) ; }
879 #else
880 { return BLASNAME(sdot)( &N, SX, &INCX, SY, &INCY ) ; }
881 #endif
882
883 inline
884 double_precision
885 dot( integer const N,
886 double_precision const SX[],
887 integer const INCX,
888 double_precision const SY[],
889 integer const INCY )
890 #ifdef USE_ATLAS
891 { return ATLASNAME(ddot)( N, SX, INCX, SY, INCY ) ; }
892 #else
893 { return BLASNAME(ddot)( &N, SX, &INCX, SY, &INCY ) ; }
894 #endif
895
896 /*
897 // __ _ ___ ___ ___ _ __ _ _
898 // / _` |/ _ \/ __/ _ \| '_ \| | | |
899 // | (_| | __/ (_| (_) | |_) | |_| |
900 // \__, |\___|\___\___/| .__/ \__, |
901 // |___/ |_| |___/
902 */
903 extern "C" {
904 void
905 LAPACKNAME(slacpy)( character const UPLO[],
906 integer const * M,
907 integer const * N,
908 single_precision const A[],
909 integer const * LDA,
910 single_precision B[],
911 integer const * LDB ) ;
912 void
913 LAPACKNAME(dlacpy)( character const UPLO[],
914 integer const * M,
915 integer const * N,
916 double_precision const A[],
917 integer const * LDA,
918 double_precision B[],
919 integer const * LDB ) ;
920 void
921 ATLASNAME(sgecopy)( const int M, const int N,
922 const float *A, const int lda,
923 float *C, const int ldc ) ;
924 void
925 ATLASNAME(dgecopy)( const int M, const int N,
926 const double *A, const int lda,
927 double *C, const int ldc ) ;
928 }
929
930 inline
931 void
932 gecopy( integer const M,
933 integer const N,
934 single_precision const A[],
935 integer const LDA,
936 single_precision B[],
937 integer const LDB )
938 #ifdef USE_ATLAS
939 { ATLASNAME(sgecopy)( M, N, A, LDA, B, LDB ) ; }
940 #else
941 { LAPACKNAME(slacpy)( "A", &M, &N, A, &LDA, B, &LDB ) ; }
942 #endif
943
944 inline
945 void
946 gecopy( integer const M,
947 integer const N,
948 double_precision const A[],
949 integer const LDA,
950 double_precision B[],
951 integer const LDB )
952 #ifdef USE_ATLAS
953 { ATLASNAME(dgecopy)( M, N, A, LDA, B, LDB ) ; }
954 #else
955 { LAPACKNAME(dlacpy)( "A", &M, &N, A, &LDA, B, &LDB ) ; }
956 #endif
957
958 /*
959 // __ _ ___ _______ _ __ ___
960 // / _` |/ _ \_ / _ \ '__/ _ \
961 // | (_| | __// / __/ | | (_) |
962 // \__, |\___/___\___|_| \___/
963 // |___/
964 */
965 extern "C" {
966 void
967 LAPACKNAME(slaset)( character const UPLO[],
968 integer const * M,
969 integer const * N,
970 single_precision const * ALPHA,
971 single_precision const * BETA,
972 single_precision A[],
973 integer const * LDA ) ;
974 void
975 LAPACKNAME(dlaset)( character const UPLO[],
976 integer const * M,
977 integer const * N,
978 double_precision const * ALPHA,
979 double_precision const * BETA,
980 double_precision A[],
981 integer const * LDA ) ;
982 void
983 ATLASNAME(sgezero)( const int M, const int N, float *C, const int ldc ) ;
984 void
985 ATLASNAME(dgezero)( const int M, const int N, double *C, const int ldc ) ;
986
987 }
988
989 inline
990 void
991 gezero( integer const M,
992 integer const N,
993 single_precision A[],
994 integer const LDA )
995 #ifdef USE_ATLAS
996 { ATLASNAME(sgezero)( M, N, A, LDA ) ; }
997 #else
998 { single_precision zero = 0 ;
999 LAPACKNAME(slaset)( "A", &M, &N, &zero, &zero, A, &LDA ) ; }
1000 #endif
1001
1002 inline
1003 void
1004 gezero( integer const M,
1005 integer const N,
1006 double_precision A[],
1007 integer const LDA )
1008 #ifdef USE_ATLAS
1009 { ATLASNAME(dgezero)( M, N, A, LDA ) ; }
1010 #else
1011 { double_precision zero = 0 ;
1012 LAPACKNAME(dlaset)( "A", &M, &N, &zero, &zero, A, &LDA ) ; }
1013 #endif
1014
1015 /*
1016 // _ _
1017 // __ _ ___ __ _ __| | __| |
1018 // / _` |/ _ \/ _` |/ _` |/ _` |
1019 // | (_| | __/ (_| | (_| | (_| |
1020 // \__, |\___|\__,_|\__,_|\__,_|
1021 // |___/
1022 */
1023 extern "C" {
1024 void
1025 ATLASNAME(sgeadd)( const int M, const int N,
1026 const float alpha, const float *A, const int lda,
1027 const float beta, float *C, const int ldc ) ;
1028 void
1029 ATLASNAME(dgeadd)( const int M, const int N,
1030 const double alpha, const double *A, const int lda,
1031 const double beta, double *C, const int ldc ) ;
1032 }
1033 /*
1034 * B <- alpha*A + beta*B
1035 */
1036 inline
1037 void
1038 geadd( integer const M,
1039 integer const N,
1040 single_precision const alpha,
1041 single_precision const A[],
1042 integer const LDA,
1043 single_precision const beta,
1044 single_precision B[],
1045 integer const LDB )
1046 #ifdef USE_ATLAS
1047 { ATLASNAME(sgeadd)( M, N, alpha, A, LDA, beta, B, LDB ) ; }
1048 #else
1049 { for ( integer j = 0 ; j < N ; ++j ) {
1050 single_precision const * Aj = A + j * LDA ;
1051 single_precision * Bj = B + j * LDB ;
1052 alglin::scal( M, beta, Bj, 1 ) ;
1053 alglin::axpy( M, alpha, Aj, 1, Bj, 1 ) ;
1054 }
1055 }
1056 #endif
1057
1058 inline
1059 void
1060 geadd( integer const M,
1061 integer const N,
1062 double_precision const alpha,
1063 double_precision const A[],
1064 integer const LDA,
1065 double_precision const beta,
1066 double_precision B[],
1067 integer const LDB )
1068 #ifdef USE_ATLAS
1069 { ATLASNAME(dgeadd)( M, N, alpha, A, LDA, beta, B, LDB ) ; }
1070 #else
1071 { for ( integer j = 0 ; j < N ; ++j ) {
1072 double_precision const * Aj = A + j * LDA ;
1073 double_precision * Bj = B + j * LDB ;
1074 alglin::scal( M, beta, Bj, 1 ) ;
1075 alglin::axpy( M, alpha, Aj, 1, Bj, 1 ) ;
1076 }
1077 }
1078 #endif
1079
1080 /*
1081 // _ __
1082 // __ _ ___| |_ _ __ / _|
1083 // / _` |/ _ \ __| '__| |_
1084 // | (_| | __/ |_| | | _|
1085 // \__, |\___|\__|_| |_|
1086 // |___/
1087 */
1088 extern "C" {
1089 void
1090 LAPACKNAME(sgetrf)( integer const * N,
1091 integer const * M,
1092 single_precision A[],
1093 integer const * LDA,
1094 integer IPIV[],
1095 integer * INFO ) ;
1096 void
1097 LAPACKNAME(dgetrf)( integer const * N,
1098 integer const * M,
1099 double_precision A[],
1100 integer const * LDA,
1101 integer IPIV[],
1102 integer * INFO ) ;
1103 int
1104 ATLASNAME(sgetrf)( const enum ATLAS_ORDER Order,
1105 const int M, const int N,
1106 float *A, const int lda,
1107 int *ipiv ) ;
1108 int
1109 ATLASNAME(dgetrf)( const enum ATLAS_ORDER Order,
1110 const int M, const int N,
1111 double *A, const int lda,
1112 int *ipiv ) ;
1113 }
1114
1115 inline
1116 integer
1117 getrf( integer const N,
1118 integer const M,
1119 single_precision A[],
1120 integer const LDA,
1121 integer IPIV[] )
1122 #ifdef USE_ATLAS
1123 { return ATLASNAME(sgetrf)( AtlasColMajor, N, M, A, LDA, IPIV ) ; }
1124 #else
1125 { integer INFO ;
1126 LAPACKNAME(sgetrf)( &N, &M, A, &LDA, IPIV, &INFO ) ;
1127 return INFO ; }
1128 #endif
1129
1130 inline
1131 integer
1132 getrf( integer const N,
1133 integer const M,
1134 double_precision A[],
1135 integer const LDA,
1136 integer IPIV[] )
1137 #ifdef USE_ATLAS
1138 { return ATLASNAME(dgetrf)( AtlasColMajor, N, M, A, LDA, IPIV ) ; }
1139 #else
1140 { integer INFO ;
1141 LAPACKNAME(dgetrf)( &N, &M, A, &LDA, IPIV, &INFO ) ;
1142 return INFO ; }
1143 #endif
1144
1145 /*
1146 // _
1147 // __ _ ___| |_ _ __ ___
1148 // / _` |/ _ \ __| '__/ __|
1149 // | (_| | __/ |_| | \__ \
1150 // \__, |\___|\__|_| |___/
1151 // |___/
1152 */
1153 extern "C" {
1154 void
1155 LAPACKNAME(sgetrs)( character const TRANS[],
1156 integer const * N,
1157 integer const * NRHS,
1158 single_precision const A[],
1159 integer const * LDA,
1160 integer const IPIV[],
1161 single_precision B[],
1162 integer const * LDB,
1163 integer * INFO ) ;
1164 void
1165 LAPACKNAME(dgetrs)( character const TRANS[],
1166 integer const * N,
1167 integer const * NRHS,
1168 double_precision const A[],
1169 integer const * LDA,
1170 integer const IPIV[],
1171 double_precision B[],
1172 integer const * LDB,
1173 integer * INFO ) ;
1174 void
1175 ATLASNAME(sgetrs)( const enum ATLAS_ORDER Order,
1176 const enum ATLAS_TRANS Trans,
1177 const int N,
1178 const int NRHS,
1179 const float *A, const int lda,
1180 const int *ipiv,
1181 float *B, const int ldb );
1182 void
1183 ATLASNAME(dgetrs)( const enum ATLAS_ORDER Order,
1184 const enum ATLAS_TRANS Trans,
1185 const int N,
1186 const int NRHS,
1187 const double *A, const int lda,
1188 const int *ipiv,
1189 double *B, const int ldb );
1190 }
1191
1192 inline
1193 integer
1194 getrs( character const TRANS[],
1195 integer const N,
1196 integer const NRHS,
1197 single_precision const A[],
1198 integer const LDA,
1199 integer const IPIV[],
1200 single_precision B[],
1201 integer const LDB )
1202 #ifdef USE_ATLAS
1203 { ATLASNAME(sgetrs)( AtlasColMajor, trans_atlas(TRANS[0]), N, NRHS, A, LDA, IPIV, B, LDB ) ;
1204 return 0 ; }
1205 #else
1206 { integer INFO ;
1207 LAPACKNAME(sgetrs)( TRANS, &N, &NRHS, A, &LDA, IPIV, B, &LDB, &INFO ) ;
1208 return INFO ; }
1209 #endif
1210
1211 inline
1212 integer
1213 getrs( character const TRANS[],
1214 integer const N,
1215 integer const NRHS,
1216 double_precision const A[],
1217 integer const LDA,
1218 integer const IPIV[],
1219 double_precision B[],
1220 integer const LDB )
1221 #ifdef USE_ATLAS
1222 { ATLASNAME(dgetrs)( AtlasColMajor, trans_atlas(TRANS[0]), N, NRHS, A, LDA, IPIV, B, LDB ) ;
1223 return 0 ; }
1224 #else
1225 { integer INFO ;
1226 LAPACKNAME(dgetrs)( TRANS, &N, &NRHS, A, &LDA, IPIV, B, &LDB, &INFO ) ;
1227 return INFO ; }
1228 #endif
1229
1230 /*
1231 // __ _ ___ _____ __
1232 // / _` |/ _ \/ __\ \ / /
1233 // | (_| | __/\__ \\ V /
1234 // \__, |\___||___/ \_/
1235 // |___/
1236 */
1237 extern "C" {
1238 void
1239 LAPACKNAME(sgesv)( integer const * N,
1240 integer const * NRHS,
1241 single_precision A[],
1242 integer const * LDA,
1243 integer IPIV[],
1244 single_precision B[],
1245 integer const * LDB,
1246 integer * INFO ) ;
1247 void
1248 LAPACKNAME(dgesv)( integer const * N,
1249 integer const * NRHS,
1250 double_precision A[],
1251 integer const * LDA,
1252 integer IPIV[],
1253 double_precision B[],
1254 integer const * LDB,
1255 integer * INFO ) ;
1256 }
1257
1258 inline
1259 integer
1260 gesv( integer const N,
1261 integer const NRHS,
1262 single_precision A[],
1263 integer const LDA,
1264 integer IPIV[],
1265 single_precision B[],
1266 integer const LDB )
1267 #ifdef USE_ATLAS
1268 { integer INFO = ATLASNAME(sgetrf)(AtlasColMajor, N, N, A, LDA, IPIV);
1269 if ( INFO == 0 ) ATLASNAME(sgetrs)(AtlasColMajor, AtlasNoTrans,
1270 N, NRHS, A, LDA, IPIV, B, LDB) ;
1271 return INFO ; }
1272 #else
1273 { integer INFO ;
1274 LAPACKNAME(sgesv)( &N, &NRHS, A, &LDA, IPIV, B, &LDB, &INFO ) ;
1275 return INFO ; }
1276 #endif
1277
1278 inline
1279 integer
1280 gesv( integer const N,
1281 integer const NRHS,
1282 double_precision A[],
1283 integer const LDA,
1284 integer IPIV[],
1285 double_precision B[],
1286 integer const LDB )
1287 #ifdef USE_ATLAS
1288 { integer INFO = ATLASNAME(dgetrf)(AtlasColMajor, N, N, A, LDA, IPIV);
1289 if ( INFO == 0 ) ATLASNAME(dgetrs)(AtlasColMajor, AtlasNoTrans,
1290 N, NRHS, A, LDA, IPIV, B, LDB) ;
1291 return INFO ; }
1292 #else
1293 { integer INFO ;
1294 LAPACKNAME(dgesv)( &N, &NRHS, A, &LDA, IPIV, B, &LDB, &INFO ) ;
1295 return INFO ; }
1296 #endif
1297
1298 /*
1299 // _ _ __
1300 // __ _| |_| |_ _ __ / _|
1301 // / _` | __| __| '__| |_
1302 // | (_| | |_| |_| | | _|
1303 // \__, |\__|\__|_| |_|
1304 // |___/
1305 */
1306 extern "C" {
1307 void
1308 LAPACKNAME(sgttrf)( integer const * N,
1309 single_precision DL[],
1310 single_precision D[],
1311 single_precision DU[],
1312 single_precision DU2[],
1313 integer IPIV[],
1314 integer * INFO ) ;
1315 void
1316 LAPACKNAME(dgttrf)( integer const * N,
1317 double_precision DL[],
1318 double_precision D[],
1319 double_precision DU[],
1320 double_precision DU2[],
1321 integer IPIV[],
1322 integer * INFO ) ;
1323 }
1324
1325 inline
1326 integer
1327 gttrf( integer const N,
1328 single_precision DL[],
1329 single_precision D[],
1330 single_precision DU[],
1331 single_precision DU2[],
1332 integer IPIV[] )
1333 { integer INFO ;
1334 LAPACKNAME(sgttrf)( &N, DL, D, DU, DU2, IPIV, &INFO) ;
1335 return INFO ; }
1336
1337 inline
1338 integer
1339 gttrf( integer const N,
1340 double_precision DL[],
1341 double_precision D[],
1342 double_precision DU[],
1343 double_precision DU2[],
1344 integer IPIV[] )
1345 { integer INFO ;
1346 LAPACKNAME(dgttrf)( &N, DL, D, DU, DU2, IPIV, &INFO) ;
1347 return INFO ; }
1348
1349 /*
1350 // _ _
1351 // __ _| |_| |_ _ __ ___
1352 // / _` | __| __| '__/ __|
1353 // | (_| | |_| |_| | \__ \
1354 // \__, |\__|\__|_| |___/
1355 // |___/
1356 */
1357 extern "C" {
1358 void
1359 LAPACKNAME(sgttrs)( character TRANS[],
1360 integer const * N,
1361 integer const * NRHS,
1362 single_precision const DL[],
1363 single_precision const D[],
1364 single_precision const DU[],
1365 single_precision const DU2[],
1366 integer const IPIV[],
1367 single_precision B[],
1368 integer const * LDB,
1369 integer * INFO ) ;
1370
1371 void
1372 LAPACKNAME(dgttrs)( character TRANS[],
1373 integer const * N,
1374 integer const * NRHS,
1375 double_precision const DL[],
1376 double_precision const D[],
1377 double_precision const DU[],
1378 double_precision const DU2[],
1379 integer const IPIV[],
1380 double_precision B[],
1381 integer const * LDB,
1382 integer * INFO ) ;
1383
1384 }
1385
1386 inline
1387 integer
1388 gttrs( character TRANS[],
1389 integer const N,
1390 integer const NRHS,
1391 single_precision const DL[],
1392 single_precision const D[],
1393 single_precision const DU[],
1394 single_precision const DU2[],
1395 integer const IPIV[],
1396 single_precision B[],
1397 integer const LDB )
1398 { integer INFO ;
1399 LAPACKNAME(sgttrs)( TRANS, &N, &NRHS, DL, D, DU, DU2, IPIV, B, &LDB, &INFO) ;
1400 return INFO ; }
1401
1402 inline
1403 integer
1404 gttrs( character TRANS[],
1405 integer const N,
1406 integer const NRHS,
1407 double_precision const DL[],
1408 double_precision const D[],
1409 double_precision const DU[],
1410 double_precision const DU2[],
1411 integer const IPIV[],
1412 double_precision B[],
1413 integer const LDB )
1414 { integer INFO ;
1415 LAPACKNAME(dgttrs)( TRANS, &N, &NRHS, DL, D, DU, DU2, IPIV, B, &LDB, &INFO) ;
1416 return INFO ; }
1417
1418 /*
1419 // _
1420 // __ _| |_ _____ __
1421 // / _` | __/ __\ \ / /
1422 // | (_| | |_\__ \\ V /
1423 // \__, |\__|___/ \_/
1424 // |___/
1425 */
1426 extern "C" {
1427 void
1428 LAPACKNAME(sgtsv)( integer const * N,
1429 integer const * NRHS,
1430 single_precision DL[],
1431 single_precision D[],
1432 single_precision DU[],
1433 single_precision B[],
1434 integer const * LDB,
1435 integer * INFO ) ;
1436
1437 void
1438 LAPACKNAME(dgtsv)( integer const * N,
1439 integer const * NRHS,
1440 double_precision DL[],
1441 double_precision D[],
1442 double_precision DU[],
1443 double_precision B[],
1444 integer const * LDB,
1445 integer * INFO ) ;
1446 }
1447
1448 inline
1449 integer
1450 gtsv( integer const N,
1451 integer const NRHS,
1452 single_precision DL[],
1453 single_precision D[],
1454 single_precision DU[],
1455 single_precision B[],
1456 integer const LDB )
1457 { integer INFO ;
1458 LAPACKNAME(sgtsv)( &N, &NRHS, DL, D, DU, B, &LDB, &INFO ) ;
1459 return INFO ; }
1460
1461 inline
1462 integer
1463 gtsv( integer const N,
1464 integer const NRHS,
1465 double_precision DL[],
1466 double_precision D[],
1467 double_precision DU[],
1468 double_precision B[],
1469 integer const LDB )
1470 { integer INFO ;
1471 LAPACKNAME(dgtsv)( &N, &NRHS, DL, D, DU, B, &LDB, &INFO ) ;
1472 return INFO ; }
1473
1474 /*
1475 // _
1476 // _ __ ___ _ __ _ __ ___ / |
1477 // | '_ \ / _ \| '__| '_ ` _ \| |
1478 // | | | | (_) | | | | | | | | |
1479 // |_| |_|\___/|_| |_| |_| |_|_|
1480 */
1481 extern "C" {
1482
1483 // 'M' or 'm' maxabs
1484 // '1', 'O' or 'o' norm1
1485 // 'I' or 'i' normI
1486 // 'F', 'f', 'E' or 'e' normF
1487
1488 double_precision
1489 LAPACKNAME(dlange)( char const NORM[],
1490 integer const * M,
1491 integer const * N,
1492 double_precision const * A,
1493 integer const * LDA,
1494 double_precision * WORK ) ;
1495
1496 single_precision
1497 LAPACKNAME(slange)( char const NORM[],
1498 integer const * M,
1499 integer const * N,
1500 single_precision const * A,
1501 integer const * LDA,
1502 single_precision * WORK ) ;
1503
1504 }
1505
1506 inline
1507 single_precision
1508 normI( integer const N,
1509 integer const M,
1510 single_precision const A[],
1511 integer const LDA,
1512 single_precision WORK[] )
1513 { return LAPACKNAME(slange)( "I", &N, &M, A, &LDA, WORK ) ; }
1514
1515 inline
1516 double_precision
1517 normI( integer const N,
1518 integer const M,
1519 double_precision const A[],
1520 integer const LDA,
1521 double_precision WORK[] )
1522 { return LAPACKNAME(dlange)( "I", &N, &M, A, &LDA, WORK ) ; }
1523
1524 ///////////////////
1525
1526 inline
1527 single_precision
1528 norm1( integer const N,
1529 integer const M,
1530 single_precision const A[],
1531 integer const LDA )
1532 { return LAPACKNAME(slange)( "1", &N, &M, A, &LDA, NULL ) ; }
1533
1534 inline
1535 double_precision
1536 norm1( integer const N,
1537 integer const M,
1538 double_precision const A[],
1539 integer const LDA )
1540 { return LAPACKNAME(dlange)( "1", &N, &M, A, &LDA, NULL ) ; }
1541
1542 ///////////////////
1543
1544 inline
1545 single_precision
1546 normF( integer const N,
1547 integer const M,
1548 single_precision const A[],
1549 integer const LDA )
1550 { return LAPACKNAME(slange)( "F", &N, &M, A, &LDA, NULL ) ; }
1551
1552 inline
1553 double_precision
1554 normF( integer const N,
1555 integer const M,
1556 double_precision const A[],
1557 integer const LDA )
1558 { return LAPACKNAME(dlange)( "F", &N, &M, A, &LDA, NULL ) ; }
1559
1560 ///////////////////
1561
1562 inline
1563 single_precision
1564 maxabs( integer const N,
1565 integer const M,
1566 single_precision const A[],
1567 integer const LDA )
1568 { return LAPACKNAME(slange)( "M", &N, &M, A, &LDA, NULL ) ; }
1569
1570 inline
1571 double_precision
1572 maxabs( integer const N,
1573 integer const M,
1574 double_precision const A[],
1575 integer const LDA )
1576 { return LAPACKNAME(dlange)( "M", &N, &M, A, &LDA, NULL ) ; }
1577
1578 /*
1579 // _
1580 // | |_ _ __ _ __ _____ __
1581 // | __| '__| '_ ` _ \ \ / /
1582 // | |_| | | | | | | \ V /
1583 // \__|_| |_| |_| |_|\_/
1584 */
1585 extern "C" {
1586 void
1587 BLASNAME(strmv)( char const UPLO[],
1588 char const TRANS[],
1589 char const DIAG[],
1590 integer const * N,
1591 single_precision const A[],
1592 integer const * LDA,
1593 single_precision X[],
1594 integer const * INCX ) ;
1595 void
1596 BLASNAME(dtrmv)( char const UPLO[],
1597 char const TRANS[],
1598 char const DIAG[],
1599 integer const * N,
1600 double_precision const A[],
1601 integer const * LDA,
1602 double_precision X[],
1603 integer const * INCX ) ;
1604 void
1605 ATLASNAME(strmv)( const enum ATLAS_UPLO Uplo,
1606 const enum ATLAS_TRANS TransA,
1607 const enum ATLAS_DIAG Diag,
1608 const int N,
1609 const float *A, const int lda,
1610 float *X, const int incX ) ;
1611 void
1612 ATLASNAME(dtrmv)( const enum ATLAS_UPLO Uplo,
1613 const enum ATLAS_TRANS TransA,
1614 const enum ATLAS_DIAG Diag,
1615 const int N,
1616 const double *A, const int lda,
1617 double *X, const int incX ) ;
1618 }
1619
1620 inline
1621 void
1622 trmv( char const UPLO[],
1623 char const TRANS[],
1624 char const DIAG[],
1625 integer const N,
1626 single_precision const A[],
1627 integer const LDA,
1628 single_precision X[],
1629 integer const INCX )
1630 #ifdef USE_ATLAS
1631 { ATLASNAME(strmv)( uplo_atlas(UPLO[0]),
1632 trans_atlas(TRANS[0]),
1633 diag_atlas(DIAG[0]),
1634 N, A, LDA, X, INCX ) ; }
1635 #else
1636 { BLASNAME(strmv)( UPLO, TRANS, DIAG, &N, A, &LDA, X, &INCX ) ; }
1637 #endif
1638
1639 inline
1640 void
1641 trmv( char const UPLO[],
1642 char const TRANS[],
1643 char const DIAG[],
1644 integer const N,
1645 double_precision const A[],
1646 integer const LDA,
1647 double_precision X[],
1648 integer const INCX )
1649 #ifdef USE_ATLAS
1650 { ATLASNAME(dtrmv)( uplo_atlas(UPLO[0]),
1651 trans_atlas(TRANS[0]),
1652 diag_atlas(DIAG[0]),
1653 N, A, LDA, X, INCX ) ; }
1654 #else
1655 { BLASNAME(dtrmv)( UPLO, TRANS, DIAG, &N, A, &LDA, X, &INCX ) ; }
1656 #endif
1657
1658 /*
1659 // _
1660 // | |_ _ __ _____ __
1661 // | __| '__/ __\ \ / /
1662 // | |_| | \__ \\ V /
1663 // \__|_| |___/ \_/
1664 */
1665 extern "C" {
1666 void
1667 BLASNAME(strsv)( character const UPLO[],
1668 character const TRANS[],
1669 character const DIAG[],
1670 integer const * N,
1671 single_precision const A[],
1672 integer const * LDA,
1673 single_precision X[],
1674 integer const * INCX ) ;
1675 void
1676 BLASNAME(dtrsv)( character const UPLO[],
1677 character const TRANS[],
1678 character const DIAG[],
1679 integer const * N,
1680 double_precision const A[],
1681 integer const * LDA,
1682 double_precision X[],
1683 integer const * INCX ) ;
1684 void
1685 ATLASNAME(strsv)( const enum ATLAS_UPLO Uplo,
1686 const enum ATLAS_TRANS TransA,
1687 const enum ATLAS_DIAG Diag,
1688 const int N,
1689 const float *A, const int lda,
1690 float *X, const int incX ) ;
1691 void
1692 ATLASNAME(dtrsv)( const enum ATLAS_UPLO Uplo,
1693 const enum ATLAS_TRANS TransA,
1694 const enum ATLAS_DIAG Diag,
1695 const int N,
1696 const double *A, const int lda,
1697 double *X, const int incX );
1698 }
1699
1700 inline
1701 void
1702 trsv( character const UPLO[],
1703 character const TRANS[],
1704 character const DIAG[],
1705 integer const N,
1706 single_precision const A[],
1707 integer const LDA,
1708 single_precision X[],
1709 integer const INCX )
1710 #ifdef USE_ATLAS
1711 { ATLASNAME(strsv)( uplo_atlas(UPLO[0]),
1712 trans_atlas(TRANS[0]),
1713 diag_atlas(DIAG[0]),
1714 N, A, LDA, X, INCX ) ; }
1715 #else
1716 { BLASNAME(strsv)( UPLO, TRANS, DIAG, &N, A, &LDA, X, &INCX ) ; }
1717 #endif
1718
1719 inline
1720 void
1721 trsv( character const UPLO[],
1722 character const TRANS[],
1723 character const DIAG[],
1724 integer const N,
1725 double_precision const A[],
1726 integer const LDA,
1727 double_precision X[],
1728 integer const INCX )
1729 #ifdef USE_ATLAS
1730 { ATLASNAME(dtrsv)( uplo_atlas(UPLO[0]),
1731 trans_atlas(TRANS[0]),
1732 diag_atlas(DIAG[0]),
1733 N, A, LDA, X, INCX ) ; }
1734 #else
1735 { BLASNAME(dtrsv)( UPLO, TRANS, DIAG, &N, A, &LDA, X, &INCX ) ; }
1736 #endif
1737
1738 /*
1739 // _
1740 // | |_ _ __ _ __ ___ _ __ ___
1741 // | __| '__| '_ ` _ \| '_ ` _ \
1742 // | |_| | | | | | | | | | | | |
1743 // \__|_| |_| |_| |_|_| |_| |_|
1744 */
1745 extern "C" {
1746 void
1747 BLASNAME(strmm) ( char const SIDE[], // "L" or "R"
1748 char const UPLO[], // "U" or "L"
1749 char const TRANSA[], // "N", "T", "C"
1750 char const DIAG[], // "N", "U"
1751 integer const * M,
1752 integer const * N,
1753 single_precision const * ALPHA,
1754 single_precision const A[],
1755 integer const * LDA,
1756 single_precision B[],
1757 integer const * LDB ) ;
1758 void
1759 BLASNAME(dtrmm) ( char const SIDE[], // "L" or "R"
1760 char const UPLO[], // "U" or "L"
1761 char const TRANSA[], // "N", "T", "C"
1762 char const DIAG[], // "N", "U"
1763 integer const * M,
1764 integer const * N,
1765 double_precision const * ALPHA,
1766 double_precision const A[],
1767 integer const * LDA,
1768 double_precision B[],
1769 integer const * LDB ) ;
1770
1771 void
1772 ATLASNAME(strmm)( const enum ATLAS_SIDE Side,
1773 const enum ATLAS_UPLO Uplo,
1774 const enum ATLAS_TRANS TransA,
1775 const enum ATLAS_DIAG Diag,
1776 const int M,
1777 const int N,
1778 const float alpha,
1779 const float *A, const int lda,
1780 float *B, const int ldb ) ;
1781 void
1782 ATLASNAME(dtrmm)( const enum ATLAS_SIDE Side,
1783 const enum ATLAS_UPLO Uplo,
1784 const enum ATLAS_TRANS TransA,
1785 const enum ATLAS_DIAG Diag,
1786 const int M,
1787 const int N,
1788 const double alpha,
1789 const double *A, const int lda,
1790 double *B, const int ldb ) ;
1791 }
1792
1793 inline
1794 void
1795 trmm( character const SIDE[],
1796 character const UPLO[],
1797 character const TRANSA[],
1798 character const DIAG[],
1799 integer const M,
1800 integer const N,
1801 double_precision const alpha,
1802 double_precision const A[],
1803 integer const LDA,
1804 double_precision B[],
1805 integer const LDB )
1806 #ifdef USE_ATLAS
1807 { ATLASNAME(dtrmm)( side_atlas(SIDE[0]),
1808 uplo_atlas(UPLO[0]),
1809 trans_atlas(TRANSA[0]),
1810 diag_atlas(DIAG[0]),
1811 M, N, alpha, A, LDA, B, LDB ) ; }
1812 #else
1813 { BLASNAME(dtrmm) ( SIDE, UPLO, TRANSA, DIAG, &M, &N, &alpha, A, &LDA, B, &LDB ) ; }
1814 #endif
1815
1816 /*
1817 // _
1818 // | |_ _ __ ___ _ __ ___
1819 // | __| '__/ __| '_ ` _ \
1820 // | |_| | \__ \ | | | | |
1821 // \__|_| |___/_| |_| |_|
1822 */
1823 extern "C" {
1824 void
1825 BLASNAME(strsm)( character const SIDE[],
1826 character const UPLO[],
1827 character const TRANSA[],
1828 character const DIAG[],
1829 integer const * M,
1830 integer const * N,
1831 single_precision const * alpha,
1832 single_precision const A[],
1833 integer const * LDA,
1834 single_precision B[],
1835 integer const * LDB ) ;
1836 void
1837 BLASNAME(dtrsm)( character const SIDE[],
1838 character const UPLO[],
1839 character const TRANSA[],
1840 character const DIAG[],
1841 integer const * M,
1842 integer const * N,
1843 double_precision const * alpha,
1844 double_precision const A[],
1845 integer const * LDA,
1846 double_precision B[],
1847 integer const * LDB ) ;
1848 void
1849 ATLASNAME(strsm)( const enum ATLAS_SIDE Side,
1850 const enum ATLAS_UPLO Uplo,
1851 const enum ATLAS_TRANS TransA,
1852 const enum ATLAS_DIAG Diag,
1853 const int M,
1854 const int N,
1855 const float alpha,
1856 const float *A, const int lda,
1857 float *B, const int ldb ) ;
1858 void
1859 ATLASNAME(dtrsm)( const enum ATLAS_SIDE Side,
1860 const enum ATLAS_UPLO Uplo,
1861 const enum ATLAS_TRANS TransA,
1862 const enum ATLAS_DIAG Diag,
1863 const int M,
1864 const int N,
1865 const double alpha,
1866 const double *A, const int lda,
1867 double *B, const int ldb ) ;
1868 }
1869
1870 inline
1871 void
1872 trsm( character const SIDE[],
1873 character const UPLO[],
1874 character const TRANS[],
1875 character const DIAG[],
1876 integer const M,
1877 integer const N,
1878 single_precision const alpha,
1879 single_precision const A[],
1880 integer const LDA,
1881 single_precision B[],
1882 integer const LDB )
1883 #ifdef USE_ATLAS
1884 { ATLASNAME(strsm)( side_atlas(SIDE[0]),
1885 uplo_atlas(UPLO[0]),
1886 trans_atlas(TRANS[0]),
1887 diag_atlas(DIAG[0]),
1888 M, N, alpha, A, LDA, B, LDB ) ; }
1889 #else
1890 { BLASNAME(strsm)( SIDE, UPLO, TRANS, DIAG, &M, &N, &alpha, A, &LDA, B, &LDB ) ; }
1891 #endif
1892
1893 inline
1894 void
1895 trsm( character const SIDE[],
1896 character const UPLO[],
1897 character const TRANS[],
1898 character const DIAG[],
1899 integer const M,
1900 integer const N,
1901 double_precision const alpha,
1902 double_precision const A[],
1903 integer const LDA,
1904 double_precision B[],
1905 integer const LDB )
1906 #ifdef USE_ATLAS
1907 { ATLASNAME(dtrsm)( side_atlas(SIDE[0]),
1908 uplo_atlas(UPLO[0]),
1909 trans_atlas(TRANS[0]),
1910 diag_atlas(DIAG[0]),
1911 M, N, alpha, A, LDA, B, LDB ) ; }
1912 #else
1913 { BLASNAME(dtrsm)( SIDE, UPLO, TRANS, DIAG, &M, &N, &alpha, A, &LDA, B, &LDB ) ; }
1914 #endif
1915
1916} // end namespace alglin
1917
1918#endif
1919
1920///
1921/// eof: alglin.hh
1922///
A timing class
File timing.hh
1/*--------------------------------------------------------------------------*\
2 | |
3 | Copyright (C) 2010 |
4 | |
5 | , __ , __ |
6 | /|/ \ /|/ \ |
7 | | __/ _ ,_ | __/ _ ,_ |
8 | | \|/ / | | | | \|/ / | | | |
9 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
10 | /| /| |
11 | \| \| |
12 | |
13 | Enrico Bertolazzi |
14 | Dipartimento di Ingegneria Meccanica e Strutturale |
15 | Universita` degli Studi di Trento |
16 | Via Mesiano 77, I-38050 Trento, Italy |
17 | email: enrico.bertolazzi@unitn.it |
18 | |
19 | version: 0.1 04-01-2010 |
20 | |
21\*--------------------------------------------------------------------------*/
22
23#ifndef TIMING_HH
24#define TIMING_HH
25
26class Timing {
27
28 long sec, usec ;
29
30 Timing( Timing const & ) ;
31 Timing const & operator = ( Timing const & ) const ;
32
33public:
34
35 Timing() ;
36 ~Timing() {} ;
37
38 void start() ;
39 double seconds() ;
40 double milliseconds() ;
41
42} ;
43
44#endif
45
46// EOF timing.hh
File timing.cc
1/*--------------------------------------------------------------------------*\
2 | |
3 | Copyright (C) 2010 |
4 | |
5 | , __ , __ |
6 | /|/ \ /|/ \ |
7 | | __/ _ ,_ | __/ _ ,_ |
8 | | \|/ / | | | | \|/ / | | | |
9 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
10 | /| /| |
11 | \| \| |
12 | |
13 | Enrico Bertolazzi |
14 | Dipartimento di Ingegneria Meccanica e Strutturale |
15 | Universita` degli Studi di Trento |
16 | Via Mesiano 77, I-38050 Trento, Italy |
17 | email: enrico.bertolazzi@unitn.it |
18 | |
19 | version: 0.1 04-01-2010 |
20 | |
21 \*--------------------------------------------------------------------------*/
22
23#include "timing.hh"
24
25using namespace std ;
26
27#ifdef _WIN32
28 #ifndef WIN32_LEAN_AND_MEAN
29 #define WIN32_LEAN_AND_MEAN
30 #endif
31 #include <windows.h>
32#else
33 #include <sys/time.h>
34#endif
35
36/*
37// _ _____ _
38// __ _ ___| ||_ _(_)_ __ ___ ___
39// / _` |/ _ \ __|| | | | '_ ` _ \ / _ \
40// | (_| | __/ |_ | | | | | | | | | __/
41// \__, |\___|\__||_| |_|_| |_| |_|\___|
42// |___/
43*/
44
45bool getTime( long & sec, long & usec ) ;
46
47#ifdef _WIN32
48 bool
49 getTime( long & sec, long & usec ) {
50 FILETIME ft ;
51 GetSystemTimeAsFileTime( &ft ) ;
52
53 // FILETIME To UNIX Time
54 unsigned __int64 u_sec = ft . dwHighDateTime ;
55 u_sec <<= 32 ;
56 u_sec |= ft . dwLowDateTime ;
57 u_sec -= 116444736000000000ULL ;
58 u_sec /= 10ULL ;
59
60 sec = long( u_sec / 1000000L ) ;
61 usec = long( u_sec % 1000000L ) ;
62 return true ;
63 }
64#else
65 bool
66 getTime( long & sec, long & usec ) {
67 struct timeval now ;
68 bool ok = gettimeofday(&now, NULL) == 0 ;
69 if ( ok ) {
70 sec = now . tv_sec;
71 usec = now . tv_usec;
72 } else {
73 sec = usec = 0 ;
74 }
75 return ok ;
76 }
77#endif
78
79Timing::Timing() {
80 start() ;
81}
82
83void
84Timing::start() {
85 getTime( sec, usec ) ;
86}
87
88double
89Timing::seconds() {
90 long new_sec, new_usec ;
91 getTime( new_sec, new_usec ) ;
92 return (new_sec-sec) + (new_usec-usec)*1E-6 ;
93}
94
95double
96Timing::milliseconds() {
97 return seconds() * 1E3 ;
98}
99
100// EOF timing.cc
Matrix-matrix product
File mm.hh
1/*--------------------------------------------------------------------------*\
2 | |
3 | Copyright (C) 2010 |
4 | |
5 | , __ , __ |
6 | /|/ \ /|/ \ |
7 | | __/ _ ,_ | __/ _ ,_ |
8 | | \|/ / | | | | \|/ / | | | |
9 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
10 | /| /| |
11 | \| \| |
12 | |
13 | Enrico Bertolazzi |
14 | Dipartimento di Ingegneria Meccanica e Strutturale |
15 | Universita` degli Studi di Trento |
16 | Via Mesiano 77, I-38050 Trento, Italy |
17 | email: enrico.bertolazzi@unitn.it |
18 | |
19 | version: 0.1 04-01-2010 |
20 | |
21\*--------------------------------------------------------------------------*/
22
23// To avoid double inclusion (old trick)
24#ifndef MM_HH
25#define MM_HH
26
27#ifndef REAL_TYPE
28#define REAL_TYPE double
29#endif
30
31#ifndef INTEGER_TYPE
32#define INTEGER_TYPE int
33#endif
34
35typedef REAL_TYPE valueType ;
36typedef INTEGER_TYPE indexType ;
37
38//#define ROW_MAJOR
39#ifdef ROW_MAJOR
40 #define ADDR(M,I,J) M[(I)*ld##M+(J)]
41 #define NEXT_COLUMN(M) ++p##M
42 #define NEXT_ROW(M) p##M += ld##M
43#else
44 #define ADDR(M,I,J) M[(I)+(J)*ld##M]
45 #define NEXT_COLUMN(M) p##M += ld##M
46 #define NEXT_ROW(M) ++p##M
47#endif
48
49void
50MM_normal( valueType const a[], indexType lda, // n x p
51 valueType const b[], indexType ldb, // p x m
52 valueType c[], indexType ldc,
53 indexType n,
54 indexType p,
55 indexType m ) ;
56
57void
58MM_temp( valueType const a[], indexType lda, // n x p
59 valueType const b[], indexType ldb, // p x m
60 valueType c[], indexType ldc,
61 indexType n,
62 indexType p,
63 indexType m ) ;
64
65void
66MM_unroll4( valueType const a[], indexType lda, // n x p
67 valueType const b[], indexType ldb, // p x m
68 valueType c[], indexType ldc,
69 indexType n,
70 indexType p,
71 indexType m ) ;
72
73void
74MM_unroll8( valueType const a[], indexType lda, // n x p
75 valueType const b[], indexType ldb, // p x m
76 valueType c[], indexType ldc,
77 indexType n,
78 indexType p,
79 indexType m ) ;
80
81void
82MM_unroll16( valueType const a[], indexType lda, // n x p
83 valueType const b[], indexType ldb, // p x m
84 valueType c[], indexType ldc,
85 indexType n,
86 indexType p,
87 indexType m ) ;
88
89void
90MM_pointer( valueType const a[], indexType lda, // n x p
91 valueType const b[], indexType ldb, // p x m
92 valueType c[], indexType ldc,
93 indexType n,
94 indexType p,
95 indexType m ) ;
96
97void
98MM_lam( valueType const a[], indexType lda, // n x p
99 valueType const b[], indexType ldb, // p x m
100 valueType c[], indexType ldc,
101 indexType n,
102 indexType p,
103 indexType m ) ;
104
105void
106MM_tiling( valueType const a[], indexType lda, // n x p
107 valueType const b[], indexType ldb, // p x m
108 valueType c[], indexType ldc,
109 indexType n,
110 indexType p,
111 indexType m,
112 indexType step ) ;
113
114void
115MM_maeno( valueType const a[], indexType lda, // n x p
116 valueType const b[], indexType ldb, // p x m
117 valueType c[], indexType ldc,
118 indexType n,
119 indexType p,
120 indexType m,
121 indexType lparm ) ;
122
123void
124MM_warner( valueType const a[], indexType lda, // n x p
125 valueType const b[], indexType ldb, // p x m
126 valueType c[], indexType ldc,
127 indexType n,
128 indexType p,
129 indexType m,
130 indexType nb ) ;
131
132void
133MM_blas( valueType const a[], indexType lda, // n x p
134 valueType const b[], indexType ldb, // p x m
135 valueType c[], indexType ldc,
136 indexType n,
137 indexType p,
138 indexType m ) ;
139
140void
141MM_blocking( valueType const a[], indexType lda, // n x p
142 valueType const b[], indexType ldb, // p x m
143 valueType c[], indexType ldc,
144 indexType n,
145 indexType p,
146 indexType m,
147 indexType step ) ;
148
149void
150MM_strassen( valueType const a[], indexType lda, // n x p
151 valueType const b[], indexType ldb, // p x m
152 valueType c[], indexType ldc,
153 indexType n,
154 indexType p,
155 indexType m ) ;
156
157#endif
158
159// EOF mm.hh
File mm.cc
1/*--------------------------------------------------------------------------*\
2 | |
3 | Copyright (C) 2010 |
4 | |
5 | , __ , __ |
6 | /|/ \ /|/ \ |
7 | | __/ _ ,_ | __/ _ ,_ |
8 | | \|/ / | | | | \|/ / | | | |
9 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
10 | /| /| |
11 | \| \| |
12 | |
13 | Enrico Bertolazzi |
14 | Dipartimento di Ingegneria Meccanica e Strutturale |
15 | Universita` degli Studi di Trento |
16 | Via Mesiano 77, I-38050 Trento, Italy |
17 | email: enrico.bertolazzi@unitn.it |
18 | |
19 | version: 0.1 04-01-2010 |
20 | |
21\*--------------------------------------------------------------------------*/
22
23#include "mm.hh"
24#include "alglin.hh"
25#include "strassen.hh"
26#include <algorithm>
27
28using namespace std ;
29
30/*
31 * matrix multiply tests -- C language, version 1.0, May 1993
32 *
33 * Contact Mark Smotherman (mark@cs.clemson.edu)
34 */
35
36#define A(I,J) ADDR(a,I,J)
37#define B(I,J) ADDR(b,I,J)
38#define C(I,J) ADDR(c,I,J)
39
40///////////////////////////////////////////////////////
41
42void
43MM_normal( valueType const a[], indexType lda, // n x p
44 valueType const b[], indexType ldb, // p x m
45 valueType c[], indexType ldc,
46 indexType n,
47 indexType p,
48 indexType m ) {
49
50 for ( indexType i = 0 ; i < n ; ++i ) {
51 for ( indexType j = 0 ; j < m ; ++j ) {
52 C(i,j) = 0 ;
53 for ( indexType k = 0 ; k < p ; ++k ) {
54 C(i,j) += A(i,k)*B(k,j) ;
55 }
56 }
57 }
58}
59
60///////////////////////////////////////////////////////
61
62void
63MM_temp( valueType const a[], indexType lda, // n x p
64 valueType const b[], indexType ldb, // p x m
65 valueType c[], indexType ldc,
66 indexType n,
67 indexType p,
68 indexType m ) {
69
70 for ( indexType i = 0 ; i < n ; ++i ) {
71 for ( indexType j = 0 ; j < m ; ++j ) {
72 valueType temp = 0 ;
73 for ( indexType k = 0 ; k < p ; ++k ) {
74 temp += A(i,k)*B(k,j) ;
75 }
76 C(i,j) = temp ;
77 }
78 }
79}
80
81///////////////////////////////////////////////////////
82
83void
84MM_unroll4( valueType const a[], indexType lda, // n x p
85 valueType const b[], indexType ldb, // p x m
86 valueType c[], indexType ldc,
87 indexType n,
88 indexType p,
89 indexType m ) {
90
91 for ( indexType i = 0 ; i < n ; ++i ) {
92 for ( indexType j = 0 ; j < m ; ++j ) {
93 indexType k = 0 ;
94 valueType temp = 0 ;
95 for ( ; k < (p-3) ; k += 4 ) {
96 temp += A(i,k+0) * B(k+0,j) ;
97 temp += A(i,k+1) * B(k+1,j) ;
98 temp += A(i,k+2) * B(k+2,j) ;
99 temp += A(i,k+3) * B(k+3,j) ;
100 }
101 for ( ; k < p ; ++k ) temp += A(i,k)*B(k,j) ;
102 C(i,j) = temp ;
103 }
104 }
105}
106
107///////////////////////////////////////////////////////
108
109void
110MM_unroll8( valueType const a[], indexType lda, // n x p
111 valueType const b[], indexType ldb, // p x m
112 valueType c[], indexType ldc,
113 indexType n,
114 indexType p,
115 indexType m ) {
116
117 for ( indexType i = 0 ; i < n ; ++i ) {
118 for ( indexType j = 0 ; j < m ; ++j ) {
119 indexType k = 0 ;
120 valueType temp = 0 ;
121 for ( ; k < (p-7) ; k += 8 ) {
122 temp += A(i,k+0) * B(k+0,j) ;
123 temp += A(i,k+1) * B(k+1,j) ;
124 temp += A(i,k+2) * B(k+2,j) ;
125 temp += A(i,k+3) * B(k+3,j) ;
126 temp += A(i,k+4) * B(k+4,j) ;
127 temp += A(i,k+5) * B(k+5,j) ;
128 temp += A(i,k+6) * B(k+6,j) ;
129 temp += A(i,k+7) * B(k+7,j) ;
130 }
131 for ( ; k < p ; ++k ) temp += A(i,k)*B(k,j) ;
132 C(i,j) = temp ;
133 }
134 }
135}
136
137///////////////////////////////////////////////////////
138
139void
140MM_unroll16( valueType const a[], indexType lda, // n x p
141 valueType const b[], indexType ldb, // p x m
142 valueType c[], indexType ldc,
143 indexType n,
144 indexType p,
145 indexType m ) {
146
147 for ( indexType i = 0 ; i < n ; ++i ) {
148 for ( indexType j = 0 ; j < m ; ++j ) {
149 indexType k = 0 ;
150 valueType temp = 0 ;
151 for ( ; k < (p-15) ; k += 16 ) {
152 temp += A(i,k+ 0) * B(k+ 0,j) ;
153 temp += A(i,k+ 1) * B(k+ 1,j) ;
154 temp += A(i,k+ 2) * B(k+ 2,j) ;
155 temp += A(i,k+ 3) * B(k+ 3,j) ;
156 temp += A(i,k+ 4) * B(k+ 4,j) ;
157 temp += A(i,k+ 5) * B(k+ 5,j) ;
158 temp += A(i,k+ 6) * B(k+ 6,j) ;
159 temp += A(i,k+ 7) * B(k+ 7,j) ;
160 temp += A(i,k+ 8) * B(k+ 8,j) ;
161 temp += A(i,k+ 9) * B(k+ 9,j) ;
162 temp += A(i,k+10) * B(k+10,j) ;
163 temp += A(i,k+11) * B(k+11,j) ;
164 temp += A(i,k+12) * B(k+12,j) ;
165 temp += A(i,k+13) * B(k+13,j) ;
166 temp += A(i,k+14) * B(k+14,j) ;
167 temp += A(i,k+15) * B(k+15,j) ;
168 }
169 for ( ; k < p ; ++k ) temp += A(i,k)*B(k,j) ;
170 C(i,j) = temp ;
171 }
172 }
173}
174
175///////////////////////////////////////////////////////
176
177void
178MM_pointer( valueType const a[], indexType lda, // n x p
179 valueType const b[], indexType ldb, // p x m
180 valueType c[], indexType ldc,
181 indexType n,
182 indexType p,
183 indexType m ) {
184
185 for ( indexType i = 0 ; i < n ; ++i ) {
186 valueType * pc = &C(i,0) ;
187 for ( indexType j = 0 ; j < m ; ++j ) {
188 valueType temp = 0 ;
189 valueType const * pa = &A(i,0) ;
190 valueType const * pb = &B(0,j) ;
191 for ( indexType k = 0 ; k < p ; ++k ) {
192 temp += (*pa) * (*pb) ;
193 NEXT_COLUMN(a) ;
194 NEXT_ROW(b) ;
195 }
196 *pc = temp ;
197 NEXT_COLUMN(c) ;
198 }
199 }
200}
201
202/* from Monica Lam ASPLOS-IV paper */
203void
204MM_lam( valueType const a[], indexType lda, // n x p
205 valueType const b[], indexType ldb, // p x m
206 valueType c[], indexType ldc,
207 indexType n,
208 indexType p,
209 indexType m ) {
210
211 for ( indexType i = 0 ; i < n ; ++i )
212 for ( indexType j = 0 ; j < m ; ++j )
213 C(i,j) = 0 ;
214
215 for ( indexType i = 0 ; i < n ; ++i )
216 for ( indexType k = 0 ; k < p ; ++k ) {
217 valueType a_entry = A(i,k) ;
218 for ( indexType j = 0 ; j < m ; ++j )
219 C(i,j) += a_entry*B(k,j) ;
220 }
221}
222
223/* from Monica Lam ASPLOS-IV paper */
224void
225MM_tiling( valueType const a[], indexType lda, // n x p
226 valueType const b[], indexType ldb, // p x m
227 valueType c[], indexType ldc,
228 indexType n,
229 indexType p,
230 indexType m,
231 indexType step ) {
232
233 for ( indexType i = 0 ; i < n ; ++i )
234 for ( indexType j = 0 ; j < m ; ++j )
235 C(i,j) = 0 ;
236
237 for( indexType kk = 0 ; kk < p ; kk += step )
238 for( indexType jj = 0 ; jj < m ; jj += step )
239 for ( indexType i = 0 ; i < n ; ++i )
240 for ( indexType k = kk ; k < min(kk+step,p) ; ++k ) {
241 valueType a_entry = A(i,k) ;
242 for ( indexType j = jj ; j < min(jj+step,m) ; ++j )
243 C(i,j) += a_entry*B(k,j) ;
244 }
245}
246
247/* Matrix Multiply tuned for SS-10/30;
248 * Maeno Toshinori
249 * Tokyo Institute of Technology
250 *
251 * Using gcc-2.4.1 (-O2), this program ends in 12 seconds on SS-10/30.
252 *
253 * in original algorithm - sub-area for cache tiling
254 * #define L 20
255 * #define L2 20
256 * three 20x20 matrices reside in cache; two may be enough
257 */
258void
259MM_maeno( valueType const a[], indexType lda, // n x p
260 valueType const b[], indexType ldb, // p x m
261 valueType c[], indexType ldc,
262 indexType n,
263 indexType p,
264 indexType m,
265 indexType lparm ) {
266
267 for ( indexType i = 0 ; i < n ; ++i )
268 for ( indexType j = 0 ; j < m ; ++j )
269 C(i,j) = 0 ;
270
271 for ( indexType i2 = 0 ; i2 < n ; i2 += lparm )
272 for ( indexType kk = 0 ; kk < p ; kk += lparm ) {
273 indexType it = i2 + lparm ;
274 indexType kt = kk + lparm ;
275 for ( indexType j = 0 ; j < m ; j += 4 )
276 for( indexType i = i2 ; i < min(n,it) ; i += 2 ) {
277 valueType t0=0,
278 t1=0,
279 t2=0,
280 t3=0,
281 t4=0,
282 t5=0,
283 t6=0,
284 t7=0 ;
285 for( indexType k = kk ; k < min(p,kt) ; ++k ) {
286 valueType s = A(i,k) ;
287 t0 += s*B(k,j+0);
288 t1 += s*B(k,j+1);
289 t2 += s*B(k,j+2);
290 t3 += s*B(k,j+3);
291 s = A(i+1,k) ;
292 t4 += s*B(k,j+0);
293 t5 += s*B(k,j+1);
294 t6 += s*B(k,j+2);
295 t7 += s*B(k,j+3);
296 }
297 C(i+0,j+0) += t0 ;
298 C(i+0,j+1) += t1 ;
299 C(i+0,j+2) += t2 ;
300 C(i+0,j+3) += t3 ;
301 C(i+1,j+0) += t4 ;
302 C(i+1,j+1) += t5 ;
303 C(i+1,j+2) += t6 ;
304 C(i+1,j+3) += t7 ;
305 }
306 }
307}
308
309/*
310 * Matrix Multiply by Dan Warner, Dept. of Mathematics, Clemson University
311 *
312 * mmbu2.f multiplies matrices a and b
313 * a and b are n by n matrices
314 * nb is the blocking parameter.
315 * the tuning guide indicates nb = 50 is reasonable for the
316 * ibm model 530 hence 25 should be reasonable for the 320
317 * since the 320 has 32k rather than 64k of cache.
318 * Inner loops unrolled to depth of 2
319 * The loop functions without clean up code at the end only
320 * if the unrolling occurs to a depth k which divides into n
321 * in this case n must be divisible by 2.
322 * The blocking parameter nb must divide into n if the
323 * multiply is to succeed without clean up code at the end.
324 *
325 * converted to c by Mark Smotherman
326 * note that nb must also be divisible by 2 => cannot use 25, so use 20
327 */
328
329void
330MM_warner( valueType const a[], indexType lda, // n x p
331 valueType const b[], indexType ldb, // p x m
332 valueType c[], indexType ldc,
333 indexType n,
334 indexType p,
335 indexType m,
336 indexType nb ) {
337
338 for( indexType ii = 0 ; ii < n ; ii += nb ) {
339 for( indexType jj = 0 ; jj < m ; jj += nb ) {
340
341 for( indexType i = ii ; i < min(n,ii + nb) ; ++i )
342 for( indexType j = jj ; j < min(m,jj + nb) ; ++j )
343 C(i,j) = 0.0;
344
345 for( indexType kk = 0 ; kk < p ; kk += nb ) {
346 for( indexType i = ii ; i < min(n-1,ii + nb) ; i += 2 ) {
347 for( indexType j = jj ; j < min(m-1,jj + nb) ; j += 2 ) {
348 valueType s00 = C(i+0,j+0) ;
349 valueType s01 = C(i+0,j+1) ;
350 valueType s10 = C(i+1,j+0) ;
351 valueType s11 = C(i+1,j+1) ;
352 for( indexType k = kk ; k < kk + nb ; ++k ){
353 s00 += A(i+0,k)*B(k,j+0) ;
354 s01 += A(i+0,k)*B(k,j+1) ;
355 s10 += A(i+1,k)*B(k,j+0) ;
356 s11 += A(i+1,k)*B(k,j+1) ;
357 }
358 C(i+0,j+0) = s00 ;
359 C(i+0,j+1) = s01 ;
360 C(i+1,j+0) = s10 ;
361 C(i+1,j+1) = s11 ;
362 }
363 }
364 }
365 }
366 }
367}
368
369///////////////////////////////////////////////////////
370
371void
372MM_blas( valueType const a[], indexType lda, // n x p
373 valueType const b[], indexType ldb, // p x m
374 valueType c[], indexType ldc,
375 indexType n,
376 indexType p,
377 indexType m ) {
378#ifdef ROW_MAJOR
379 alglin::gemm( "N", "N", n, m, p,
380 1.0, b, ldb,
381 a, lda,
382 0.0, c, ldc ) ;
383#else
384 alglin::gemm( "N", "N", n, m, p,
385 1.0, a, lda,
386 b, ldb,
387 0.0, c, ldc ) ;
388#endif
389}
390
391///////////////////////////////////////////////////////
392
393void
394MM_blocking( valueType const a[], indexType lda, // n x p
395 valueType const b[], indexType ldb, // p x m
396 valueType c[], indexType ldc,
397 indexType n,
398 indexType p,
399 indexType m,
400 indexType step ) {
401
402 for ( indexType i = 0 ; i < n ; ++i )
403 for ( indexType j = 0 ; j < m ; ++j )
404 C(i,j) = 0 ;
405
406 for( indexType jj = 0 ; jj < m ; jj += step )
407 for( indexType ii = 0 ; ii < n ; ii += step )
408 for( indexType kk = 0 ; kk < p ; kk += step )
409 for( indexType j = jj ; j < min(m,jj+step) ; ++j )
410 for( indexType i = ii ; i < min(n,ii+step) ; ++i ) {
411 valueType temp = C(i,j) ;
412 for( indexType k = kk ; k < min(p,kk+step) ; ++k )
413 temp += A(i,k)*B(k,j) ;
414 C(i,j) = temp ;
415 }
416}
417
418///////////////////////////////////////////////////////
419
420#include <iostream>
421
422static valueType * aux = NULL ;
423static indexType naux = 5000*5000*10 ;
424
425void
426MM_strassen( valueType const A[], indexType lda, // n x p
427 valueType const B[], indexType ldb, // p x m
428 valueType C[], indexType ldc,
429 indexType n,
430 indexType p,
431 indexType m ) {
432
433 if ( aux == NULL ) aux = new valueType [ naux ] ;
434
435#ifdef ROW_MAJOR
436 strassen::matrixMultiply( n, /* Rows of C and A */
437 m, /* Cols of B and C */
438 p, /* Cols of A and Rows of B */
439 B, /* Pointer to coefficients of A */
440 ldb, /* Leading dimension of A, Fortran style */
441 A, /* Pointer to coefficients of B */
442 lda, /* Leading dimension of B, Fortran style */
443 C, /* Pointer to coefficients of C */
444 ldc, /* Leading dimension of C, Fortran style */
445 aux, /* Pointer to auxillary storage or NULL */
446 naux ) ; /* Number of elements in aux or <= 0 */
447#else
448 strassen::matrixMultiply( n, /* Rows of C and A */
449 m, /* Cols of B and C */
450 p, /* Cols of A and Rows of B */
451 A, /* Pointer to coefficients of A */
452 lda, /* Leading dimension of A, Fortran style */
453 B, /* Pointer to coefficients of B */
454 ldb, /* Leading dimension of B, Fortran style */
455 C, /* Pointer to coefficients of C */
456 ldc, /* Leading dimension of C, Fortran style */
457 aux, /* Pointer to auxillary storage or NULL */
458 naux ) ; /* Number of elements in aux or <= 0 */
459#endif
460}
461
462// EOF mm.hh
Matrix-matrix product with Strassen algorithm
File mm.hh
1/*--------------------------------------------------------------------------*\
2 | |
3 | Copyright (C) 2010 |
4 | |
5 | , __ , __ |
6 | /|/ \ /|/ \ |
7 | | __/ _ ,_ | __/ _ ,_ |
8 | | \|/ / | | | | \|/ / | | | |
9 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
10 | /| /| |
11 | \| \| |
12 | |
13 | Enrico Bertolazzi |
14 | Dipartimento di Ingegneria Meccanica e Strutturale |
15 | Universita` degli Studi di Trento |
16 | Via Mesiano 77, I-38050 Trento, Italy |
17 | email: enrico.bertolazzi@unitn.it |
18 | |
19 | version: 0.1 04-01-2010 |
20 | |
21\*--------------------------------------------------------------------------*/
22
23// To avoid double inclusion (old trick)
24#ifndef MM_HH
25#define MM_HH
26
27#ifndef REAL_TYPE
28#define REAL_TYPE double
29#endif
30
31#ifndef INTEGER_TYPE
32#define INTEGER_TYPE int
33#endif
34
35typedef REAL_TYPE valueType ;
36typedef INTEGER_TYPE indexType ;
37
38//#define ROW_MAJOR
39#ifdef ROW_MAJOR
40 #define ADDR(M,I,J) M[(I)*ld##M+(J)]
41 #define NEXT_COLUMN(M) ++p##M
42 #define NEXT_ROW(M) p##M += ld##M
43#else
44 #define ADDR(M,I,J) M[(I)+(J)*ld##M]
45 #define NEXT_COLUMN(M) p##M += ld##M
46 #define NEXT_ROW(M) ++p##M
47#endif
48
49void
50MM_normal( valueType const a[], indexType lda, // n x p
51 valueType const b[], indexType ldb, // p x m
52 valueType c[], indexType ldc,
53 indexType n,
54 indexType p,
55 indexType m ) ;
56
57void
58MM_temp( valueType const a[], indexType lda, // n x p
59 valueType const b[], indexType ldb, // p x m
60 valueType c[], indexType ldc,
61 indexType n,
62 indexType p,
63 indexType m ) ;
64
65void
66MM_unroll4( valueType const a[], indexType lda, // n x p
67 valueType const b[], indexType ldb, // p x m
68 valueType c[], indexType ldc,
69 indexType n,
70 indexType p,
71 indexType m ) ;
72
73void
74MM_unroll8( valueType const a[], indexType lda, // n x p
75 valueType const b[], indexType ldb, // p x m
76 valueType c[], indexType ldc,
77 indexType n,
78 indexType p,
79 indexType m ) ;
80
81void
82MM_unroll16( valueType const a[], indexType lda, // n x p
83 valueType const b[], indexType ldb, // p x m
84 valueType c[], indexType ldc,
85 indexType n,
86 indexType p,
87 indexType m ) ;
88
89void
90MM_pointer( valueType const a[], indexType lda, // n x p
91 valueType const b[], indexType ldb, // p x m
92 valueType c[], indexType ldc,
93 indexType n,
94 indexType p,
95 indexType m ) ;
96
97void
98MM_lam( valueType const a[], indexType lda, // n x p
99 valueType const b[], indexType ldb, // p x m
100 valueType c[], indexType ldc,
101 indexType n,
102 indexType p,
103 indexType m ) ;
104
105void
106MM_tiling( valueType const a[], indexType lda, // n x p
107 valueType const b[], indexType ldb, // p x m
108 valueType c[], indexType ldc,
109 indexType n,
110 indexType p,
111 indexType m,
112 indexType step ) ;
113
114void
115MM_maeno( valueType const a[], indexType lda, // n x p
116 valueType const b[], indexType ldb, // p x m
117 valueType c[], indexType ldc,
118 indexType n,
119 indexType p,
120 indexType m,
121 indexType lparm ) ;
122
123void
124MM_warner( valueType const a[], indexType lda, // n x p
125 valueType const b[], indexType ldb, // p x m
126 valueType c[], indexType ldc,
127 indexType n,
128 indexType p,
129 indexType m,
130 indexType nb ) ;
131
132void
133MM_blas( valueType const a[], indexType lda, // n x p
134 valueType const b[], indexType ldb, // p x m
135 valueType c[], indexType ldc,
136 indexType n,
137 indexType p,
138 indexType m ) ;
139
140void
141MM_blocking( valueType const a[], indexType lda, // n x p
142 valueType const b[], indexType ldb, // p x m
143 valueType c[], indexType ldc,
144 indexType n,
145 indexType p,
146 indexType m,
147 indexType step ) ;
148
149void
150MM_strassen( valueType const a[], indexType lda, // n x p
151 valueType const b[], indexType ldb, // p x m
152 valueType c[], indexType ldc,
153 indexType n,
154 indexType p,
155 indexType m ) ;
156
157#endif
158
159// EOF mm.hh
File mm.cc
1/*--------------------------------------------------------------------------*\
2 | |
3 | Copyright (C) 2010 |
4 | |
5 | , __ , __ |
6 | /|/ \ /|/ \ |
7 | | __/ _ ,_ | __/ _ ,_ |
8 | | \|/ / | | | | \|/ / | | | |
9 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
10 | /| /| |
11 | \| \| |
12 | |
13 | Enrico Bertolazzi |
14 | Dipartimento di Ingegneria Meccanica e Strutturale |
15 | Universita` degli Studi di Trento |
16 | Via Mesiano 77, I-38050 Trento, Italy |
17 | email: enrico.bertolazzi@unitn.it |
18 | |
19 | version: 0.1 04-01-2010 |
20 | |
21\*--------------------------------------------------------------------------*/
22
23#include "mm.hh"
24#include "alglin.hh"
25#include "strassen.hh"
26#include <algorithm>
27
28using namespace std ;
29
30/*
31 * matrix multiply tests -- C language, version 1.0, May 1993
32 *
33 * Contact Mark Smotherman (mark@cs.clemson.edu)
34 */
35
36#define A(I,J) ADDR(a,I,J)
37#define B(I,J) ADDR(b,I,J)
38#define C(I,J) ADDR(c,I,J)
39
40///////////////////////////////////////////////////////
41
42void
43MM_normal( valueType const a[], indexType lda, // n x p
44 valueType const b[], indexType ldb, // p x m
45 valueType c[], indexType ldc,
46 indexType n,
47 indexType p,
48 indexType m ) {
49
50 for ( indexType i = 0 ; i < n ; ++i ) {
51 for ( indexType j = 0 ; j < m ; ++j ) {
52 C(i,j) = 0 ;
53 for ( indexType k = 0 ; k < p ; ++k ) {
54 C(i,j) += A(i,k)*B(k,j) ;
55 }
56 }
57 }
58}
59
60///////////////////////////////////////////////////////
61
62void
63MM_temp( valueType const a[], indexType lda, // n x p
64 valueType const b[], indexType ldb, // p x m
65 valueType c[], indexType ldc,
66 indexType n,
67 indexType p,
68 indexType m ) {
69
70 for ( indexType i = 0 ; i < n ; ++i ) {
71 for ( indexType j = 0 ; j < m ; ++j ) {
72 valueType temp = 0 ;
73 for ( indexType k = 0 ; k < p ; ++k ) {
74 temp += A(i,k)*B(k,j) ;
75 }
76 C(i,j) = temp ;
77 }
78 }
79}
80
81///////////////////////////////////////////////////////
82
83void
84MM_unroll4( valueType const a[], indexType lda, // n x p
85 valueType const b[], indexType ldb, // p x m
86 valueType c[], indexType ldc,
87 indexType n,
88 indexType p,
89 indexType m ) {
90
91 for ( indexType i = 0 ; i < n ; ++i ) {
92 for ( indexType j = 0 ; j < m ; ++j ) {
93 indexType k = 0 ;
94 valueType temp = 0 ;
95 for ( ; k < (p-3) ; k += 4 ) {
96 temp += A(i,k+0) * B(k+0,j) ;
97 temp += A(i,k+1) * B(k+1,j) ;
98 temp += A(i,k+2) * B(k+2,j) ;
99 temp += A(i,k+3) * B(k+3,j) ;
100 }
101 for ( ; k < p ; ++k ) temp += A(i,k)*B(k,j) ;
102 C(i,j) = temp ;
103 }
104 }
105}
106
107///////////////////////////////////////////////////////
108
109void
110MM_unroll8( valueType const a[], indexType lda, // n x p
111 valueType const b[], indexType ldb, // p x m
112 valueType c[], indexType ldc,
113 indexType n,
114 indexType p,
115 indexType m ) {
116
117 for ( indexType i = 0 ; i < n ; ++i ) {
118 for ( indexType j = 0 ; j < m ; ++j ) {
119 indexType k = 0 ;
120 valueType temp = 0 ;
121 for ( ; k < (p-7) ; k += 8 ) {
122 temp += A(i,k+0) * B(k+0,j) ;
123 temp += A(i,k+1) * B(k+1,j) ;
124 temp += A(i,k+2) * B(k+2,j) ;
125 temp += A(i,k+3) * B(k+3,j) ;
126 temp += A(i,k+4) * B(k+4,j) ;
127 temp += A(i,k+5) * B(k+5,j) ;
128 temp += A(i,k+6) * B(k+6,j) ;
129 temp += A(i,k+7) * B(k+7,j) ;
130 }
131 for ( ; k < p ; ++k ) temp += A(i,k)*B(k,j) ;
132 C(i,j) = temp ;
133 }
134 }
135}
136
137///////////////////////////////////////////////////////
138
139void
140MM_unroll16( valueType const a[], indexType lda, // n x p
141 valueType const b[], indexType ldb, // p x m
142 valueType c[], indexType ldc,
143 indexType n,
144 indexType p,
145 indexType m ) {
146
147 for ( indexType i = 0 ; i < n ; ++i ) {
148 for ( indexType j = 0 ; j < m ; ++j ) {
149 indexType k = 0 ;
150 valueType temp = 0 ;
151 for ( ; k < (p-15) ; k += 16 ) {
152 temp += A(i,k+ 0) * B(k+ 0,j) ;
153 temp += A(i,k+ 1) * B(k+ 1,j) ;
154 temp += A(i,k+ 2) * B(k+ 2,j) ;
155 temp += A(i,k+ 3) * B(k+ 3,j) ;
156 temp += A(i,k+ 4) * B(k+ 4,j) ;
157 temp += A(i,k+ 5) * B(k+ 5,j) ;
158 temp += A(i,k+ 6) * B(k+ 6,j) ;
159 temp += A(i,k+ 7) * B(k+ 7,j) ;
160 temp += A(i,k+ 8) * B(k+ 8,j) ;
161 temp += A(i,k+ 9) * B(k+ 9,j) ;
162 temp += A(i,k+10) * B(k+10,j) ;
163 temp += A(i,k+11) * B(k+11,j) ;
164 temp += A(i,k+12) * B(k+12,j) ;
165 temp += A(i,k+13) * B(k+13,j) ;
166 temp += A(i,k+14) * B(k+14,j) ;
167 temp += A(i,k+15) * B(k+15,j) ;
168 }
169 for ( ; k < p ; ++k ) temp += A(i,k)*B(k,j) ;
170 C(i,j) = temp ;
171 }
172 }
173}
174
175///////////////////////////////////////////////////////
176
177void
178MM_pointer( valueType const a[], indexType lda, // n x p
179 valueType const b[], indexType ldb, // p x m
180 valueType c[], indexType ldc,
181 indexType n,
182 indexType p,
183 indexType m ) {
184
185 for ( indexType i = 0 ; i < n ; ++i ) {
186 valueType * pc = &C(i,0) ;
187 for ( indexType j = 0 ; j < m ; ++j ) {
188 valueType temp = 0 ;
189 valueType const * pa = &A(i,0) ;
190 valueType const * pb = &B(0,j) ;
191 for ( indexType k = 0 ; k < p ; ++k ) {
192 temp += (*pa) * (*pb) ;
193 NEXT_COLUMN(a) ;
194 NEXT_ROW(b) ;
195 }
196 *pc = temp ;
197 NEXT_COLUMN(c) ;
198 }
199 }
200}
201
202/* from Monica Lam ASPLOS-IV paper */
203void
204MM_lam( valueType const a[], indexType lda, // n x p
205 valueType const b[], indexType ldb, // p x m
206 valueType c[], indexType ldc,
207 indexType n,
208 indexType p,
209 indexType m ) {
210
211 for ( indexType i = 0 ; i < n ; ++i )
212 for ( indexType j = 0 ; j < m ; ++j )
213 C(i,j) = 0 ;
214
215 for ( indexType i = 0 ; i < n ; ++i )
216 for ( indexType k = 0 ; k < p ; ++k ) {
217 valueType a_entry = A(i,k) ;
218 for ( indexType j = 0 ; j < m ; ++j )
219 C(i,j) += a_entry*B(k,j) ;
220 }
221}
222
223/* from Monica Lam ASPLOS-IV paper */
224void
225MM_tiling( valueType const a[], indexType lda, // n x p
226 valueType const b[], indexType ldb, // p x m
227 valueType c[], indexType ldc,
228 indexType n,
229 indexType p,
230 indexType m,
231 indexType step ) {
232
233 for ( indexType i = 0 ; i < n ; ++i )
234 for ( indexType j = 0 ; j < m ; ++j )
235 C(i,j) = 0 ;
236
237 for( indexType kk = 0 ; kk < p ; kk += step )
238 for( indexType jj = 0 ; jj < m ; jj += step )
239 for ( indexType i = 0 ; i < n ; ++i )
240 for ( indexType k = kk ; k < min(kk+step,p) ; ++k ) {
241 valueType a_entry = A(i,k) ;
242 for ( indexType j = jj ; j < min(jj+step,m) ; ++j )
243 C(i,j) += a_entry*B(k,j) ;
244 }
245}
246
247/* Matrix Multiply tuned for SS-10/30;
248 * Maeno Toshinori
249 * Tokyo Institute of Technology
250 *
251 * Using gcc-2.4.1 (-O2), this program ends in 12 seconds on SS-10/30.
252 *
253 * in original algorithm - sub-area for cache tiling
254 * #define L 20
255 * #define L2 20
256 * three 20x20 matrices reside in cache; two may be enough
257 */
258void
259MM_maeno( valueType const a[], indexType lda, // n x p
260 valueType const b[], indexType ldb, // p x m
261 valueType c[], indexType ldc,
262 indexType n,
263 indexType p,
264 indexType m,
265 indexType lparm ) {
266
267 for ( indexType i = 0 ; i < n ; ++i )
268 for ( indexType j = 0 ; j < m ; ++j )
269 C(i,j) = 0 ;
270
271 for ( indexType i2 = 0 ; i2 < n ; i2 += lparm )
272 for ( indexType kk = 0 ; kk < p ; kk += lparm ) {
273 indexType it = i2 + lparm ;
274 indexType kt = kk + lparm ;
275 for ( indexType j = 0 ; j < m ; j += 4 )
276 for( indexType i = i2 ; i < min(n,it) ; i += 2 ) {
277 valueType t0=0,
278 t1=0,
279 t2=0,
280 t3=0,
281 t4=0,
282 t5=0,
283 t6=0,
284 t7=0 ;
285 for( indexType k = kk ; k < min(p,kt) ; ++k ) {
286 valueType s = A(i,k) ;
287 t0 += s*B(k,j+0);
288 t1 += s*B(k,j+1);
289 t2 += s*B(k,j+2);
290 t3 += s*B(k,j+3);
291 s = A(i+1,k) ;
292 t4 += s*B(k,j+0);
293 t5 += s*B(k,j+1);
294 t6 += s*B(k,j+2);
295 t7 += s*B(k,j+3);
296 }
297 C(i+0,j+0) += t0 ;
298 C(i+0,j+1) += t1 ;
299 C(i+0,j+2) += t2 ;
300 C(i+0,j+3) += t3 ;
301 C(i+1,j+0) += t4 ;
302 C(i+1,j+1) += t5 ;
303 C(i+1,j+2) += t6 ;
304 C(i+1,j+3) += t7 ;
305 }
306 }
307}
308
309/*
310 * Matrix Multiply by Dan Warner, Dept. of Mathematics, Clemson University
311 *
312 * mmbu2.f multiplies matrices a and b
313 * a and b are n by n matrices
314 * nb is the blocking parameter.
315 * the tuning guide indicates nb = 50 is reasonable for the
316 * ibm model 530 hence 25 should be reasonable for the 320
317 * since the 320 has 32k rather than 64k of cache.
318 * Inner loops unrolled to depth of 2
319 * The loop functions without clean up code at the end only
320 * if the unrolling occurs to a depth k which divides into n
321 * in this case n must be divisible by 2.
322 * The blocking parameter nb must divide into n if the
323 * multiply is to succeed without clean up code at the end.
324 *
325 * converted to c by Mark Smotherman
326 * note that nb must also be divisible by 2 => cannot use 25, so use 20
327 */
328
329void
330MM_warner( valueType const a[], indexType lda, // n x p
331 valueType const b[], indexType ldb, // p x m
332 valueType c[], indexType ldc,
333 indexType n,
334 indexType p,
335 indexType m,
336 indexType nb ) {
337
338 for( indexType ii = 0 ; ii < n ; ii += nb ) {
339 for( indexType jj = 0 ; jj < m ; jj += nb ) {
340
341 for( indexType i = ii ; i < min(n,ii + nb) ; ++i )
342 for( indexType j = jj ; j < min(m,jj + nb) ; ++j )
343 C(i,j) = 0.0;
344
345 for( indexType kk = 0 ; kk < p ; kk += nb ) {
346 for( indexType i = ii ; i < min(n-1,ii + nb) ; i += 2 ) {
347 for( indexType j = jj ; j < min(m-1,jj + nb) ; j += 2 ) {
348 valueType s00 = C(i+0,j+0) ;
349 valueType s01 = C(i+0,j+1) ;
350 valueType s10 = C(i+1,j+0) ;
351 valueType s11 = C(i+1,j+1) ;
352 for( indexType k = kk ; k < kk + nb ; ++k ){
353 s00 += A(i+0,k)*B(k,j+0) ;
354 s01 += A(i+0,k)*B(k,j+1) ;
355 s10 += A(i+1,k)*B(k,j+0) ;
356 s11 += A(i+1,k)*B(k,j+1) ;
357 }
358 C(i+0,j+0) = s00 ;
359 C(i+0,j+1) = s01 ;
360 C(i+1,j+0) = s10 ;
361 C(i+1,j+1) = s11 ;
362 }
363 }
364 }
365 }
366 }
367}
368
369///////////////////////////////////////////////////////
370
371void
372MM_blas( valueType const a[], indexType lda, // n x p
373 valueType const b[], indexType ldb, // p x m
374 valueType c[], indexType ldc,
375 indexType n,
376 indexType p,
377 indexType m ) {
378#ifdef ROW_MAJOR
379 alglin::gemm( "N", "N", n, m, p,
380 1.0, b, ldb,
381 a, lda,
382 0.0, c, ldc ) ;
383#else
384 alglin::gemm( "N", "N", n, m, p,
385 1.0, a, lda,
386 b, ldb,
387 0.0, c, ldc ) ;
388#endif
389}
390
391///////////////////////////////////////////////////////
392
393void
394MM_blocking( valueType const a[], indexType lda, // n x p
395 valueType const b[], indexType ldb, // p x m
396 valueType c[], indexType ldc,
397 indexType n,
398 indexType p,
399 indexType m,
400 indexType step ) {
401
402 for ( indexType i = 0 ; i < n ; ++i )
403 for ( indexType j = 0 ; j < m ; ++j )
404 C(i,j) = 0 ;
405
406 for( indexType jj = 0 ; jj < m ; jj += step )
407 for( indexType ii = 0 ; ii < n ; ii += step )
408 for( indexType kk = 0 ; kk < p ; kk += step )
409 for( indexType j = jj ; j < min(m,jj+step) ; ++j )
410 for( indexType i = ii ; i < min(n,ii+step) ; ++i ) {
411 valueType temp = C(i,j) ;
412 for( indexType k = kk ; k < min(p,kk+step) ; ++k )
413 temp += A(i,k)*B(k,j) ;
414 C(i,j) = temp ;
415 }
416}
417
418///////////////////////////////////////////////////////
419
420#include <iostream>
421
422static valueType * aux = NULL ;
423static indexType naux = 5000*5000*10 ;
424
425void
426MM_strassen( valueType const A[], indexType lda, // n x p
427 valueType const B[], indexType ldb, // p x m
428 valueType C[], indexType ldc,
429 indexType n,
430 indexType p,
431 indexType m ) {
432
433 if ( aux == NULL ) aux = new valueType [ naux ] ;
434
435#ifdef ROW_MAJOR
436 strassen::matrixMultiply( n, /* Rows of C and A */
437 m, /* Cols of B and C */
438 p, /* Cols of A and Rows of B */
439 B, /* Pointer to coefficients of A */
440 ldb, /* Leading dimension of A, Fortran style */
441 A, /* Pointer to coefficients of B */
442 lda, /* Leading dimension of B, Fortran style */
443 C, /* Pointer to coefficients of C */
444 ldc, /* Leading dimension of C, Fortran style */
445 aux, /* Pointer to auxillary storage or NULL */
446 naux ) ; /* Number of elements in aux or <= 0 */
447#else
448 strassen::matrixMultiply( n, /* Rows of C and A */
449 m, /* Cols of B and C */
450 p, /* Cols of A and Rows of B */
451 A, /* Pointer to coefficients of A */
452 lda, /* Leading dimension of A, Fortran style */
453 B, /* Pointer to coefficients of B */
454 ldb, /* Leading dimension of B, Fortran style */
455 C, /* Pointer to coefficients of C */
456 ldc, /* Leading dimension of C, Fortran style */
457 aux, /* Pointer to auxillary storage or NULL */
458 naux ) ; /* Number of elements in aux or <= 0 */
459#endif
460}
461
462// EOF mm.hh
Driver for testing
File main.cc
1/*--------------------------------------------------------------------------*\
2 | |
3 | Copyright (C) 2010 |
4 | |
5 | , __ , __ |
6 | /|/ \ /|/ \ |
7 | | __/ _ ,_ | __/ _ ,_ |
8 | | \|/ / | | | | \|/ / | | | |
9 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
10 | /| /| |
11 | \| \| |
12 | |
13 | Enrico Bertolazzi |
14 | Dipartimento di Ingegneria Meccanica e Strutturale |
15 | Universita` degli Studi di Trento |
16 | Via Mesiano 77, I-38050 Trento, Italy |
17 | email: enrico.bertolazzi@unitn.it |
18 | |
19 | version: 0.1 04-01-2010 |
20 | |
21 \*--------------------------------------------------------------------------*/
22
23#include "mm.hh"
24#include "timing.hh"
25//#include <stdlib.h>
26#include <iostream>
27#include <iomanip>
28#include <algorithm>
29#include <cmath>
30#include <limits>
31
32using namespace std ;
33
34#define A(I,J) ADDR(Amat,I,J)
35#define B(I,J) ADDR(Bmat,I,J)
36
37static
38void
39M_check( char const msg[],
40 valueType * Amat, indexType ldAmat,
41 valueType * Bmat, indexType ldBmat,
42 indexType n,
43 indexType m ) {
44 valueType res = 0 ;
45 for ( indexType i = 0 ; i < n ; ++i )
46 for ( indexType j = 0 ; j < m ; ++j )
47 res = std::max( res, abs( A(i,j)-B(i,j) ) ) ;
48
49 if ( res > numeric_limits<valueType>::epsilon()*1000 ) {
50 cout << "\n\n\nerror in matrix multiplication: " << msg << " = " << res << "\n\n\n" ;
51 exit(0) ;
52 }
53}
54
55int
56main() {
57
58 Timing tm ;
59
60 valueType *A, *B, *C, *Cref, elapsed ;
61
62 A = new valueType[ 5000 * 5000 ] ;
63 B = new valueType[ 5000 * 5000 ] ;
64 C = new valueType[ 5000 * 5000 ] ;
65 Cref = new valueType[ 5000 * 5000 ] ;
66
67 cout << setw(6) << "Size" ;
68 cout << setw(12) << "normal" ; // 1
69 cout << setw(12) << "temp" ; // 2
70 #if 0
71 cout << setw(12) << "unroll4" ; // 3
72 cout << setw(12) << "unroll8" ; // 4
73 cout << setw(12) << "unroll16" ; // 5
74 #endif
75 cout << setw(12) << "pointer" ; // 6
76 cout << setw(12) << "lam" ; // 7
77 cout << setw(12) << "tiling4" ; // 8
78 cout << setw(12) << "tiling8" ; // 9
79 cout << setw(12) << "tiling16" ; // 10
80 #if 0
81 cout << setw(12) << "Maeno4" ; // 11
82 cout << setw(12) << "Maeno8" ; // 12
83 cout << setw(12) << "Maeno16" ; // 13
84 #endif
85 cout << setw(12) << "Warner4" ; // 14
86 cout << setw(12) << "Warner8" ; // 15
87 cout << setw(12) << "Warner16" ; // 16
88 cout << setw(12) << "Blas" ; // 17
89 #if 0
90 cout << setw(12) << "blocking16" ; // 18
91 #endif
92 cout << setw(12) << "strassen" ; // 19
93 cout << '\n' ;
94
95 for ( indexType N = 100 ; N <= 600 ; N += 100 ) {
96
97 cout << setw(6) << N ;
98 for ( indexType i = 0 ; i < N ; ++i )
99 for ( indexType j = 0 ; j < N ; ++j )
100 A[i*N+j] = (arc4random()%10000) / 22123.0 ;
101 for ( indexType i = 0 ; i < N ; ++i )
102 for ( indexType j = 0 ; j < N ; ++j )
103 B[i*N+j] = (arc4random()%11111) / 32455.0 ;
104 //cout << "Generated matrix\n" ;
105
106 tm . start() ;
107 MM_normal( A, N, B, N, Cref, N, N, N, N ) ; // 1
108 elapsed = tm . milliseconds() ;
109 cout << setw(10) << elapsed << "ms" ;
110
111 tm . start() ;
112 MM_temp( A, N, B, N, C, N, N, N, N ) ; // 2
113 elapsed = tm . milliseconds() ;
114 cout << setw(10) << elapsed << "ms" ;
115 M_check( "temp", C, N, Cref, N, N, N ) ;
116
117 #if 0
118 tm . start() ;
119 MM_unroll4( A, N, B, N, C, N, N, N, N ) ; // 3
120 elapsed = tm . milliseconds() ;
121 cout << setw(10) << elapsed << "ms" ;
122 M_check( "unroll4", C, N, Cref, N, N, N ) ;
123
124 tm . start() ;
125 MM_unroll8( A, N, B, N, C, N, N, N, N ) ; // 4
126 elapsed = tm . milliseconds() ;
127 cout << setw(10) << elapsed << "ms" ;
128 M_check( "unroll8", C, N, Cref, N, N, N ) ;
129
130 tm . start() ;
131 MM_unroll16( A, N, B, N, C, N, N, N, N ) ; // 5
132 elapsed = tm . milliseconds() ;
133 cout << setw(10) << elapsed << "ms" ;
134 M_check( "unroll16", C, N, Cref, N, N, N ) ;
135 #endif
136
137 tm . start() ;
138 MM_pointer( A, N, B, N, C, N, N, N, N ) ; // 6
139 elapsed = tm . milliseconds() ;
140 cout << setw(10) << elapsed << "ms" ;
141 M_check( "pointer", C, N, Cref, N, N, N ) ;
142
143 tm . start() ;
144 MM_lam( A, N, B, N, C, N, N, N, N ) ; // 7
145 elapsed = tm . milliseconds() ;
146 cout << setw(10) << elapsed << "ms" ;
147 M_check( "lam", C, N, Cref, N, N, N ) ;
148
149 tm . start() ;
150 MM_tiling( A, N, B, N, C, N, N, N, N, 4 ) ; // 8
151 elapsed = tm . milliseconds() ;
152 cout << setw(10) << elapsed << "ms" ;
153 M_check( "tiling4", C, N, Cref, N, N, N ) ;
154
155 tm . start() ;
156 MM_tiling( A, N, B, N, C, N, N, N, N, 8 ) ;
157 elapsed = tm . milliseconds() ; // 9
158 cout << setw(10) << elapsed << "ms" ;
159 M_check( "tiling8", C, N, Cref, N, N, N ) ;
160
161 tm . start() ;
162 MM_tiling( A, N, B, N, C, N, N, N, N, 16 ) ; // 10
163 elapsed = tm . milliseconds() ;
164 cout << setw(10) << elapsed << "ms" ;
165 M_check( "tiling16", C, N, Cref, N, N, N ) ;
166
167 #if 0
168 tm . start() ;
169 MM_maeno( A, N, B, N, C, N, N, N, N, 4 ) ; // 11
170 elapsed = tm . milliseconds() ;
171 cout << setw(10) << elapsed << "ms" ;
172 M_check( "maeno4", C, N, Cref, N, N, N ) ;
173
174 tm . start() ;
175 MM_maeno( A, N, B, N, C, N, N, N, N, 8 ) ; // 12
176 elapsed = tm . milliseconds() ;
177 cout << setw(10) << elapsed << "ms" ;
178 M_check( "maeno8", C, N, Cref, N, N, N ) ;
179
180 tm . start() ;
181 MM_maeno( A, N, B, N, C, N, N, N, N, 16 ) ; // 13
182 elapsed = tm . milliseconds() ;
183 cout << setw(10) << elapsed << "ms" ;
184 M_check( "maeno16", C, N, Cref, N, N, N ) ;
185 #endif
186
187 tm . start() ;
188 MM_warner( A, N, B, N, C, N, N, N, N, 4 ) ; // 14
189 elapsed = tm . milliseconds() ;
190 cout << setw(10) << elapsed << "ms" ;
191 M_check( "warner4", C, N, Cref, N, N, N ) ;
192
193 tm . start() ;
194 MM_warner( A, N, B, N, C, N, N, N, N, 8 ) ; // 15
195 elapsed = tm . milliseconds() ;
196 cout << setw(10) << elapsed << "ms" ;
197 M_check( "warner8", C, N, Cref, N, N, N ) ;
198
199 tm . start() ;
200 MM_warner( A, N, B, N, C, N, N, N, N, 16 ) ; // 16
201 elapsed = tm . milliseconds() ;
202 cout << setw(10) << elapsed << "ms" ;
203 M_check( "warner16", C, N, Cref, N, N, N ) ;
204
205 tm . start() ;
206 MM_blas( A, N, B, N, C, N, N, N, N ) ; // 17
207 elapsed = tm . milliseconds() ;
208 cout << setw(10) << elapsed << "ms" ;
209 M_check( "blas", C, N, Cref, N, N, N ) ;
210
211 #if 0
212 tm . start() ;
213 MM_blocking( A, N, B, N, C, N, N, N, N, 16 ) ; // 18
214 elapsed = tm . milliseconds() ;
215 cout << setw(10) << elapsed << "ms" ;
216 M_check( "blocking16", C, N, Cref, N, N, N ) ;
217 #endif
218
219 tm . start() ;
220 MM_strassen( A, N, B, N, C, N, N, N, N ) ; // 19
221 elapsed = tm . milliseconds() ;
222 cout << setw(10) << elapsed << "ms" ;
223 M_check( "strassen", C, N, Cref, N, N, N ) ;
224
225 cout << '\n' ;
226
227 }
228
229 delete [] A ;
230 delete [] B ;
231 delete [] C ;
232 delete [] Cref ;
233
234 return 0 ;
235}
236
237// EOF main.cc
File main2.cc
1/*--------------------------------------------------------------------------*\
2 | |
3 | Copyright (C) 2010 |
4 | |
5 | , __ , __ |
6 | /|/ \ /|/ \ |
7 | | __/ _ ,_ | __/ _ ,_ |
8 | | \|/ / | | | | \|/ / | | | |
9 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
10 | /| /| |
11 | \| \| |
12 | |
13 | Enrico Bertolazzi |
14 | Dipartimento di Ingegneria Meccanica e Strutturale |
15 | Universita` degli Studi di Trento |
16 | Via Mesiano 77, I-38050 Trento, Italy |
17 | email: enrico.bertolazzi@unitn.it |
18 | |
19 | version: 0.1 04-01-2010 |
20 | |
21 \*--------------------------------------------------------------------------*/
22
23#include "mm.hh"
24#include "timing.hh"
25#include <stdlib.h>
26#include <iostream>
27#include <iomanip>
28#include <algorithm>
29#include <cmath>
30
31using namespace std ;
32
33#define A(I,J) ADDR(Amat,I,J)
34#define B(I,J) ADDR(Bmat,I,J)
35
36void
37M_check( char const msg[],
38 valueType * Amat, indexType ldAmat,
39 valueType * Bmat, indexType ldBmat,
40 indexType n,
41 indexType m ) {
42 valueType res = 0 ;
43 for ( indexType i = 0 ; i < n ; ++i )
44 for ( indexType j = 0 ; j < m ; ++j )
45 res = std::max( res, abs( A(i,j)-B(i,j) ) ) ;
46
47 if ( res > 1E-6 ) {
48 cout << "\n\n\nerror in matrix multiplication: " << msg << " = " << res << "\n\n\n" ;
49 exit(0) ;
50 }
51}
52
53int
54main() {
55
56 Timing tm ;
57
58 valueType *A, *B, *C, *Cref, elapsed ;
59
60 A = new valueType[ 6000 * 6000 ] ;
61 B = new valueType[ 6000 * 6000 ] ;
62 C = new valueType[ 6000 * 6000 ] ;
63 Cref = new valueType[ 6000 * 6000 ] ;
64
65 for ( indexType i = 0 ; i < 6000 * 6000 ; ++i ) A[i] = arc4random() * 1.02323E-8 ;
66 for ( indexType i = 0 ; i < 6000 * 6000 ; ++i ) B[i] = arc4random() * 2.323E-8 ;
67
68 cout << setw(6) << "Size" ;
69 cout << setw(12) << "Blas" ;
70 cout << setw(12) << "strassen" ;
71 cout << '\n' ;
72
73 for ( indexType N = 6000 ; N >= 1000 ; N *= 0.5 ) {
74
75 cout << setw(6) << N ;
76
77 indexType m = N-1 ;
78 indexType p = N-1 ;
79 indexType n = N-3 ;
80
81 tm . start() ;
82 MM_blas( A, N, B, N, Cref, N, m, p, n ) ;
83 //MM_strassen( A, N, B, N, Cref, N, m, p, n ) ;
84 elapsed = tm . milliseconds() ;
85 cout << setw(10) << elapsed << "ms" ;
86
87 tm . start() ;
88 MM_strassen( A, N, B, N, C, N, m, p, n ) ;
89 elapsed = tm . milliseconds() ;
90 cout << setw(10) << elapsed << "ms" ;
91 M_check( "strassen", C, N, Cref, N, m, n ) ;
92
93 cout << '\n' ;
94
95 }
96
97 delete [] A ;
98 delete [] B ;
99 delete [] C ;
100 delete [] Cref ;
101
102 return 0 ;
103}
104
105// EOF main.cc
Makefile for the compilation of the tests
File Makefile