Lesson of 25 February 2010¶
Lesson file in a zip file lesson9.zip
The header of cyclic reduction for BABD linear system
File AmodioLU.hh
1/*--------------------------------------------------------------------------*\
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; either version 2, or (at your option) |
6 | any later version. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | |
17 | Copyright (C) 2003 |
18 | |
19 | , __ , __ |
20 | /|/ \ /|/ \ |
21 | | __/ _ ,_ | __/ _ ,_ |
22 | | \|/ / | | | | \|/ / | | | |
23 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
24 | /| /| |
25 | \| \| |
26 | |
27 | Enrico Bertolazzi |
28 | Dipartimento di Ingegneria Meccanica e Strutturale |
29 | Universita` degli Studi di Trento |
30 | Via Mesiano 77, I-38050 Trento, Italy |
31 | email: enrico.bertolazzi@unitn.it |
32 | |
33\*--------------------------------------------------------------------------*/
34
35/**
36 *
37 * @mainpage BLOCK LU
38 * @date May 30, 2006
39 * @version 1.0
40 * @note first release May 30, 2006
41 *
42 * @author Enrico Bertolazzi
43 *
44 * @par Affiliation:
45 * Department of Mechanics and Structures Engineering <br>
46 * University of Trento <br>
47 * via Mesiano 77, I -- 38050 Trento, Italy <br>
48 * enrico.bertolazzi@ing.unitn.it
49 *
50 * @par Preface
51 *
52 * @par Abstract
53 *
54 */
55
56#ifndef AMODIO_LU_HH
57#define AMODIO_LU_HH
58
59#include "Malloc.hh"
60
61#ifndef CHAR_TYPE
62#define CHAR_TYPE char
63#endif
64
65#ifndef REAL_TYPE
66#define REAL_TYPE double
67#endif
68
69#ifndef INTEGER_TYPE
70#define INTEGER_TYPE int
71#endif
72
73namespace AmodioLUDefine {
74
75 typedef REAL_TYPE valueType ;
76 typedef valueType* valuePointer ;
77 typedef const valueType* valueConstPointer ;
78 typedef valueType& valueReference ;
79 typedef const valueType& valueConstReference ;
80
81 typedef INTEGER_TYPE indexType ;
82 typedef indexType* indexPointer ;
83 typedef const indexType* indexConstPointer ;
84 typedef indexType& indexReference ;
85 typedef const indexType& indexConstReference ;
86
87 /*
88 // _ _ _ _ _ _
89 // / \ _ __ ___ ___ __| (_) ___ | | | | | |
90 // / _ \ | '_ ` _ \ / _ \ / _` | |/ _ \| | | | | |
91 // / ___ \| | | | | | (_) | (_| | | (_) | |__| |_| |
92 // /_/ \_\_| |_| |_|\___/ \__,_|_|\___/|_____\___/
93 */
94 class AmodioLU {
95 private:
96
97 Malloc<valueType> baseValue ;
98 Malloc<indexType> baseIndex ;
99
100 AmodioLU(AmodioLU const &) ;
101 AmodioLU const & operator = (AmodioLU const &) ;
102
103 indexType nblock ; //!< total number of blocks
104 indexType n ; //!< size of square blocks
105 indexType m ; //!< number final rows (m>=n)
106 indexType nnz ; //!< total number of non zeros
107
108 /*
109 //
110 // Matrix structure
111 //
112 // n * nblock
113 // ________________^_________________
114 // / \
115 // n n n n q
116 // +-----+-----+-----+----.........-----+-----+-----+ \
117 // | Ad | Au | 0 | | 0 | 0 | n |
118 // +-----+-----+-----+ -----+-----+-----+ |
119 // | 0 | Ad | Au | 0 | 0 | 0 | n |
120 // +-----+-----+-----+-----+ -----+-----+-----+ |
121 // | 0 | 0 | Ad | Au | | 0 | 0 | n |
122 // +-----+-----+-----+-----+ -----+-----+-----+ |
123 // | : |
124 // : : > n * nblock
125 // : : |
126 // : : |
127 // : : |
128 // : +-----+-----+-----+ |
129 // : | Au | 0 | 0 | |
130 // : +-----+-----+-----+-----+ |
131 // : | 0 | Ad | Au | 0 | n |
132 // +-----+-----+---......---+-----+-----+=====+=====+ /
133 // | | | | | ! | !
134 // | H0 | 0 | | | 0 ! HN | Hq ! m
135 // | | | | | ! | !
136 // +-----+-----+---......---+-----+-----+=====+=====+
137 //
138 */
139 valueConstPointer AdAu ;
140 valueConstPointer H0 ;
141 valueConstPointer HN ;
142 valueConstPointer Hq ;
143
144 ///////////////////////////////////////////////////////
145
146 valuePointer SR_blk ;
147 valuePointer D_blk ;
148 valuePointer F_blk ;
149 indexPointer ipiv_work ;
150 indexPointer ipiv_blk ;
151 indexPointer perm_blk ;
152 valuePointer TMP_blk ;
153
154 public:
155
156 explicit
157 AmodioLU()
158 : baseValue("AmodioLU_value")
159 , baseIndex("AmodioLU_index")
160 { }
161
162 ~AmodioLU() {
163 baseValue . free() ;
164 baseIndex . free() ;
165 }
166
167 void load( indexType nblk,
168 indexType n,
169 indexType q,
170 valueConstPointer AdAu,
171 valueConstPointer H0,
172 valueConstPointer HN,
173 valueConstPointer Hq ) ;
174
175 void amxpy( valueType a, valuePointer x, valuePointer y ) const ;
176
177 indexType factorize() ;
178
179 void solve( valuePointer in_out ) ;
180
181 } ;
182}
183
184namespace AmodioLULoad {
185 using AmodioLUDefine::AmodioLU ;
186}
187
188#endif
The code of cyclic reduction
File AmodioLU.cc
1/*--------------------------------------------------------------------------*\
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; either version 2, or (at your option) |
6 | any later version. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | |
17 | Copyright (C) 2003 |
18 | |
19 | , __ , __ |
20 | /|/ \ /|/ \ |
21 | | __/ _ ,_ | __/ _ ,_ |
22 | | \|/ / | | | | \|/ / | | | |
23 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
24 | /| /| |
25 | \| \| |
26 | |
27 | Enrico Bertolazzi |
28 | Dipartimento di Ingegneria Meccanica e Strutturale |
29 | Universita' degli Studi di Trento |
30 | Via Mesiano 77, I-38050 Trento, Italy |
31 | email: enrico.bertolazzi@unitn.it |
32 | |
33\*--------------------------------------------------------------------------*/
34
35#include "AmodioLU.hh"
36#include "alglin.hh"
37
38#include <iostream>
39using namespace std ;
40
41namespace AmodioLUDefine {
42
43 using namespace std ;
44
45 static const valueType ONE = 1 ;
46 static const valueType M_ONE = -1 ;
47
48 /*
49 // _ _
50 // | | ___ __ _ __| |
51 // | |/ _ \ / _` |/ _` |
52 // | | (_) | (_| | (_| |
53 // |_|\___/ \__,_|\__,_|
54 */
55 void
56 AmodioLU::load( indexType nblock,
57 indexType n,
58 indexType q,
59 valueConstPointer AdAu,
60 valueConstPointer H0,
61 valueConstPointer HN,
62 valueConstPointer Hq ) {
63
64 this -> AdAu = AdAu ;
65 this -> H0 = H0 ;
66 this -> HN = HN ;
67 this -> Hq = Hq ;
68 this -> nblock = nblock ;
69 this -> n = n ;
70 this -> m = n+q ;
71
72 indexType nnzF = nblock*n*n ;
73 indexType nnzSR = 2*nnzF ;
74 indexType nnzD = (n+m)*(n+m) ;
75 indexType nnzTMP = 2*n*n ;
76
77 indexType nv = nnzF + nnzSR + nnzD + nnzTMP ;
78 indexType ni = 2*n*nblock + 2*n+m ;
79
80 baseValue . allocate(nv) ;
81 baseIndex . allocate(ni) ;
82
83 SR_blk = baseValue( nnzSR ) ;
84 D_blk = baseValue( nnzD ) ;
85 F_blk = baseValue( nnzF ) ;
86 TMP_blk = baseValue( nnzTMP ) ;
87
88 perm_blk = baseIndex( 2*n*nblock ) ;
89 ipiv_blk = baseIndex( n+m ) ;
90 ipiv_work = baseIndex( n ) ;
91
92 alglin::copy( nnzSR, AdAu, 1, SR_blk, 1 ) ;
93
94 }
95
96 /* __ _ _ __ ___ __ ___ __ _ _
97 // / _` | '_ ` _ \\ \/ / '_ \| | | |
98 // | (_| | | | | | |> <| |_) | |_| |
99 // \__,_|_| |_| |_/_/\_\ .__/ \__, |
100 // |_| |___/
101 */
102 void
103 AmodioLU::amxpy( valueType alpha, valuePointer x, valuePointer y ) const {
104
105 valueType * yk = y ;
106 valueType const * xk = x ;
107
108 for ( indexType k = 0 ; k < nblock ; ++k, xk += n, yk += n ) {
109 valueConstPointer Ad = AdAu + 2*k*n*n ;
110 valueConstPointer Au = Ad + n*n ;
111 alglin::gemv( "N", n, n, alpha, Ad, n, xk, 1, ONE, yk, 1 ) ;
112 alglin::gemv( "N", n, n, alpha, Au, n, xk+n, 1, ONE, yk, 1 ) ;
113 }
114
115 alglin::gemv( "No Transpose", m, n, alpha, H0, m, x, 1, ONE, yk, 1 ) ;
116 alglin::gemv( "No Transpose", m, n, alpha, HN, m, xk, 1, ONE, yk, 1 ) ;
117 alglin::gemv( "No Transpose", m, m-n, alpha, Hq, m, xk+n, 1, ONE, yk, 1 ) ;
118 }
119
120 /*
121 // __ _ _
122 // / _| __ _ ___| |_ ___ _ __(_)_______
123 // | |_ / _` |/ __| __/ _ \| '__| |_ / _ \
124 // | _| (_| | (__| || (_) | | | |/ / __/
125 // |_| \__,_|\___|\__\___/|_| |_/___\___|
126 */
127 indexType
128 AmodioLU::factorize() {
129
130 /*\
131 | Based on the algorithm
132 | Pierluigi Amodio and Giuseppe Romanazzi
133 | Algorithm 859: BABDCR: a Fortran 90 package for the Solution of Bordered ABD Linear Systems
134 | ACM Transactions on Mathematical Software, 32, 4, 597—608 (2006)
135 \*/
136
137 // some constanst
138 indexType const nxn = n*n ;
139 indexType const nx2 = n*2 ;
140
141 // initialize indices
142 valuePointer F = F_blk ;
143 indexPointer perm = perm_blk ;
144
145 // 0 1 2 3 4 5 6 7 8 9 10 11 12
146 // 0 * 2 * 4 * 6 * 8 * 10 * 12
147 // 0 - * - 4 - * - 8 - * - 12
148 // 0 - - - * - - - 8 - - - 12
149 // 0 - - - - - - - * - - - 12
150
151 // !!!!!!!!!!!!!!!!!!!!!!!!!
152 // !!!! reduction phase !!!!
153 // !!!!!!!!!!!!!!!!!!!!!!!!!
154 for ( indexType jump = 1 ; jump < nblock ; jump *= 2 ) {
155
156 for ( indexType k = 1 ; k*jump < nblock ; k += 2 ) {
157
158 indexType jC = k*jump ;
159 indexType jL = jC-jump ;
160 indexType jR = min(jC + jump,nblock) ;
161 valuePointer S = SR_blk + 2*jL*nxn ;
162 valuePointer RS = SR_blk + (2*jC-1)*nxn ;
163 valuePointer R = SR_blk + (2*jR-1)*nxn ;
164
165 // reshape ( Rb Sb ) as ( Rb ) and save in ( , ) columns by columns
166 // ( Sb )
167 alglin::copy( 2*nxn, RS, 1, TMP_blk, 1 ) ;
168 alglin::gecopy( n, n, TMP_blk, n, RS, nx2 ) ;
169 alglin::gecopy( n, n, TMP_blk+nxn, n, RS+n, nx2 ) ;
170
171 /*\
172 | reduces a 2x3 block matrix / Sa Rb \ to a 1x2 block matrix ( Sa' Rc' )
173 | \ Sb Rc /
174 |
175 | by using the following algorithm:
176 |
177 | P * / Rb \ = / Rb' \ = / L \ (U) = / I \ (L*U) = / I \ (L*U)
178 | \ Sb / \ Sb' / \ Sb' U^(-1) / \ Sb' (L*U)^(-1) / \ G /
179 |
180 | where and G = Sb'*U^(-1)*L^(-1).
181 |
182 | / -G I \ * P * / Rb \ = / -G I \ / I \ (L*U) = / 0 \
183 | \ I 0 / \ Sb / \ I 0 / \ G / \ L*U /
184 |
185 | ------------------------------------------------------------
186 |
187 | / -G I \ * P * / Sa 0 \ = / -G I \ / Sa' Rc' \ = / Sa''' Rc''' \
188 | \ I 0 / \ 0 Rc / \ I 0 / \ Sa'' Rc'' / \ Sa' Rc' /
189 |
190 | where Sa''' = Sa'' - G*Sa', Rc''' = Rc'' - G*Rc' and thus
191 |
192 | / -G I \ * P * / Sa Rb \ = / Sa''' 0 Rc''' \
193 | \ I 0 / \ Sb Rc / \ Sa' L*U Rc' /
194 |
195 \*/
196 // factorize RS by means of the LU factorization
197 // P * / Rb \ = / L \ (U)
198 // \ Sb / \ Sb' U^(-1) /
199 indexType INFO = alglin::getrf( nx2, n, RS, nx2, ipiv_work ) ;
200 if ( INFO != 0 ) return INFO ;
201
202 // compute G = Sb' U^(-1) L^(-1)
203 // / L \ (U) = / I \ (L*U) = / I \ (L*U)
204 // \ Sb' U^(-1) / \ Sb' U^(-1) L^(-1) / \ G /
205 alglin::trsm( "Right", "Lower", "No Transpose", "Unit", n, n, ONE, RS, nx2, RS+n, nx2 ) ;
206
207 // determine the permutation vector
208 for ( indexType i = 0 ; i < nx2 ; ++i ) perm[i] = i ;
209 for ( indexType i = 0 ; i < n ; ++i ) {
210 indexType ip = ipiv_work[i]-1 ;
211 if ( ip != i ) std::swap( perm[i], perm[ip] ) ;
212 }
213
214 // P * / Sa 0 \ = / Sa' Rc' \
215 // \ 0 Rc / \ Sa'' Rc'' /
216 // S = Sa'' and R = Rc''
217 valuePointer Sa1 = TMP_blk ;
218 valuePointer Rc1 = TMP_blk + nxn ;
219 alglin::copy( nxn, S, 1, Sa1, 1 ) ; alglin::zero( nxn, S, 1 ) ;
220 alglin::copy( nxn, R, 1, Rc1, 1 ) ; alglin::zero( nxn, R, 1 ) ;
221 for ( indexType i = 0 ; i < n ; ++i ) {
222 indexType pi = perm[i+n] ;
223 if ( pi < n ) alglin::copy( n, Sa1+pi, n, S+i, n ) ;
224 else alglin::copy( n, Rc1+pi-n, n, R+i, n ) ;
225 }
226
227 // / -G I \ / Sa' Rc' \ = / Sa''' Rc''' \
228 // \ I 0 / \ Sa'' Rc'' / \ Sa' Rc' /
229 //
230 // where Sa''' = Sa'' - G*Sa', Rc''' = Rc'' - G*Rc'
231 // where the nonnull rows of Sa' and Rc' are saved in F
232 valuePointer Fi = F ;
233 valuePointer Gi = RS + n ; // i-th column of G
234 for ( indexType i = 0 ; i < n ; ++i, Fi += n, Gi += nx2 ) {
235 indexType pi = perm[i] ;
236 valuePointer R_or_S, Ra_or_Sc ;
237 if ( pi < n ) { // save nonnull row of Sa' in the column of F
238 Ra_or_Sc = Sa1 + pi ;
239 R_or_S = S ;
240 } else { // save nonnull row of Rc' in the column of F
241 Ra_or_Sc = Rc1 + pi - n ;
242 R_or_S = R ;
243 }
244 alglin::copy( n, Ra_or_Sc, n, Fi, 1 ) ;
245 alglin::ger( n, n, M_ONE, Gi, 1, Fi, 1, R_or_S, n ) ;
246 }
247
248 perm += nx2 ;
249 F += nxn ;
250 }
251 }
252
253 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
254 // !!!! factorization of the last 2 by 2 matrix !!!!
255 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
256 valuePointer S = SR_blk ;
257 valuePointer R = SR_blk + (2*nblock-1)*nxn ;
258
259 indexType nm = n+m ;
260 alglin::gecopy( n, n, S, n, D_blk, nm ) ;
261 alglin::gecopy( n, n, R, n, D_blk+n*nm, nm ) ;
262
263 alglin::gecopy( m, n, H0, m, D_blk+n, nm ) ;
264 alglin::gecopy( m, n, HN, m, D_blk+n+n*nm, nm ) ;
265
266 if ( m > n ) {
267 alglin::gezero( n, m-n, D_blk+nx2*nm, nm ) ;
268 alglin::gecopy( m, m-n, Hq, m, D_blk+nx2*nm+n, nm ) ;
269 }
270
271 return alglin::getrf( nm, nm, D_blk, nm, ipiv_blk ) ;
272 }
273
274 /* _
275 // ___ ___ | |_ _____
276 // / __|/ _ \| \ \ / / _ \
277 // \__ \ (_) | |\ V / __/
278 // |___/\___/|_| \_/ \___|
279 */
280 void
281 AmodioLU::solve( valuePointer y ) {
282 /*\
283 |
284 | Purpose
285 | =======
286 |
287 | BABDCR_SOLV solves a babd linear system whose coefficient matrix
288 | has been factorized by BABDCR_FACT. The algorithm consists of three
289 | phases: reduction (by using the subroutine REDUCE_RHS), solution of
290 | the 2 by 2 block system (by using the subroutine DGETRS) and
291 | back-substitution (by using the subroutine SOLVE_BLOCK).
292 |
293 | In input, BABDCR_SOLV requires the coefficient matrix factorized by
294 | the subroutine BABDCR_FACT (see details of this subroutine) and the
295 | right hand side which is stored in the block vector VECT_B in the
296 | following form
297 |
298 | VECT_B = [f(0), f(1), ...., f(NBLOKS)].
299 |
300 | The first block element f(0) corresponds to the first row of the
301 | coefficient matrix containing Ba and Bb.
302 |
303 | On exit, BABDCR_SOLV gives the solution of the babd linear system.
304 | The solution is stored in the block vector VECT_B.
305 |
306 \*/
307 // some constanst
308 indexType const nxn = n*n ;
309 indexType const nx2 = 2*n ;
310
311 // initialize indices
312 valuePointer F = F_blk ;
313 indexPointer perm = perm_blk ;
314
315 // !!!!!!!!!!!!!!!!!!!!!!!!!
316 // !!!! reduction phase !!!!
317 // !!!!!!!!!!!!!!!!!!!!!!!!!
318 indexType jump = 1 ;
319 for ( ; jump < nblock ; jump *=2 ) {
320
321 for ( indexType k = 1 ; k*jump < nblock ; k += 2 ) {
322
323 indexType jC = k * jump ;
324 indexType jL = jC - jump ;
325 valuePointer RS = SR_blk + (2*jC-1)*nxn ;
326 valuePointer Va = y + jL*n ;
327 valuePointer Vb = y + jC*n ;
328
329 /*\
330 | P * / Va \ = / Vp \
331 | \ Vb / \ Vq /
332 \*/
333 alglin::copy( n, Va, 1, TMP_blk, 1 ) ;
334 alglin::copy( n, Vb, 1, TMP_blk+n, 1 ) ;
335
336 for ( indexType i = 0 ; i < n ; ++i ) {
337 Va[i] = TMP_blk[perm[i+n]] ;
338 Vb[i] = TMP_blk[perm[i]] ;
339 }
340 /*\
341 | / -G I \ / Vp \ = / Vq-G*Vp \
342 | \ I 0 / \ Vq / \ Vp /
343 \*/
344 alglin::gemv( "No Transpose", n, n, M_ONE, RS + n, nx2, Vb, 1, ONE, Va, 1 ) ;
345
346 perm += nx2 ;
347 F += nxn ;
348 }
349 }
350
351 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
352 // !!!! 2 by 2 block linear system solution !!!!
353 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
354 indexType nm = n+m ;
355
356 // / S R 0 \ /x(0)\ = b(0)
357 // \ H0 HN Hq / \x(N)/ = b(N)
358 valuePointer ye = y + (nblock-1) * n ;
359
360 // Apply row interchanges to the right hand sides.
361 alglin::swap( n, y, 1, ye, 1 ) ;
362 alglin::swaps( 1, ye, nm, 0, nm-1, ipiv_blk, 1 ) ;
363 alglin::trsv( "Lower", "No transpose", "Unit", nm, D_blk, nm, ye, 1 ) ;
364 alglin::trsv( "Upper", "No transpose", "Non-Unit", nm, D_blk, nm, ye, 1 ) ;
365 alglin::swap( n, y, 1, ye, 1 ) ;
366
367 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
368 // !!!! back-substitution phase !!!!
369 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
370
371 for ( jump /= 2 ; jump > 0 ; jump /= 2 ) {
372
373 indexType k = (nblock-1)/jump ;
374 if ( (k & 0x01) == 0 ) --k ;
375
376 for ( ; k > 0 ; k -= 2 ) {
377 indexType jC = k*jump ;
378 indexType jL = jC-jump ;
379 indexType jR = min(jC + jump,nblock) ;
380 valuePointer Xm = y + jL*n ;
381 valuePointer Xs = y + jR*n ;
382 valuePointer V = y + jC*n ;
383 valuePointer RS = SR_blk + (2*jC-1)*nxn ;
384
385 perm -= nx2 ;
386 F -= nxn ;
387
388 /*\
389 | computes the solution Xn (of length n) of the linear system
390 |
391 | L*U Xn = Vp - Sp*Xm - Rp*Xs
392 |
393 | obtained from the subroutines reduceBlock and reduceRHS applied
394 | to the system (see the subroutine reduceBlock for further details)
395 |
396 | ( Sa Rb ) ( Xm ) ( Va ).
397 | ( Sb Rc ) ( Xn ) = ( Vb )
398 | ( Xs )
399 |
400 | ( I ) * P * ( Sa Rb ) ( Xm ) ( Sp L*U Rp ) ( Xm ) ( Vp )
401 | ( -G I ) ( Sb Rc ) ( Xn ) = ( Sa' 0 Rc' ) ( Xn ) = ( V' )
402 | ( Xs ) ( Xs )
403 \*/
404
405 // compute V = Vp - Sp*Xm - Rp*Xs
406 for ( indexType i = 0 ; i < n ; ++i )
407 V[i] -= alglin::dot( n, F + i*n, 1, perm[i] < n ? Xm : Xs, 1 ) ;
408
409 // solve the system L*U Xn = V
410 alglin::trsv( "Lower", "No transpose", "Unit", n, RS, nx2, V, 1 ) ;
411 alglin::trsv( "Upper", "No transpose", "Non Unit", n, RS, nx2, V, 1 ) ;
412 }
413 }
414 }
415
416}
The header of LU decomposition for BABD linear system
File BlockLU.hh
1/*--------------------------------------------------------------------------*\
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; either version 2, or (at your option) |
6 | any later version. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | |
17 | Copyright (C) 2003 |
18 | |
19 | , __ , __ |
20 | /|/ \ /|/ \ |
21 | | __/ _ ,_ | __/ _ ,_ |
22 | | \|/ / | | | | \|/ / | | | |
23 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
24 | /| /| |
25 | \| \| |
26 | |
27 | Enrico Bertolazzi |
28 | Dipartimento di Ingegneria Meccanica e Strutturale |
29 | Universita` degli Studi di Trento |
30 | Via Mesiano 77, I-38050 Trento, Italy |
31 | email: enrico.bertolazzi@unitn.it |
32 | |
33\*--------------------------------------------------------------------------*/
34
35/**
36 *
37 * @mainpage BLOCK LU
38 * @date May 30, 2006
39 * @version 1.0
40 * @note first release May 30, 2006
41 *
42 * @author Enrico Bertolazzi
43 *
44 * @par Affiliation:
45 * Department of Mechanics and Structures Engineering <br>
46 * University of Trento <br>
47 * via Mesiano 77, I -- 38050 Trento, Italy <br>
48 * enrico.bertolazzi@ing.unitn.it
49 *
50 * @par Preface
51 *
52 * @par Abstract
53 *
54 */
55
56#ifndef BLOCK_LU_HH
57#define BLOCK_LU_HH
58
59#include "Malloc.hh"
60
61#ifndef CHAR_TYPE
62#define CHAR_TYPE char
63#endif
64
65#ifndef REAL_TYPE
66#define REAL_TYPE double
67#endif
68
69#ifndef INTEGER_TYPE
70#define INTEGER_TYPE int
71#endif
72
73namespace BlockLUDefine {
74
75 typedef CHAR_TYPE charType ;
76 typedef charType* charPointer ;
77 typedef const charType* charConstPointer ;
78 typedef charType& charReference ;
79 typedef const charType& charConstReference ;
80
81 typedef REAL_TYPE valueType ;
82 typedef valueType* valuePointer ;
83 typedef const valueType* valueConstPointer ;
84 typedef valueType& valueReference ;
85 typedef const valueType& valueConstReference ;
86
87 typedef INTEGER_TYPE indexType ;
88 typedef indexType* indexPointer ;
89 typedef const indexType* indexConstPointer ;
90 typedef indexType& indexReference ;
91 typedef const indexType& indexConstReference ;
92
93 /*
94 // ____ _ _ _ _ _
95 // | __ )| | ___ ___| | _| | | | | |
96 // | _ \| |/ _ \ / __| |/ / | | | | |
97 // | |_) | | (_) | (__| <| |__| |_| |
98 // |____/|_|\___/ \___|_|\_\_____\___/
99 //
100 */
101
102 class BlockLU {
103
104 Malloc<valueType> baseValue ;
105 Malloc<indexType> baseIndex ;
106
107 BlockLU(BlockLU const &) ;
108 BlockLU const & operator = (BlockLU const &) ;
109
110 indexType nblock ; //!< total number of blocks
111 indexType n ; //!< size of square blocks
112 indexType m ; //!< number final rows (m>=n)
113 indexType N ; //!< n * (nblock+1) + q
114 indexType nnz ; //!< total number of non zeros
115
116 /*
117 //
118 // Matrix structure
119 //
120 // n * nblock
121 // ________________^_________________
122 // / \
123 // n n n n q
124 // +-----+-----+-----+----.........-----+-----+-----+ \
125 // | Ad | Au | 0 | | 0 | 0 | n |
126 // +-----+-----+-----+ -----+-----+-----+ |
127 // | 0 | Ad | Au | 0 | 0 | 0 | n |
128 // +-----+-----+-----+-----+ -----+-----+-----+ |
129 // | 0 | 0 | Ad | Au | | 0 | 0 | n |
130 // +-----+-----+-----+-----+ -----+-----+-----+ |
131 // | : |
132 // : : > n * nblock
133 // : : |
134 // : : |
135 // : : |
136 // : +-----+-----+-----+ |
137 // : | Au | 0 | 0 | |
138 // : +-----+-----+-----+-----+ |
139 // : | 0 | Ad | Au | 0 | n |
140 // +-----+-----+---......---+-----+-----+=====+=====+ /
141 // | | | | | ! | !
142 // | H0 | 0 | | | 0 ! HN | Hq ! m
143 // | | | | | ! | !
144 // +-----+-----+---......---+-----+-----+=====+=====+
145 //
146 */
147 valueConstPointer AdAu ;
148 valueConstPointer H0 ;
149 valueConstPointer HN ;
150 valueConstPointer Hq ;
151
152 /*
153 //
154 // Working block AdH_blk [ size = nblock * ( n * (n+m) ) ]
155 //
156 // n * nblock
157 // ________________^_________________
158 // / \
159 // n n n
160 // +-----+-----+-----+----........+-----+
161 // | Ad | Ad | Ad | Ad | n
162 // +-----+-----+-----+ +-----+
163 // | | | | | !
164 // | H0 | 0 | | | 0 ! m
165 // | | | | | !
166 // +-----+-----+---......---+-----+-----+
167 //
168 */
169 valuePointer AdH_blk ;
170
171 /*
172 //
173 // Working block Au_blk [ size = nblock * (n*n) ]
174 //
175 // n * nblock + q
176 // ________________^________________________
177 // / \
178 // n n n n q
179 // +-----+-----+-----+----.........-----+------+
180 // | Au | Au | Au | Au | fill | n
181 // +-----+-----+-----+----.........-----+------+
182 //
183 */
184 valuePointer Au_blk ;
185
186 /*
187 //
188 // Working block DD_blk [ size = m * m ]
189 //
190 // n q (n+q=m)
191 // +=====+=====+
192 // ! | !
193 // ! HN | Hq ! m
194 // ! | !
195 // +=====+=====+
196 //
197 */
198 valuePointer DD_blk ;
199
200 /*
201 //
202 // Working block FF_blk [ size = nblock * (n*m) ]
203 //
204 // n q (n+q=m)
205 // +-----+-----+ \
206 // |fill |fill | n |
207 // +-----+-----+ |
208 // |fill |fill | n |
209 // +-----+-----+ |
210 // |fill |fill | n |
211 // +-----+-----+ :
212 // | | : n * (nblock-1)
213 // : : :
214 // : : :
215 // : : :
216 // : : :
217 // : : :
218 // |fill |fill | n |
219 // +-----+-----+ /
220 //
221 */
222 valuePointer FF_blk ;
223
224 // pivot vector
225 indexPointer ipiv_blk ;
226
227 void allocate( indexType nblk, indexType n, indexType q ) ;
228
229 public:
230
231 explicit BlockLU();
232
233 ~BlockLU() ;
234
235 void load( indexType nblk,
236 indexType n,
237 indexType q,
238 valueConstPointer AdAu,
239 valueConstPointer H0,
240 valueConstPointer HN,
241 valueConstPointer Hq ) ;
242
243 void amxpy( valueType a, valuePointer x, valuePointer y ) const ;
244
245 indexType factorize() ;
246
247 void solve( valuePointer in_out ) ;
248
249 } ;
250
251}
252
253namespace BlockLULoad {
254 using BlockLUDefine::BlockLU ;
255}
256
257#endif
The code of LU decomposition
File BlockLU.cc
1/*--------------------------------------------------------------------------*\
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; either version 2, or (at your option) |
6 | any later version. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
16 | |
17 | Copyright (C) 2003-2010 |
18 | |
19 | , __ , __ |
20 | /|/ \ /|/ \ |
21 | | __/ _ ,_ | __/ _ ,_ |
22 | | \|/ / | | | | \|/ / | | | |
23 | |(__/|__/ |_/ \_/|/|(__/|__/ |_/ \_/|/ |
24 | /| /| |
25 | \| \| |
26 | |
27 | Enrico Bertolazzi |
28 | Dipartimento di Ingegneria Meccanica e Strutturale |
29 | Universita` degli Studi di Trento |
30 | Via Mesiano 77, I-38050 Trento, Italy |
31 | email: enrico.bertolazzi@unitn.it |
32 | |
33\*--------------------------------------------------------------------------*/
34
35/**
36 *
37 * @mainpage BLOCK LU
38 * @date February 12, 2010
39 * @version 1.0
40 * @note first release May 30, 2006
41 *
42 * @author Enrico Bertolazzi
43 *
44 * @par Affiliation:
45 * Department of Mechanics and Structures Engineering <br>
46 * University of Trento <br>
47 * via Mesiano 77, I -- 38050 Trento, Italy <br>
48 * enrico.bertolazzi@ing.unitn.it
49 *
50 * @par Preface
51 *
52 * @par Abstract
53 *
54 */
55
56#include "BlockLU.hh"
57#include "Alglin.hh"
58#include <limits>
59
60namespace BlockLUDefine {
61
62 using namespace std ;
63
64 static const valueType ONE = 1 ;
65 static const valueType M_ONE = -1 ;
66
67 /* ____ _ _ _ _ _
68 // | __ )| | ___ ___| | _| | | | | |
69 // | _ \| |/ _ \ / __| |/ / | | | | |
70 // | |_) | | (_) | (__| <| |__| |_| |
71 // |____/|_|\___/ \___|_|\_\_____\___/
72 */
73 BlockLU::BlockLU()
74 : baseValue("BlockLU_value")
75 , baseIndex("BlockLU_index")
76 { }
77
78 BlockLU::~BlockLU() {
79 baseValue . free() ;
80 baseIndex . free() ;
81 }
82
83 /* _ _ _
84 // __ _| | | ___ ___ __ _| |_ ___
85 // / _` | | |/ _ \ / __/ _` | __/ _ \
86 // | (_| | | | (_) | (_| (_| | || __/
87 // \__,_|_|_|\___/ \___\__,_|\__\___|
88 */
89 void
90 BlockLU::allocate( indexType nblock, indexType n, indexType q ) {
91
92 this -> nblock = nblock ;
93 this -> n = n ;
94 this -> m = n+q ;
95 this -> N = nblock*n+m ;
96
97 indexType nnzAdH = nblock*(n*(n+m)) ; // blocchi AdH
98 indexType nnzAu = nblock*(n*n)+n*q ; // blocchi Au
99 indexType nnzFF = (nblock-1)*(n*m) ; // blocchi FF
100 indexType nnzDD = m*m ; ; // blocco DD
101
102 nnz = nnzAdH + nnzAu + nnzFF + nnzDD ;
103
104 baseValue . allocate( nnz ) ;
105 baseIndex . allocate( N ) ;
106
107 AdH_blk = baseValue( nnzAdH ) ;
108 Au_blk = baseValue( nnzAu ) ;
109 FF_blk = baseValue( nnzFF ) ;
110 DD_blk = baseValue( nnzDD ) ;
111 ipiv_blk = baseIndex( N ) ;
112 }
113
114 /* _ _
115 // | | ___ __ _ __| |
116 // | |/ _ \ / _` |/ _` |
117 // | | (_) | (_| | (_| |
118 // |_|\___/ \__,_|\__,_|
119 */
120 void
121 BlockLU::load( indexType nblk,
122 indexType n,
123 indexType q,
124 valueConstPointer AdAu,
125 valueConstPointer H0,
126 valueConstPointer HN,
127 valueConstPointer Hq ) {
128
129 allocate( nblk, n, q ) ;
130
131 this -> AdAu = AdAu ;
132 this -> H0 = H0 ;
133 this -> HN = HN ;
134 this -> Hq = Hq ;
135
136 // Initialize structures
137 alglin::zero( nnz, AdH_blk, 1 ) ;
138
139 // fill matrix
140 indexType nm = n+m ;
141 for ( indexType k = 0 ; k < nblock ; ++k ) {
142 valueConstPointer Ad = AdAu + 2*k*n*n ;
143 valueConstPointer Au = Ad + n*n ;
144 alglin::gecopy( n, n, Ad, n, AdH_blk + k*nm*n, nm ) ;
145 alglin::gecopy( n, n, Au, n, Au_blk + k*n*n, n ) ;
146 }
147
148 alglin::gecopy( m, n, H0, m, AdH_blk + n, nm ) ;
149 alglin::gecopy( m, n, HN, m, DD_blk, m ) ;
150 alglin::gecopy( m, q, Hq, m, DD_blk + m*n, m ) ;
151 }
152
153 /* __ _ _ __ ___ __ ___ __ _ _
154 // / _` | '_ ` _ \\ \/ / '_ \| | | |
155 // | (_| | | | | | |> <| |_) | |_| |
156 // \__,_|_| |_| |_/_/\_\ .__/ \__, |
157 // |_| |___/
158 */
159 void
160 BlockLU::amxpy( valueType alpha, valuePointer x, valuePointer y ) const {
161
162 valueType * yk = y ;
163 valueType const * xk = x ;
164
165 for ( indexType k = 0 ; k < nblock ; ++k, xk += n, yk += n ) {
166 valueConstPointer Ad = AdAu + 2*k*n*n ;
167 valueConstPointer Au = Ad + n*n ;
168 alglin::gemv( "N", n, n, alpha, Ad, n, xk, 1, ONE, yk, 1 ) ;
169 alglin::gemv( "N", n, n, alpha, Au, n, xk+n, 1, ONE, yk, 1 ) ;
170 }
171
172 alglin::gemv( "No Transpose", m, n, alpha, H0, m, x, 1, ONE, yk, 1 ) ;
173 alglin::gemv( "No Transpose", m, n, alpha, HN, m, xk, 1, ONE, yk, 1 ) ;
174 alglin::gemv( "No Transpose", m, m-n, alpha, Hq, m, xk+n, 1, ONE, yk, 1 ) ;
175 }
176
177 /* __ _ _
178 // / _| __ _ ___| |_ ___ _ __(_)_______
179 // | |_ / _` |/ __| __/ _ \| '__| |_ / _ \
180 // | _| (_| | (__| || (_) | | | |/ / __/
181 // |_| \__,_|\___|\__\___/|_| |_/___\___|
182 */
183 indexType
184 BlockLU::factorize() {
185
186 indexType nm = n+m ;
187 indexType rowFF = (nblock-1)*n ;
188 indexType INFO ;
189
190 indexPointer ipivk = ipiv_blk ;
191 valuePointer AdH = AdH_blk ;
192 valuePointer Au = Au_blk ;
193 valuePointer FF = FF_blk ;
194
195 for ( indexType k = 0 ; k < nblock-1 ; ++k, ipivk += n, AdH += nm*n, Au += n*n, FF += n ) {
196
197 INFO = alglin::getrf( nm, n, AdH, nm, ipivk ) ; // LU factorization
198 if ( INFO != 0 ) return INFO + k*n ;
199
200 valuePointer H = AdH + n ;
201 valuePointer CC = AdH + nm*n + n ;
202
203 for ( indexType i = 0 ; i < n ; ++i ) {
204 indexType ip = ipivk[i]-1 ;
205 if ( ip != i ) { // detected row exchange
206 if ( ip < n ) { // exchange row
207 alglin::swap( n, Au + i, n, Au + ip, n ) ;
208 alglin::swap( m, FF + i, rowFF, FF + ip, rowFF ) ; // last column block
209 } else {
210 alglin::swap( n, Au + i, n, CC + (ip-n), nm ) ;
211 alglin::swap( m, FF + i, rowFF, DD_blk + (ip-n), m ) ; // last column block
212 }
213 }
214 }
215
216 //
217 // +---------+---------+ ........ +--------+
218 // | \ U | | | |
219 // | \ |L^(-1)Au | |L^(-1)FF|
220 // | L \ | | | |
221 // |=========|=========| +========+
222 // | | | | |
223 // | H | CC* | | DD* |
224 // | | | | |
225 // +---------+---------+ ........ +--------+
226 //
227 // CC* = CC - H (LU)^(-1) Au
228 // DD* = DD - H (LU)^(-1) FF
229 //
230 alglin::trsm( "Left", "Lower", "No Transpose", "Unit", n, n, ONE, AdH, nm, Au, n ) ;
231 alglin::trsm( "Left", "Lower", "No Transpose", "Unit", n, m, ONE, AdH, nm, FF, rowFF ) ;
232
233 alglin::gemm( "No Transpose", "No Transpose", m, n, n, M_ONE, H, nm, Au, n, ONE, CC, nm ) ;
234 alglin::gemm( "No Transpose", "No Transpose", m, m, n, M_ONE, H, nm, FF, rowFF, ONE, DD_blk, m ) ;
235 }
236
237 // factorize last two block
238 INFO = alglin::getrf( nm, n, AdH, nm, ipivk ) ; // LU factorization
239 if ( INFO != 0 ) return false ;
240
241 for ( indexType i = 0 ; i < n ; ++i ) {
242 indexType ip = ipivk[i]-1 ;
243 if ( ip != i ) { // detected row exchange
244 if ( ip < n ) { // exchange row
245 alglin::swap( m, Au + i, n, Au + ip, n ) ; // last column block
246 } else {
247 alglin::swap( m, Au + i, n, DD_blk + (ip-n), m ) ; // last column block
248 }
249 }
250 }
251
252 //
253 // +---------+---------+
254 // | \ U | |
255 // | \ |L^(-1)Au |
256 // | L \ | |
257 // |=========|=========|
258 // | | |
259 // | H | CC* |
260 // | | |
261 // +---------+---------+
262 //
263 // CC* = CC - H (LU)^(-1) Au
264 //
265 valuePointer H = AdH + n ;
266 alglin::trsm( "Left", "Lower", "No Transpose", "Unit", n, m, ONE, AdH, nm, Au, n ) ;
267 alglin::gemm( "No Transpose", "No Transpose", m, m, n, M_ONE, H, nm, Au, n, ONE, DD_blk, m ) ;
268
269 // factorize last block
270 ipivk += n ;
271 INFO = alglin::getrf( m, m, DD_blk, m, ipivk ) ; // LU factorization
272 return INFO == 0 ;
273 }
274
275 /* _
276 // ___ ___ | |_ _____
277 // / __|/ _ \| \ \ / / _ \
278 // \__ \ (_) | |\ V / __/
279 // |___/\___/|_| \_/ \___|
280 */
281 void
282 BlockLU::solve( valuePointer y ) {
283
284 // solve L
285 indexType nm = n+m ;
286 indexType rowFF = (nblock-1) * n ;
287 valuePointer ye = y + nblock * n ;
288
289 for ( indexType k = 0 ; k < nblock ; ++k ) {
290 indexConstPointer ipivk = ipiv_blk + k * n ;
291 valueConstPointer AdH = AdH_blk + k * (nm*n) ;
292 valuePointer yk = y + k * n ;
293
294 // apply permutation
295 for ( indexType i = 0 ; i < n ; ++i ) {
296 indexType ip = ipivk[i]-1 ;
297 if ( ip != i ) { // detected row exchange
298 if ( ip < n ) std::swap( yk[i], yk[ip] ) ;
299 else std::swap( yk[i], ye[ip-n] ) ;
300 }
301 }
302 alglin::trsv( "Lower", "No Transpose", "Unit", n, AdH, nm, yk, 1 ) ;
303 alglin::gemv( "No Transpose", m, n, M_ONE, AdH + n, nm, yk, 1, ONE, ye, 1 ) ;
304 }
305
306 indexConstPointer ipive = ipiv_blk + nblock * n ;
307 indexType ok = alglin::getrs( "No Transpose", m, 1, DD_blk, m, ipive, ye, m ) ;
308
309 if ( rowFF > 0 ) alglin::gemv( "No Transpose", rowFF, m, M_ONE, FF_blk, rowFF, ye, 1, ONE, y, 1 ) ;
310 if ( m > n ) alglin::gemv( "No Transpose", n, m-n, M_ONE, Au_blk + nblock*n*n, n, ye+n, 1, ONE, ye-n, 1 ) ;
311
312 indexType k = nblock ;
313 do {
314 --k ;
315 valueConstPointer AdH = AdH_blk + k*nm*n ;
316 valueConstPointer Au = Au_blk + k*n*n ;
317 valuePointer yk = y + k*n ;
318
319 alglin::gemv( "No Transpose", n, n, M_ONE, Au, n, yk + n, 1, ONE, yk, 1 ) ;
320 alglin::trsv( "Upper", "No Transpose", "Non Unit", n, AdH, nm, yk, 1 ) ;
321
322 } while ( k > 0 ) ;
323 }
324}
A driver code
File main.cc
1#include "BlockLU.hh"
2#include "AmodioLU.hh"
3//#include "ArcecoLU.hh"
4#include "Alglin.hh"
5#include "timing.hh"
6
7#include <cmath>
8#include <iostream>
9#include <iomanip>
10
11using namespace std ;
12
13/*
14// + . +
15// | n*n n*n . |
16// | n*n n*n . |
17// | n*n n*n . |
18// | . |
19// | . |
20// | . . . . . |
21// | n*n . n*n n*q | q = m-n
22// | m*n . m*n 0 |
23// + +
24//
25*/
26
27using namespace BlockLULoad ;
28using namespace AmodioLULoad ;
29//using namespace ArcecoLULoad ;
30
31typedef BlockLUDefine::valueType valueType ;
32typedef BlockLUDefine::indexType indexType ;
33
34int
35main() {
36 Timing tm ;
37 BlockLU bm ;
38 AmodioLU am ;
39 //ArcecoLU bm ;
40 bool ok ;
41 valueType elapsed ;
42
43 indexType nblk = 1700 ;
44 indexType n = 16 ;
45 indexType q = 9 ;
46 indexType m = n+q ;
47 indexType nxn = n*n ;
48 indexType N = (nblk+1)*n+q ;
49
50 valueType * AdAu = new valueType[2*nblk*nxn] ;
51 valueType * Hi = new valueType[m*n] ;
52 valueType * Hf = new valueType[m*n] ;
53 valueType * Hmu = new valueType[m*q] ;
54 valueType * xb = new valueType[N] ;
55 valueType * xb1 = new valueType[N] ;
56 valueType * x = new valueType[N] ;
57
58 cout << setw(12) << "factor" ; // 1
59 cout << setw(12) << "solve" ; // 1
60 cout << setw(12) << "err" ; // 1
61 cout << setw(12) << "factor (Amo)" ; // 1
62 cout << setw(12) << "solve (Amo)" ; // 1
63 cout << setw(12) << "err" ; // 1
64 cout << '\n' ;
65
66 for ( indexType kkk = 0 ; kkk < 100 ; ++kkk ) {
67
68 // riempie la matrice
69 for ( indexType i = 0 ; i < n ; ++i ) {
70 for ( indexType j = 0 ; j < n ; ++j ) {
71 indexType iaddr = i+j*n ;
72 for ( indexType k = 0 ; k < nblk ; ++k ) {
73 valueType * Ad = AdAu + 2*k*nxn ;
74 valueType * Au = Ad + nxn ;
75 Ad[iaddr] = (rand()%1000) ;
76 Au[iaddr] = (rand()%2000) ;
77 }
78 }
79 }
80 // riempie la matrice
81 for ( indexType i = 0 ; i < m ; ++i ) {
82 for ( indexType j = 0 ; j < n ; ++j ) {
83 indexType iaddr = i+j*m ;
84 Hi[iaddr] = (rand()%1000) ;
85 Hf[iaddr] = (rand()%1000) ;
86 }
87 }
88 // riempie la matrice
89 for ( indexType i = 0 ; i < m ; ++i ) {
90 for ( indexType j = 0 ; j < q ; ++j ) {
91 indexType iaddr = i+j*m ;
92 Hmu[iaddr] = (rand()%1000) ;
93 }
94 }
95
96 //
97 for ( indexType i = 0 ; i < N ; ++i ) {
98 x[i] = i ; // (arc4random()%234) ;
99 xb[i] = 0 ;
100 xb1[i] = 0 ;
101 }
102
103 //////////////////////////////
104 bm . load( nblk, n, q, AdAu, Hi, Hf, Hmu ) ;
105 am . load( nblk, n, q, AdAu, Hi, Hf, Hmu ) ;
106 bm . amxpy( 1, x, xb ) ;
107 bm . amxpy( 1, x, xb1 ) ;
108
109 tm . start() ;
110 ok = bm . factorize() ; if ( !ok ) cout << "factorize error\n" ;
111 elapsed = tm . milliseconds() ;
112 cout << setw(10) << elapsed << "ms" ;
113
114 tm . start() ;
115 bm . solve( xb ) ;
116 elapsed = tm . milliseconds() ;
117 cout << setw(10) << elapsed << "ms" ;
118
119 alglin::axpy( N, -1.0, x, 1, xb, 1 ) ;
120 cout << " " << setw(10) << alglin::nrm2( N, xb, 1 ) ;
121
122 tm . start() ;
123 indexType ierr = am . factorize() ;
124 if ( ierr ) cout << "factorize error ierr = " << ierr << '\n' ;
125 elapsed = tm . milliseconds() ;
126 cout << setw(10) << elapsed << "ms" ;
127
128 tm . start() ;
129 am . solve( xb1 ) ;
130 elapsed = tm . milliseconds() ;
131 cout << setw(10) << elapsed << "ms" ;
132 alglin::axpy( N, -1.0, x, 1, xb1, 1 ) ;
133 cout << " " << setw(10) << alglin::nrm2( N, xb1, 1 ) ;
134 cout << '\n' ;
135
136 }
137
138}