MBDyn-1.7.3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups
pmthrdslv.c
Go to the documentation of this file.
1 /*
2  * MBDyn (C) is a multibody analysis code.
3  * http://www.mbdyn.org
4  *
5  * Copyright (C) 1996-2017
6  *
7  * Pierangelo Masarati <masarati@aero.polimi.it>
8  * Paolo Mantegazza <mantegazza@aero.polimi.it>
9  *
10  * Dipartimento di Ingegneria Aerospaziale - Politecnico di Milano
11  * via La Masa, 34 - 20156 Milano, Italy
12  * http://www.aero.polimi.it
13  *
14  * Changing this copyright notice is forbidden.
15  *
16  * This program is free software; you can redistribute it and/or modify
17  * it under the terms of the GNU General Public License as published by
18  * the Free Software Foundation (version 2 of the License).
19  *
20  *
21  * This program is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU General Public License for more details.
25  *
26  * You should have received a copy of the GNU General Public License
27  * along with this program; if not, write to the Free Software
28  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29  */
30 /*
31  * Copyright Marco Morandini
32  */
33 
34 #include "mbconfig.h" /* This goes first in every *.c,*.cc file */
35 
36 #ifdef USE_NAIVE_MULTITHREAD
37 
38 #define _GNU_SOURCE
39 
40 #include <stdlib.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <math.h>
44 #include <pthread.h>
45 #include <sys/poll.h>
46 
47 struct task_struct;
48 
49 /* FIXME: from <f2c.h> */
50 typedef int integer;
51 typedef double doublereal;
52 
53 #include "pmthrdslv.h"
54 
55 #define MINPIV (1.0e-8)
56 
57 int
58 pnaivfct(doublereal** a,
59  integer neq,
60  integer *nzr, integer** ri,
61  integer *nzc, integer** ci,
62  integer* nril, integer **ril,
63  char** nz,
64  integer *piv,
65  integer *todo,
66  doublereal minpiv,
67  AO_t *row_locks,
68  volatile AO_TS_t *col_locks,
69  int task,
70  int ncpu)
71 {
72  integer i, j, k, pvr = 0, pvc, nr, nc, r;
73  integer *pri, *pci;
74  char *pnzk;
75  doublereal den, mul, mulpiv, fari;
76  doublereal *par, *papvr;
77 
78  if (minpiv == 0.) {
79  minpiv = MINPIV;
80  }
81 
82  for (i = 0; i < neq; i++) {
83  pri = ri[i];
84  if (task == 0) {
85  doublereal fapvr;
86 
87  nr = nzr[i];
88  if (nr == 0) {
89  return NAIVE_ENULCOL + i;
90  }
91  nc = neq + 1;
92  mul = mulpiv = fapvr = 0.0;
93 /*
94  * for (k = 0; k < nr; k++) {
95  * r = pri[k];
96  * if (todo[r]) {
97  * fari = fabs(a[r][i]);
98  * if (fari > mul) {
99  * mulpiv = fari*minpiv;
100  * if (nzc[r] <= m || mulpiv > fapvr) {
101  * m = nzc[pvr = r];
102  * fapvr = fari;
103  * }
104  * mul = fari;
105  * } else if (nzc[r] < m && fari > mulpiv) {
106  * m = nzc[pvr = r];
107  * }
108  * }
109  * }
110  */
111  for (k = 0; k < nr; k++) {
112  r = pri[k];
113  if (todo[r]) {
114  fari = fabs(a[r][i]);
115  if (fari > mul) {
116  mul = fari;
117  }
118  }
119  }
120  mulpiv = mul*minpiv;
121  for (k = 0; k < nr; k++) {
122  r = pri[k];
123  if (todo[r]) {
124  fari = fabs(a[r][i]);
125  if (fari >= mulpiv && nzc[r] < nc) {
126  nc = nzc[pvr = r];
127  }
128  }
129  }
130  if (nc == neq + 1) { return NAIVE_ENOPIV + i; }
131  if (mulpiv == 0.) { return NAIVE_ENOPIV + i; }
132 
133  todo[pvr] = 0;
134  papvr = a[pvr];
135  den = papvr[i] = 1.0/papvr[i];
136  AO_nop_full();
137 
138  //set_wmb(piv[i], pvr);
139  piv[i] = pvr;
140 
141 
142  } else {
143  while ((pvr = AO_int_load_full((unsigned int *)&piv[i])) < 0);
144  papvr = a[pvr];
145  nr = nzr[i];
146  if (nr == 0) {
147  return NAIVE_ENULCOL + i;
148  }
149  den = papvr[i];
150  }
151 
152  nc = nzc[pvr];
153  pci = ci[pvr];
154 
155  for (k = 0; k < nr; k++) {
156  //for (k = task; k < nr; k += ncpu) {
157  r = pri[k];
158  if (todo[r] == 0 || r%ncpu != task) {
159  //if (todo[r] == 0) {
160  continue;
161  }
162  pnzk = nz[r];
163  par = a[r];
164  mul = par[i] = par[i]*den;
165  for (j = 0; j < nc; j++) {
166  pvc = pci[j];
167  if (pvc <= i) {
168  continue;
169  }
170  if (pnzk[pvc]) {
171  par[pvc] -= mul*papvr[pvc];
172  } else {
173  par[pvc] = -mul*papvr[pvc];
174  ci[r][nzc[r]++] = pvc;
175 #if 0
176  while (atomic_inc_and_test((atomic_t *)&col_locks[pvc]));
177 #endif
178  //while (mbdyn_cmpxchgl((int32_t *)&col_locks[pvc], 1, 0) != 0);
179  while (AO_test_and_set_full(&col_locks[pvc]) == AO_TS_CLEAR);
180  pnzk[pvc] = 1;
181  ril[k][nril[k]++] = pvc;
182  AO_CLEAR(&col_locks[pvc]);
183  //atomic_set((atomic_t *)&col_locks[pvc], 0);
184  }
185  }
186  }
187 
188  AO_fetch_and_add1_full(&row_locks[i]);
189  while (AO_load_full(&row_locks[i]) < ncpu);
190 
191  if (task == 0) {
192  for (k = 0; k < nr; k++) {
193  r = pri[k];
194  for (j = 0; j < nril[k]; j++) {
195  pvc = ril[k][j];
196  ri[pvc][nzr[pvc]++] = r;
197  }
198  nril[k] = 0;
199  }
200  }
201  }
202 
203  return 0;
204 }
205 
206 int
207 pnaivslv(doublereal** a,
208  integer neq,
209  integer *nzc,
210  integer** ci,
211  doublereal *rhs,
212  integer *piv,
213  doublereal *fwd,
214  doublereal *sol,
215  unsigned long *locks,
216  int task,
217  int ncpu)
218 {
219 
220  integer i, k, nc, r, c;
221  integer *pci;
222  doublereal s;
223  doublereal *par;
224 
225  if (!task) {
226  fwd[0] = rhs[piv[0]];
227  AO_nop_full();
228  AO_store_full(locks, 1);
229  }
230 
231  for (i = 1; i < neq; i++) {
232 #if 0
233  for (i = 1 + task; i < neq; i+=ncpu) {
234 #endif
235  if (i%ncpu != task) { continue; }
236  //used[task + 2]++;
237  nc = nzc[r = piv[i]];
238  s = rhs[r];
239  par = a[r];
240  pci = ci[r];
241  for (k = 0; k < nc; k++) {
242  c = pci[k];
243  if (c < i) {
244  while (!AO_load_full(&locks[c]));
245  s -= par[c]*fwd[c];
246  }
247  }
248  AO_nop_full();
249  fwd[i] = s;
250  AO_nop_full();
251 #if 0
252  fwd[i] = s;
253 #endif
254  AO_store_full(&locks[i], 1);
255  }
256 
257  AO_fetch_and_add1_full(&locks[neq]);
258  while (AO_load_full(&locks[neq]) < ncpu);
259 
260  neq--;
261  r = piv[neq];
262  if (!task) {
263  sol[neq] = fwd[neq]*a[r][neq];
264  AO_store_full(&locks[neq], 0);
265  }
266  for (i = neq - 1; i >= 0; i--) {
267 #if 0
268  for (i = neq - 1 - task; i >= 0; i-=ncpu) {
269 #endif
270  if (i%ncpu != task) { continue; }
271 #if 0
272  used[task + 4]++;
273 #endif
274  r = piv[i];
275  nc = nzc[r];
276  s = fwd[i];
277  par = a[r];
278  pci = ci[r];
279  for (k = 0; k < nc; k++) {
280  if ((c = pci[k]) > i) {
281  while (AO_load_full(&locks[c]));
282  s -= par[c]*sol[c];
283  }
284  }
285  AO_nop_full();
286  sol[i] = s*par[i];
287  AO_nop_full();
288 
289 #if 0
290  sol[i] = s*par[i];
291 #endif
292  AO_store_full(&locks[i], 0);
293  }
294 
295  return 0;
296 }
297 
298 void
299 naivesad(doublereal** a,
300  integer** ri,
301  integer *nzr,
302  integer** ci,
303  integer *nzc,
304  char ** nz,
305  integer neq,
306  integer *rowindx,
307  integer *colindx,
308  doublereal *elmat,
309  integer elr,
310  integer elc,
311  integer eldc)
312 {
313  integer i, k, r, c, er;
314  doublereal *par;
315 
316  er = 0;
317  for (i = 0; i < elr; i++) {
318  if ((r = rowindx[i]) >= 0) {
319  par = a[r];
320  for (k = 0; k < elc; k++) {
321  c = colindx[k];
322  if (!elmat[er + k] && c >= 0) {
323  if (nz[r][c]) {
324  par[c] += elmat[er + k];
325  } else {
326  par[c] = elmat[er + k];
327  nz[r][c] = 1;
328  ri[c][nzr[c]++] = r;
329  ci[r][nzc[r]++] = c;
330  }
331  }
332  }
333  }
334  er += eldc;
335  }
336 }
337 
338 void
339 naivepsad(doublereal** ga,
340  integer** gri,
341  integer *gnzr,
342  integer** gci,
343  integer *gnzc,
344  char ** nz,
345  doublereal** a,
346  integer** ci,
347  integer *nzc,
348  integer from,
349  integer to)
350 {
351  integer i, r, c, nc;
352  doublereal *pgar, *par;
353 
354  for (r = from; r <= to; r++) {
355  if ((nc = nzc[r])) {
356  pgar = ga[r];
357  par = a[r];
358  for (i = 0; i < nc; i++) {
359  if (nz[r][c = ci[r][i]]) {
360  pgar[c] += par[c];
361  } else {
362  pgar[c] = par[c];
363  nz[r][c] = 1;
364  gri[c][gnzr[c]++] = r;
365  gci[r][gnzc[r]++] = c;
366  }
367  }
368  }
369  }
370 }
371 
372 #endif /* USE_NAIVE_MULTITHREAD */
GradientExpression< UnaryExpr< FuncFabs, Expr > > fabs(const GradientExpression< Expr > &u)
Definition: gradient.h:2973
#define MINPIV
Definition: mthrdslv.c:73
#define NAIVE_ENOPIV
Definition: mthrdslv.h:74
static std::stack< cleanup * > c
Definition: cleanup.cc:59
#define NAIVE_ENULCOL
Definition: mthrdslv.h:73
static const doublereal a
Definition: hfluid_.h:289
double doublereal
Definition: colamd.c:52
long int integer
Definition: colamd.c:51