00001
00002
00003
00004
00005
00006 #ifndef CoinAbcHelperFunctions_H
00007 #define CoinAbcHelperFunctions_H
00008
00009 #include "ClpConfig.h"
00010 #ifdef HAVE_CMATH
00011 # include <cmath>
00012 #else
00013 # ifdef HAVE_MATH_H
00014 # include <math.h>
00015 # else
00016 # include <cmath>
00017 # endif
00018 #endif
00019 #include "CoinAbcCommon.hpp"
#ifndef abc_assert
/* Lightweight assertion used by the Abc code: when the condition is false it
   prints the file, the line and the text of the condition, then calls abort().
   The condition is wrapped in parentheses before being negated, so compound
   expressions such as `a >= b` or `p && q` are tested as a whole instead of
   having `!` bind to their first operand only.  The text is produced with the
   standard `#` stringizing operator, so no library-specific macro is needed.
   Requires printf() and abort() to be declared by the including file. */
#define abc_assert(condition) \
  { if (!(condition)) {printf("abc_assert in %s at line %d - %s is false\n", \
                              __FILE__, __LINE__, #condition); abort();} }
#endif
00025
00026 #define CILK_FOR_GRAINSIZE 128
00027
00028 #if AVX2==1
00029 #include "emmintrin.h"
00030 #elif AVX2==2
00031 #include <immintrin.h>
00032 #elif AVX2==3
00033 #include "avx2intrin.h"
00034 #endif
00035
00036
00043 #define UNROLL_SCATTER 2
00044 #define INLINE_SCATTER 1
00045 #if INLINE_SCATTER==0
00046 void CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00047 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00048 const int * COIN_RESTRICT thisIndex,
00049 CoinFactorizationDouble * COIN_RESTRICT region);
00050 #else
00051 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00052 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00053 const int * COIN_RESTRICT thisIndex,
00054 CoinFactorizationDouble * COIN_RESTRICT region)
00055 {
00056 #if UNROLL_SCATTER==0
00057 for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
00058 CoinSimplexInt iRow = thisIndex[j];
00059 CoinFactorizationDouble regionValue = region[iRow];
00060 CoinFactorizationDouble value = thisElement[j];
00061 assert (value);
00062 region[iRow] = regionValue - value * pivotValue;
00063 }
00064 #elif UNROLL_SCATTER==1
00065 if ((number&1)!=0) {
00066 number--;
00067 CoinSimplexInt iRow = thisIndex[number];
00068 CoinFactorizationDouble regionValue = region[iRow];
00069 CoinFactorizationDouble value = thisElement[number];
00070 region[iRow] = regionValue - value * pivotValue;
00071 }
00072 for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
00073 CoinSimplexInt iRow0 = thisIndex[j];
00074 CoinSimplexInt iRow1 = thisIndex[j-1];
00075 CoinFactorizationDouble regionValue0 = region[iRow0];
00076 CoinFactorizationDouble regionValue1 = region[iRow1];
00077 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00078 region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00079 }
00080 #elif UNROLL_SCATTER==2
00081 if ((number&1)!=0) {
00082 number--;
00083 CoinSimplexInt iRow = thisIndex[number];
00084 CoinFactorizationDouble regionValue = region[iRow];
00085 CoinFactorizationDouble value = thisElement[number];
00086 region[iRow] = regionValue - value * pivotValue;
00087 }
00088 if ((number&2)!=0) {
00089 CoinSimplexInt iRow0 = thisIndex[number-1];
00090 CoinFactorizationDouble regionValue0 = region[iRow0];
00091 CoinFactorizationDouble value0 = thisElement[number-1];
00092 CoinSimplexInt iRow1 = thisIndex[number-2];
00093 CoinFactorizationDouble regionValue1 = region[iRow1];
00094 CoinFactorizationDouble value1 = thisElement[number-2];
00095 region[iRow0] = regionValue0 - value0 * pivotValue;
00096 region[iRow1] = regionValue1 - value1 * pivotValue;
00097 number-=2;
00098 }
00099 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
00100 cilk_for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
00101 CoinSimplexInt iRow0 = thisIndex[j];
00102 CoinSimplexInt iRow1 = thisIndex[j-1];
00103 CoinFactorizationDouble regionValue0 = region[iRow0];
00104 CoinFactorizationDouble regionValue1 = region[iRow1];
00105 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00106 region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00107 CoinSimplexInt iRow2 = thisIndex[j-2];
00108 CoinSimplexInt iRow3 = thisIndex[j-3];
00109 CoinFactorizationDouble regionValue2 = region[iRow2];
00110 CoinFactorizationDouble regionValue3 = region[iRow3];
00111 region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
00112 region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
00113 }
00114 #elif UNROLL_SCATTER==3
00115 CoinSimplexInt iRow0;
00116 CoinSimplexInt iRow1;
00117 CoinFactorizationDouble regionValue0;
00118 CoinFactorizationDouble regionValue1;
00119 switch(static_cast<unsigned int>(number)) {
00120 case 0:
00121 break;
00122 case 1:
00123 iRow0 = thisIndex[0];
00124 regionValue0 = region[iRow0];
00125 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00126 break;
00127 case 2:
00128 iRow0 = thisIndex[0];
00129 iRow1 = thisIndex[1];
00130 regionValue0 = region[iRow0];
00131 regionValue1 = region[iRow1];
00132 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00133 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00134 break;
00135 case 3:
00136 iRow0 = thisIndex[0];
00137 iRow1 = thisIndex[1];
00138 regionValue0 = region[iRow0];
00139 regionValue1 = region[iRow1];
00140 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00141 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00142 iRow0 = thisIndex[2];
00143 regionValue0 = region[iRow0];
00144 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00145 break;
00146 case 4:
00147 iRow0 = thisIndex[0];
00148 iRow1 = thisIndex[1];
00149 regionValue0 = region[iRow0];
00150 regionValue1 = region[iRow1];
00151 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00152 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00153 iRow0 = thisIndex[2];
00154 iRow1 = thisIndex[3];
00155 regionValue0 = region[iRow0];
00156 regionValue1 = region[iRow1];
00157 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00158 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00159 break;
00160 case 5:
00161 iRow0 = thisIndex[0];
00162 iRow1 = thisIndex[1];
00163 regionValue0 = region[iRow0];
00164 regionValue1 = region[iRow1];
00165 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00166 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00167 iRow0 = thisIndex[2];
00168 iRow1 = thisIndex[3];
00169 regionValue0 = region[iRow0];
00170 regionValue1 = region[iRow1];
00171 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00172 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00173 iRow0 = thisIndex[4];
00174 regionValue0 = region[iRow0];
00175 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00176 break;
00177 case 6:
00178 iRow0 = thisIndex[0];
00179 iRow1 = thisIndex[1];
00180 regionValue0 = region[iRow0];
00181 regionValue1 = region[iRow1];
00182 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00183 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00184 iRow0 = thisIndex[2];
00185 iRow1 = thisIndex[3];
00186 regionValue0 = region[iRow0];
00187 regionValue1 = region[iRow1];
00188 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00189 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00190 iRow0 = thisIndex[4];
00191 iRow1 = thisIndex[5];
00192 regionValue0 = region[iRow0];
00193 regionValue1 = region[iRow1];
00194 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00195 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00196 break;
00197 case 7:
00198 iRow0 = thisIndex[0];
00199 iRow1 = thisIndex[1];
00200 regionValue0 = region[iRow0];
00201 regionValue1 = region[iRow1];
00202 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00203 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00204 iRow0 = thisIndex[2];
00205 iRow1 = thisIndex[3];
00206 regionValue0 = region[iRow0];
00207 regionValue1 = region[iRow1];
00208 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00209 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00210 iRow0 = thisIndex[4];
00211 iRow1 = thisIndex[5];
00212 regionValue0 = region[iRow0];
00213 regionValue1 = region[iRow1];
00214 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00215 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00216 iRow0 = thisIndex[6];
00217 regionValue0 = region[iRow0];
00218 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
00219 break;
00220 case 8:
00221 iRow0 = thisIndex[0];
00222 iRow1 = thisIndex[1];
00223 regionValue0 = region[iRow0];
00224 regionValue1 = region[iRow1];
00225 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00226 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00227 iRow0 = thisIndex[2];
00228 iRow1 = thisIndex[3];
00229 regionValue0 = region[iRow0];
00230 regionValue1 = region[iRow1];
00231 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00232 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00233 iRow0 = thisIndex[4];
00234 iRow1 = thisIndex[5];
00235 regionValue0 = region[iRow0];
00236 regionValue1 = region[iRow1];
00237 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00238 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00239 iRow0 = thisIndex[6];
00240 iRow1 = thisIndex[7];
00241 regionValue0 = region[iRow0];
00242 regionValue1 = region[iRow1];
00243 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
00244 region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
00245 break;
00246 default:
00247 if ((number&1)!=0) {
00248 number--;
00249 CoinSimplexInt iRow = thisIndex[number];
00250 CoinFactorizationDouble regionValue = region[iRow];
00251 CoinFactorizationDouble value = thisElement[number];
00252 region[iRow] = regionValue - value * pivotValue;
00253 }
00254 for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
00255 CoinSimplexInt iRow0 = thisIndex[j];
00256 CoinSimplexInt iRow1 = thisIndex[j-1];
00257 CoinFactorizationDouble regionValue0 = region[iRow0];
00258 CoinFactorizationDouble regionValue1 = region[iRow1];
00259 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00260 region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00261 }
00262 break;
00263 }
00264 #endif
00265 }
00266 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00267 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00268 CoinFactorizationDouble * COIN_RESTRICT region)
00269 {
00270 #if UNROLL_SCATTER==0
00271 const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
00272 for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
00273 CoinSimplexInt iRow = thisIndex[j];
00274 CoinFactorizationDouble regionValue = region[iRow];
00275 CoinFactorizationDouble value = thisElement[j];
00276 assert (value);
00277 region[iRow] = regionValue - value * pivotValue;
00278 }
00279 #elif UNROLL_SCATTER==1
00280 const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
00281 if ((number&1)!=0) {
00282 number--;
00283 CoinSimplexInt iRow = thisIndex[number];
00284 CoinFactorizationDouble regionValue = region[iRow];
00285 CoinFactorizationDouble value = thisElement[number];
00286 region[iRow] = regionValue - value * pivotValue;
00287 }
00288 for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
00289 CoinSimplexInt iRow0 = thisIndex[j];
00290 CoinSimplexInt iRow1 = thisIndex[j-1];
00291 CoinFactorizationDouble regionValue0 = region[iRow0];
00292 CoinFactorizationDouble regionValue1 = region[iRow1];
00293 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00294 region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00295 }
00296 #elif UNROLL_SCATTER==2
00297 const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
00298 if ((number&1)!=0) {
00299 number--;
00300 CoinSimplexInt iRow = thisIndex[number];
00301 CoinFactorizationDouble regionValue = region[iRow];
00302 CoinFactorizationDouble value = thisElement[number];
00303 region[iRow] = regionValue - value * pivotValue;
00304 }
00305 if ((number&2)!=0) {
00306 CoinSimplexInt iRow0 = thisIndex[number-1];
00307 CoinFactorizationDouble regionValue0 = region[iRow0];
00308 CoinFactorizationDouble value0 = thisElement[number-1];
00309 CoinSimplexInt iRow1 = thisIndex[number-2];
00310 CoinFactorizationDouble regionValue1 = region[iRow1];
00311 CoinFactorizationDouble value1 = thisElement[number-2];
00312 region[iRow0] = regionValue0 - value0 * pivotValue;
00313 region[iRow1] = regionValue1 - value1 * pivotValue;
00314 number-=2;
00315 }
00316 #if AVX2==22
00317 CoinFactorizationDouble temp[4] __attribute__ ((aligned (32)));
00318 __m256d pv = _mm256_broadcast_sd(&pivotValue);
00319 for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
00320 __m256d elements=_mm256_loadu_pd(thisElement+j-3);
00321 CoinSimplexInt iRow0 = thisIndex[j-3];
00322 CoinSimplexInt iRow1 = thisIndex[j-2];
00323 CoinSimplexInt iRow2 = thisIndex[j-1];
00324 CoinSimplexInt iRow3 = thisIndex[j-0];
00325 temp[0] = region[iRow0];
00326 temp[1] = region[iRow1];
00327 temp[2] = region[iRow2];
00328 temp[3] = region[iRow3];
00329 __m256d t0=_mm256_load_pd(temp);
00330 t0 -= pv*elements;
00331 _mm256_store_pd (temp, t0);
00332 region[iRow0] = temp[0];
00333 region[iRow1] = temp[1];
00334 region[iRow2] = temp[2];
00335 region[iRow3] = temp[3];
00336 }
00337 #else
00338 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
00339 cilk_for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
00340 CoinSimplexInt iRow0 = thisIndex[j];
00341 CoinSimplexInt iRow1 = thisIndex[j-1];
00342 CoinFactorizationDouble regionValue0 = region[iRow0];
00343 CoinFactorizationDouble regionValue1 = region[iRow1];
00344 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00345 region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00346 CoinSimplexInt iRow2 = thisIndex[j-2];
00347 CoinSimplexInt iRow3 = thisIndex[j-3];
00348 CoinFactorizationDouble regionValue2 = region[iRow2];
00349 CoinFactorizationDouble regionValue3 = region[iRow3];
00350 region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
00351 region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
00352 }
00353 #endif
00354 #elif UNROLL_SCATTER==3
00355 const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
00356 CoinSimplexInt iRow0;
00357 CoinSimplexInt iRow1;
00358 CoinFactorizationDouble regionValue0;
00359 CoinFactorizationDouble regionValue1;
00360 switch(static_cast<unsigned int>(number)) {
00361 case 0:
00362 break;
00363 case 1:
00364 iRow0 = thisIndex[0];
00365 regionValue0 = region[iRow0];
00366 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00367 break;
00368 case 2:
00369 iRow0 = thisIndex[0];
00370 iRow1 = thisIndex[1];
00371 regionValue0 = region[iRow0];
00372 regionValue1 = region[iRow1];
00373 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00374 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00375 break;
00376 case 3:
00377 iRow0 = thisIndex[0];
00378 iRow1 = thisIndex[1];
00379 regionValue0 = region[iRow0];
00380 regionValue1 = region[iRow1];
00381 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00382 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00383 iRow0 = thisIndex[2];
00384 regionValue0 = region[iRow0];
00385 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00386 break;
00387 case 4:
00388 iRow0 = thisIndex[0];
00389 iRow1 = thisIndex[1];
00390 regionValue0 = region[iRow0];
00391 regionValue1 = region[iRow1];
00392 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00393 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00394 iRow0 = thisIndex[2];
00395 iRow1 = thisIndex[3];
00396 regionValue0 = region[iRow0];
00397 regionValue1 = region[iRow1];
00398 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00399 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00400 break;
00401 case 5:
00402 iRow0 = thisIndex[0];
00403 iRow1 = thisIndex[1];
00404 regionValue0 = region[iRow0];
00405 regionValue1 = region[iRow1];
00406 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00407 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00408 iRow0 = thisIndex[2];
00409 iRow1 = thisIndex[3];
00410 regionValue0 = region[iRow0];
00411 regionValue1 = region[iRow1];
00412 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00413 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00414 iRow0 = thisIndex[4];
00415 regionValue0 = region[iRow0];
00416 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00417 break;
00418 case 6:
00419 iRow0 = thisIndex[0];
00420 iRow1 = thisIndex[1];
00421 regionValue0 = region[iRow0];
00422 regionValue1 = region[iRow1];
00423 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00424 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00425 iRow0 = thisIndex[2];
00426 iRow1 = thisIndex[3];
00427 regionValue0 = region[iRow0];
00428 regionValue1 = region[iRow1];
00429 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00430 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00431 iRow0 = thisIndex[4];
00432 iRow1 = thisIndex[5];
00433 regionValue0 = region[iRow0];
00434 regionValue1 = region[iRow1];
00435 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00436 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00437 break;
00438 case 7:
00439 iRow0 = thisIndex[0];
00440 iRow1 = thisIndex[1];
00441 regionValue0 = region[iRow0];
00442 regionValue1 = region[iRow1];
00443 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00444 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00445 iRow0 = thisIndex[2];
00446 iRow1 = thisIndex[3];
00447 regionValue0 = region[iRow0];
00448 regionValue1 = region[iRow1];
00449 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00450 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00451 iRow0 = thisIndex[4];
00452 iRow1 = thisIndex[5];
00453 regionValue0 = region[iRow0];
00454 regionValue1 = region[iRow1];
00455 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00456 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00457 iRow0 = thisIndex[6];
00458 regionValue0 = region[iRow0];
00459 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
00460 break;
00461 case 8:
00462 iRow0 = thisIndex[0];
00463 iRow1 = thisIndex[1];
00464 regionValue0 = region[iRow0];
00465 regionValue1 = region[iRow1];
00466 region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00467 region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00468 iRow0 = thisIndex[2];
00469 iRow1 = thisIndex[3];
00470 regionValue0 = region[iRow0];
00471 regionValue1 = region[iRow1];
00472 region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00473 region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00474 iRow0 = thisIndex[4];
00475 iRow1 = thisIndex[5];
00476 regionValue0 = region[iRow0];
00477 regionValue1 = region[iRow1];
00478 region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00479 region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00480 iRow0 = thisIndex[6];
00481 iRow1 = thisIndex[7];
00482 regionValue0 = region[iRow0];
00483 regionValue1 = region[iRow1];
00484 region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
00485 region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
00486 break;
00487 default:
00488 if ((number&1)!=0) {
00489 number--;
00490 CoinSimplexInt iRow = thisIndex[number];
00491 CoinFactorizationDouble regionValue = region[iRow];
00492 CoinFactorizationDouble value = thisElement[number];
00493 region[iRow] = regionValue - value * pivotValue;
00494 }
00495 for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
00496 CoinSimplexInt iRow0 = thisIndex[j];
00497 CoinSimplexInt iRow1 = thisIndex[j-1];
00498 CoinFactorizationDouble regionValue0 = region[iRow0];
00499 CoinFactorizationDouble regionValue1 = region[iRow1];
00500 region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00501 region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00502 }
00503 break;
00504 }
00505 #endif
00506 }
00507 #endif
00508
/* coin_prefetch(mem) / coin_prefetch_const(mem): cache prefetch hints for the
   address `mem`.  Without COIN_PREFETCH both macros expand to nothing; with
   it they emit an inline-assembly prefetch instruction. */
#ifndef COIN_PREFETCH

#define coin_prefetch(mem)
#define coin_prefetch_const(mem)
#else
#if 0
/* Alternative using the plain "prefetch" instruction (currently not selected). */
#define coin_prefetch(mem) \
  __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<char *>(mem))))
#define coin_prefetch_const(mem) \
  __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<const char *>(mem))))
#else
/* Selected form: "prefetchnta". */
#define coin_prefetch(mem) \
  __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<char *>(mem))))
#define coin_prefetch_const(mem) \
  __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<const char *>(mem))))
#endif
#endif
/* Chunk-size constants.  NEW_CHUNK_SIZE_INCREMENT is NEW_CHUNK_SIZE plus half
   of it and NEW_CHUNK_SIZE_OFFSET is half of NEW_CHUNK_SIZE.  All three are
   plain parenthesised expressions without a trailing semicolon, so they can be
   used inside larger expressions and in macro or function arguments. */
#define NEW_CHUNK_SIZE 4
#define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE+NEW_CHUNK_SIZE/2)
#define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE/2)
00529
00530
00531
00532 #define SCATTER_ATTRIBUTE
00533 typedef void (*scatterUpdate) (int,CoinFactorizationDouble,const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE ;
00534 typedef struct {
00535 scatterUpdate functionPointer;
00536 CoinBigIndex offset;
00537 int number;
00538 } scatterStruct;
00539 void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier,
00540 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00541 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00542 void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
00543 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00544 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00545 void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
00546 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00547 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00548 void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
00549 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00550 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00551 void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
00552 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00553 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00554 void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
00555 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00556 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00557 void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
00558 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00559 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00560 void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
00561 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00562 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00563 void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
00564 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00565 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00566 void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
00567 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00568 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00569 void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
00570 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00571 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00572 void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
00573 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00574 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00575 void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
00576 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00577 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00578 void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier,
00579 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00580 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00581 void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier,
00582 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00583 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00584 void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier,
00585 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00586 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00587 void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier,
00588 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00589 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00590 void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier,
00591 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00592 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00593 void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier,
00594 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00595 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00596 void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier,
00597 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00598 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00599 void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier,
00600 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00601 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00602 void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier,
00603 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00604 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00605 void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier,
00606 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00607 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00608 void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier,
00609 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00610 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00611 void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier,
00612 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00613 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00614 void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier,
00615 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00616 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00617 void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier,
00618 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00619 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00620 void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier,
00621 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00622 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00623 void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier,
00624 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00625 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00626 void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier,
00627 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00628 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00629 void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier,
00630 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00631 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00632 void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier,
00633 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00634 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00635 void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier,
00636 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00637 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00638 void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier,
00639 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00640 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00641 void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier,
00642 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00643 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00644 void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier,
00645 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00646 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00647 void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier,
00648 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00649 CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00650 #if INLINE_SCATTER==0
00651 void CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00652 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00653 const int * COIN_RESTRICT thisIndex,
00654 CoinFactorizationDouble * COIN_RESTRICT region,
00655 double * COIN_RESTRICT work);
00656 #else
00657 #if 0
00658 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00659 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00660 const int * COIN_RESTRICT thisIndex,
00661 CoinFactorizationDouble * COIN_RESTRICT region,
00662 double * COIN_RESTRICT )
00663 {
00664 #if UNROLL_SCATTER==0
00665 for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
00666 CoinSimplexInt iRow = thisIndex[j];
00667 CoinFactorizationDouble regionValue = region[iRow];
00668 CoinFactorizationDouble value = thisElement[j];
00669 assert (value);
00670 region[iRow] = regionValue - value * pivotValue;
00671 }
00672 #elif UNROLL_SCATTER==1
00673 if ((number&1)!=0) {
00674 CoinSimplexInt iRow = thisIndex[0];
00675 thisIndex++;
00676 CoinFactorizationDouble regionValue = region[iRow];
00677 CoinFactorizationDouble value = thisElement[0];
00678 thisElement++;
00679 region[iRow] = regionValue - value * pivotValue;
00680 }
00681 number = number>>1;
00682 CoinFactorizationDouble work2[4];
00683 for ( ; number !=0; number-- ) {
00684 CoinSimplexInt iRow0 = thisIndex[0];
00685 CoinSimplexInt iRow1 = thisIndex[1];
00686 work2[0] = region[iRow0];
00687 work2[1] = region[iRow1];
00688 #if 0
00689 work2[2] = region[iRow0];
00690 work2[3] = region[iRow1];
00691
00692 __v4df b = __builtin_ia32_loadupd256(work2);
00693
00694 #endif
00695 work2[0] -= thisElement[0] * pivotValue;
00696 work2[1] -= thisElement[1] * pivotValue;
00697 region[iRow0] = work2[0];
00698 region[iRow1] = work2[1];
00699 thisIndex+=2;
00700 thisElement+=2;
00701 }
00702 #endif
00703 }
00704 #endif
00705 #endif
00706 #define UNROLL_GATHER 0
00707 #define INLINE_GATHER 1
00708 #if INLINE_GATHER==0
00709 CoinFactorizationDouble CoinAbcGatherUpdate(CoinSimplexInt number,
00710 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00711 const int * COIN_RESTRICT thisIndex,
00712 CoinFactorizationDouble * COIN_RESTRICT region);
00713 #else
00714 CoinFactorizationDouble ABC_INLINE inline CoinAbcGatherUpdate(CoinSimplexInt number,
00715 const CoinFactorizationDouble * COIN_RESTRICT thisElement,
00716 const int * COIN_RESTRICT thisIndex,
00717 CoinFactorizationDouble * COIN_RESTRICT region)
00718 {
00719 #if UNROLL_GATHER==0
00720 CoinFactorizationDouble pivotValue=0.0;
00721 for (CoinBigIndex j = 0; j < number; j ++ ) {
00722 CoinFactorizationDouble value = thisElement[j];
00723 CoinSimplexInt jRow = thisIndex[j];
00724 value *= region[jRow];
00725 pivotValue -= value;
00726 }
00727 return pivotValue;
00728 #else
00729 #error code
00730 #endif
00731 }
00732 #endif
// Build-time switches for CoinAbcMultiplyIndexed.
// UNROLL_MULTIPLY_INDEXED is defined here but not referenced in this part of
// the header; presumably the out-of-line definition consults it - TODO confirm.
00733 #define UNROLL_MULTIPLY_INDEXED 0
// 0 selects the out-of-line prototypes below; any other value selects the
// inline definition in the #else branch.
00734 #define INLINE_MULTIPLY_INDEXED 0
00735 #if INLINE_MULTIPLY_INDEXED==0
// Out-of-line version: two overloads are declared (double multiplier with a
// CoinFactorizationDouble region, and long double multiplier with a long double
// region).  Their definitions are not in this header section.
00736 void CoinAbcMultiplyIndexed(int number,
00737 const double * COIN_RESTRICT multiplier,
00738 const int * COIN_RESTRICT thisIndex,
00739 CoinFactorizationDouble * COIN_RESTRICT region);
00740 void CoinAbcMultiplyIndexed(int number,
00741 const long double * COIN_RESTRICT multiplier,
00742 const int * COIN_RESTRICT thisIndex,
00743 long double * COIN_RESTRICT region);
00744 #else
// NOTE(review): the inline variant has an empty body and provides no
// long double overload, so switching INLINE_MULTIPLY_INDEXED to non-zero would
// turn every call into a no-op.  Looks like an unfinished stub - confirm
// before enabling.
00745 void ABC_INLINE inline CoinAbcMultiplyIndexed(int number,
00746 const double * COIN_RESTRICT multiplier,
00747 const int * COIN_RESTRICT thisIndex,
00748 CoinFactorizationDouble * COIN_RESTRICT region)
00749 {
00750 }
00751 #endif
// Prototypes for helpers that work on plain double / int / unsigned char
// arrays.  Only the declarations are visible in this header section; the notes
// below are inferred from names and signatures and must be checked against
// the definitions before being relied on.

// Absolute-value statistics and scaling of a region of `size` doubles
// (presumably; "NormalValues" variants look like they skip some entries -
// TODO confirm which).
00752 double CoinAbcMaximumAbsElement(const double * region, int size);
00753 void CoinAbcMinMaxAbsElement(const double * region, int size,double & minimum , double & maximum);
00754 void CoinAbcMinMaxAbsNormalValues(const double * region, int size,double & minimum , double & maximum);
00755 void CoinAbcScale(double * region, double multiplier,int size);
00756 void CoinAbcScaleNormalValues(double * region, double multiplier,double killIfLessThanThis,int size);
00758 double CoinAbcMaximumAbsElementAndScale(double * region, double multiplier,int size);
// Fill / linear-combination / reduction helpers (semantics assumed from the
// names - TODO confirm).
00759 void CoinAbcSetElements(double * region, int size, double value);
00760 void CoinAbcMultiplyAdd(const double * region1, int size, double multiplier1,
00761 double * regionChanged, double multiplier2);
00762 double CoinAbcInnerProduct(const double * region1, int size, const double * region2);
00763 void CoinAbcGetNorms(const double * region, int size, double & norm1, double & norm2);
// Index-driven copies between regions; `index` holds `number` entries.
// NOTE(review): which side is indexed in each routine is not visible here.
00765 void CoinAbcScatterTo(const double * regionFrom, double * regionTo, const int * index,int number);
00767 void CoinAbcGatherFrom(const double * regionFrom, double * regionTo, const int * index,int number);
00769 void CoinAbcScatterZeroTo(double * regionTo, const int * index,int number);
00771 void CoinAbcScatterToList(const double * regionFrom, double * regionTo,
00772 const int * indexList, const int * indexScatter ,int number);
// Element-wise numeric transforms (presumably in place for InverseSqrts and
// from `input` into `array` for Reciprocal - TODO confirm).
00774 void CoinAbcInverseSqrts(double * array, int n);
00775 void CoinAbcReciprocal(double * array, int n, const double *input);
// Copy / zero / move helpers, overloaded for double, int and unsigned char.
// `size` is presumably an element count rather than a byte count - TODO confirm.
00776 void CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size);
00777 void CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size);
00778 void CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size);
00779 void CoinAbcMemset0Long(double * array,int size);
00780 void CoinAbcMemset0Long(int * array,int size);
00781 void CoinAbcMemset0Long(unsigned char * array,int size);
00782 void CoinAbcMemmove(double * array,const double * arrayFrom,int size);
00783 void CoinAbcMemmove(int * array,const int * arrayFrom,int size);
00784 void CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size);
// Note arrayFrom is non-const here, unlike the CoinAbcMemmove overloads.
00786 void CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size);
// Compaction of sectioned storage described by starts/lengths; the meaning of
// the int return value is not visible here.
00788 int CoinAbcCompact(int numberSections,int alreadyDone,double * array,const int * starts, const int * lengths);
00790 int CoinAbcCompact(int numberSections,int alreadyDone,int * array,const int * starts, const int * lengths);
00791 #endif
00792 #if ABC_CREATE_SCATTER_FUNCTION
00793 SCATTER_ATTRIBUTE void functionName(ScatterUpdate1)(int numberIn, CoinFactorizationDouble multiplier,
00794 const CoinFactorizationDouble * COIN_RESTRICT element,
00795 CoinFactorizationDouble * COIN_RESTRICT region)
00796 {
00797 #ifndef NDEBUG
00798 assert (numberIn==1);
00799 #endif
00800 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+1);
00801 int iColumn0=thisColumn[0];
00802 double value0=region[iColumn0];
00803 value0 OPERATION multiplier*element[0];
00804 region[iColumn0]=value0;
00805 }
00806 SCATTER_ATTRIBUTE void functionName(ScatterUpdate2)(int numberIn, CoinFactorizationDouble multiplier,
00807 const CoinFactorizationDouble * COIN_RESTRICT element,
00808 CoinFactorizationDouble * COIN_RESTRICT region)
00809 {
00810 #ifndef NDEBUG
00811 assert (numberIn==2);
00812 #endif
00813 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+2);
00814 #if NEW_CHUNK_SIZE==2
00815 int nFull=2&(~(NEW_CHUNK_SIZE-1));
00816 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
00817 coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
00818 int iColumn0=thisColumn[0];
00819 int iColumn1=thisColumn[1];
00820 CoinFactorizationDouble value0=region[iColumn0];
00821 CoinFactorizationDouble value1=region[iColumn1];
00822 value0 OPERATION multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
00823 value1 OPERATION multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
00824 region[iColumn0]=value0;
00825 region[iColumn1]=value1;
00826 element+=NEW_CHUNK_SIZE_INCREMENT;
00827 thisColumn = reinterpret_cast<const int *>(element);
00828 }
00829 #endif
00830 #if NEW_CHUNK_SIZE==4
00831 int iColumn0=thisColumn[0];
00832 int iColumn1=thisColumn[1];
00833 CoinFactorizationDouble value0=region[iColumn0];
00834 CoinFactorizationDouble value1=region[iColumn1];
00835 value0 OPERATION multiplier*element[0];
00836 value1 OPERATION multiplier*element[1];
00837 region[iColumn0]=value0;
00838 region[iColumn1]=value1;
00839 #endif
00840 }
00841 SCATTER_ATTRIBUTE void functionName(ScatterUpdate3)(int numberIn, CoinFactorizationDouble multiplier,
00842 const CoinFactorizationDouble * COIN_RESTRICT element,
00843 CoinFactorizationDouble * COIN_RESTRICT region)
00844 {
00845 #ifndef NDEBUG
00846 assert (numberIn==3);
00847 #endif
00848 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+3);
00849 #if AVX2==1
00850 double temp[2];
00851 #endif
00852 #if NEW_CHUNK_SIZE==2
00853 int nFull=3&(~(NEW_CHUNK_SIZE-1));
00854 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
00855
00856 int iColumn0=thisColumn[0];
00857 int iColumn1=thisColumn[1];
00858 CoinFactorizationDouble value0=region[iColumn0];
00859 CoinFactorizationDouble value1=region[iColumn1];
00860 value0 OPERATION multiplier*element[0];
00861 value1 OPERATION multiplier*element[1];
00862 region[iColumn0]=value0;
00863 region[iColumn1]=value1;
00864 element+=NEW_CHUNK_SIZE;
00865 thisColumn+ = NEW_CHUNK_SIZE;
00866 }
00867 #endif
00868 #if NEW_CHUNK_SIZE==2
00869 int iColumn0=thisColumn[0];
00870 double value0=region[iColumn0];
00871 value0 OPERATION multiplier*element[0];
00872 region[iColumn0]=value0;
00873 #else
00874 int iColumn0=thisColumn[0];
00875 int iColumn1=thisColumn[1];
00876 int iColumn2=thisColumn[2];
00877 #if AVX2==1
00878 __v2df bb;
00879 double value2=region[iColumn2];
00880 value2 OPERATION multiplier*element[2];
00881 set_const_v2df(bb,multiplier);
00882 temp[0]=region[iColumn0];
00883 temp[1]=region[iColumn1];
00884 region[iColumn2]=value2;
00885 __v2df v0 = __builtin_ia32_loadupd (temp);
00886 __v2df a = __builtin_ia32_loadupd (element);
00887 a *= bb;
00888 v0 OPERATION a;
00889 __builtin_ia32_storeupd (temp, v0);
00890 region[iColumn0]=temp[0];
00891 region[iColumn1]=temp[1];
00892 #else
00893 double value0=region[iColumn0];
00894 double value1=region[iColumn1];
00895 double value2=region[iColumn2];
00896 value0 OPERATION multiplier*element[0];
00897 value1 OPERATION multiplier*element[1];
00898 value2 OPERATION multiplier*element[2];
00899 region[iColumn0]=value0;
00900 region[iColumn1]=value1;
00901 region[iColumn2]=value2;
00902 #endif
00903 #endif
00904 }
00905 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4)(int numberIn, CoinFactorizationDouble multiplier,
00906 const CoinFactorizationDouble * COIN_RESTRICT element,
00907 CoinFactorizationDouble * COIN_RESTRICT region)
00908 {
00909 #ifndef NDEBUG
00910 assert (numberIn==4);
00911 #endif
00912 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+4);
00913 int nFull=4&(~(NEW_CHUNK_SIZE-1));
00914 #if AVX2==1
00915 double temp[4];
00916 #endif
00917 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
00918
00919 #if NEW_CHUNK_SIZE==2
00920 int iColumn0=thisColumn[0];
00921 int iColumn1=thisColumn[1];
00922 double value0=region[iColumn0];
00923 double value1=region[iColumn1];
00924 value0 OPERATION multiplier*element[0];
00925 value1 OPERATION multiplier*element[1];
00926 region[iColumn0]=value0;
00927 region[iColumn1]=value1;
00928 #elif NEW_CHUNK_SIZE==4
00929 int iColumn0=thisColumn[0];
00930 int iColumn1=thisColumn[1];
00931 int iColumn2=thisColumn[2];
00932 int iColumn3=thisColumn[3];
00933 #if AVX2==1
00934 __v2df bb;
00935 set_const_v2df(bb,multiplier);
00936 temp[0]=region[iColumn0];
00937 temp[1]=region[iColumn1];
00938 temp[2]=region[iColumn2];
00939 temp[3]=region[iColumn3];
00940 __v2df v0 = __builtin_ia32_loadupd (temp);
00941 __v2df v1 = __builtin_ia32_loadupd (temp+2);
00942 __v2df a = __builtin_ia32_loadupd (element);
00943 a *= bb;
00944 v0 OPERATION a;
00945 a = __builtin_ia32_loadupd (element+2);
00946 a *= bb;
00947 v1 OPERATION a;
00948 __builtin_ia32_storeupd (temp, v0);
00949 __builtin_ia32_storeupd (temp+2, v1);
00950 region[iColumn0]=temp[0];
00951 region[iColumn1]=temp[1];
00952 region[iColumn2]=temp[2];
00953 region[iColumn3]=temp[3];
00954 #else
00955 double value0=region[iColumn0];
00956 double value1=region[iColumn1];
00957 double value2=region[iColumn2];
00958 double value3=region[iColumn3];
00959 value0 OPERATION multiplier*element[0];
00960 value1 OPERATION multiplier*element[1];
00961 value2 OPERATION multiplier*element[2];
00962 value3 OPERATION multiplier*element[3];
00963 region[iColumn0]=value0;
00964 region[iColumn1]=value1;
00965 region[iColumn2]=value2;
00966 region[iColumn3]=value3;
00967 #endif
00968 #else
00969 abort();
00970 #endif
00971 element+=NEW_CHUNK_SIZE;
00972 thisColumn += NEW_CHUNK_SIZE;
00973 }
00974 }
00975 SCATTER_ATTRIBUTE void functionName(ScatterUpdate5)(int numberIn, CoinFactorizationDouble multiplier,
00976 const CoinFactorizationDouble * COIN_RESTRICT element,
00977 CoinFactorizationDouble * COIN_RESTRICT region)
00978 {
00979 #ifndef NDEBUG
00980 assert (numberIn==5);
00981 #endif
00982 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+5);
00983 int nFull=5&(~(NEW_CHUNK_SIZE-1));
00984 #if AVX2==1
00985 double temp[4];
00986 #endif
00987 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
00988
00989 #if NEW_CHUNK_SIZE==2
00990 int iColumn0=thisColumn[0];
00991 int iColumn1=thisColumn[1];
00992 double value0=region[iColumn0];
00993 double value1=region[iColumn1];
00994 value0 OPERATION multiplier*element[0];
00995 value1 OPERATION multiplier*element[1];
00996 region[iColumn0]=value0;
00997 region[iColumn1]=value1;
00998 #elif NEW_CHUNK_SIZE==4
00999 int iColumn0=thisColumn[0];
01000 int iColumn1=thisColumn[1];
01001 int iColumn2=thisColumn[2];
01002 int iColumn3=thisColumn[3];
01003 #if AVX2==1
01004 __v2df bb;
01005 set_const_v2df(bb,multiplier);
01006 temp[0]=region[iColumn0];
01007 temp[1]=region[iColumn1];
01008 temp[2]=region[iColumn2];
01009 temp[3]=region[iColumn3];
01010 __v2df v0 = __builtin_ia32_loadupd (temp);
01011 __v2df v1 = __builtin_ia32_loadupd (temp+2);
01012 __v2df a = __builtin_ia32_loadupd (element);
01013 a *= bb;
01014 v0 OPERATION a;
01015 a = __builtin_ia32_loadupd (element+2);
01016 a *= bb;
01017 v1 OPERATION a;
01018 __builtin_ia32_storeupd (temp, v0);
01019 __builtin_ia32_storeupd (temp+2, v1);
01020 region[iColumn0]=temp[0];
01021 region[iColumn1]=temp[1];
01022 region[iColumn2]=temp[2];
01023 region[iColumn3]=temp[3];
01024 #else
01025 double value0=region[iColumn0];
01026 double value1=region[iColumn1];
01027 double value2=region[iColumn2];
01028 double value3=region[iColumn3];
01029 value0 OPERATION multiplier*element[0];
01030 value1 OPERATION multiplier*element[1];
01031 value2 OPERATION multiplier*element[2];
01032 value3 OPERATION multiplier*element[3];
01033 region[iColumn0]=value0;
01034 region[iColumn1]=value1;
01035 region[iColumn2]=value2;
01036 region[iColumn3]=value3;
01037 #endif
01038 #else
01039 abort();
01040 #endif
01041 element+=NEW_CHUNK_SIZE;
01042 thisColumn += NEW_CHUNK_SIZE;
01043 }
01044 int iColumn0=thisColumn[0];
01045 double value0=region[iColumn0];
01046 value0 OPERATION multiplier*element[0];
01047 region[iColumn0]=value0;
01048 }
01049 SCATTER_ATTRIBUTE void functionName(ScatterUpdate6)(int numberIn, CoinFactorizationDouble multiplier,
01050 const CoinFactorizationDouble * COIN_RESTRICT element,
01051 CoinFactorizationDouble * COIN_RESTRICT region)
01052 {
01053 #ifndef NDEBUG
01054 assert (numberIn==6);
01055 #endif
01056 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+6);
01057 int nFull=6&(~(NEW_CHUNK_SIZE-1));
01058 #if AVX2==1
01059 double temp[4];
01060 #endif
01061 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
01062 coin_prefetch_const(element+6);
01063 #if NEW_CHUNK_SIZE==2
01064 int iColumn0=thisColumn[0];
01065 int iColumn1=thisColumn[1];
01066 double value0=region[iColumn0];
01067 double value1=region[iColumn1];
01068 value0 OPERATION multiplier*element[0];
01069 value1 OPERATION multiplier*element[1];
01070 region[iColumn0]=value0;
01071 region[iColumn1]=value1;
01072 #elif NEW_CHUNK_SIZE==4
01073 int iColumn0=thisColumn[0];
01074 int iColumn1=thisColumn[1];
01075 int iColumn2=thisColumn[2];
01076 int iColumn3=thisColumn[3];
01077 #if AVX2==1
01078 __v2df bb;
01079 set_const_v2df(bb,multiplier);
01080 temp[0]=region[iColumn0];
01081 temp[1]=region[iColumn1];
01082 temp[2]=region[iColumn2];
01083 temp[3]=region[iColumn3];
01084 __v2df v0 = __builtin_ia32_loadupd (temp);
01085 __v2df v1 = __builtin_ia32_loadupd (temp+2);
01086 __v2df a = __builtin_ia32_loadupd (element);
01087 a *= bb;
01088 v0 OPERATION a;
01089 a = __builtin_ia32_loadupd (element+2);
01090 a *= bb;
01091 v1 OPERATION a;
01092 __builtin_ia32_storeupd (temp, v0);
01093 __builtin_ia32_storeupd (temp+2, v1);
01094 region[iColumn0]=temp[0];
01095 region[iColumn1]=temp[1];
01096 region[iColumn2]=temp[2];
01097 region[iColumn3]=temp[3];
01098 #else
01099 double value0=region[iColumn0];
01100 double value1=region[iColumn1];
01101 double value2=region[iColumn2];
01102 double value3=region[iColumn3];
01103 value0 OPERATION multiplier*element[0];
01104 value1 OPERATION multiplier*element[1];
01105 value2 OPERATION multiplier*element[2];
01106 value3 OPERATION multiplier*element[3];
01107 region[iColumn0]=value0;
01108 region[iColumn1]=value1;
01109 region[iColumn2]=value2;
01110 region[iColumn3]=value3;
01111 #endif
01112 #else
01113 abort();
01114 #endif
01115 element+=NEW_CHUNK_SIZE;
01116 thisColumn += NEW_CHUNK_SIZE;
01117 }
01118 #if NEW_CHUNK_SIZE==4
01119 int iColumn0=thisColumn[0];
01120 int iColumn1=thisColumn[1];
01121 double value0=region[iColumn0];
01122 double value1=region[iColumn1];
01123 value0 OPERATION multiplier*element[0];
01124 value1 OPERATION multiplier*element[1];
01125 region[iColumn0]=value0;
01126 region[iColumn1]=value1;
01127 #endif
01128 }
01129 SCATTER_ATTRIBUTE void functionName(ScatterUpdate7)(int numberIn, CoinFactorizationDouble multiplier,
01130 const CoinFactorizationDouble * COIN_RESTRICT element,
01131 CoinFactorizationDouble * COIN_RESTRICT region)
01132 {
01133 #ifndef NDEBUG
01134 assert (numberIn==7);
01135 #endif
01136 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+7);
01137 int nFull=7&(~(NEW_CHUNK_SIZE-1));
01138 #if AVX2==1
01139 double temp[4];
01140 #endif
01141 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
01142 coin_prefetch_const(element+6);
01143 #if NEW_CHUNK_SIZE==2
01144 int iColumn0=thisColumn[0];
01145 int iColumn1=thisColumn[1];
01146 double value0=region[iColumn0];
01147 double value1=region[iColumn1];
01148 value0 OPERATION multiplier*element[0];
01149 value1 OPERATION multiplier*element[1];
01150 region[iColumn0]=value0;
01151 region[iColumn1]=value1;
01152 #elif NEW_CHUNK_SIZE==4
01153 int iColumn0=thisColumn[0];
01154 int iColumn1=thisColumn[1];
01155 int iColumn2=thisColumn[2];
01156 int iColumn3=thisColumn[3];
01157 #if AVX2==1
01158 __v2df bb;
01159 set_const_v2df(bb,multiplier);
01160 temp[0]=region[iColumn0];
01161 temp[1]=region[iColumn1];
01162 temp[2]=region[iColumn2];
01163 temp[3]=region[iColumn3];
01164 __v2df v0 = __builtin_ia32_loadupd (temp);
01165 __v2df v1 = __builtin_ia32_loadupd (temp+2);
01166 __v2df a = __builtin_ia32_loadupd (element);
01167 a *= bb;
01168 v0 OPERATION a;
01169 a = __builtin_ia32_loadupd (element+2);
01170 a *= bb;
01171 v1 OPERATION a;
01172 __builtin_ia32_storeupd (temp, v0);
01173 __builtin_ia32_storeupd (temp+2, v1);
01174 region[iColumn0]=temp[0];
01175 region[iColumn1]=temp[1];
01176 region[iColumn2]=temp[2];
01177 region[iColumn3]=temp[3];
01178 #else
01179 double value0=region[iColumn0];
01180 double value1=region[iColumn1];
01181 double value2=region[iColumn2];
01182 double value3=region[iColumn3];
01183 value0 OPERATION multiplier*element[0];
01184 value1 OPERATION multiplier*element[1];
01185 value2 OPERATION multiplier*element[2];
01186 value3 OPERATION multiplier*element[3];
01187 region[iColumn0]=value0;
01188 region[iColumn1]=value1;
01189 region[iColumn2]=value2;
01190 region[iColumn3]=value3;
01191 #endif
01192 #else
01193 abort();
01194 #endif
01195 element+=NEW_CHUNK_SIZE;
01196 thisColumn += NEW_CHUNK_SIZE;
01197 }
01198 #if NEW_CHUNK_SIZE==2
01199 int iColumn0=thisColumn[0];
01200 double value0=region[iColumn0];
01201 value0 OPERATION multiplier*element[0];
01202 region[iColumn0]=value0;
01203 #else
01204 int iColumn0=thisColumn[0];
01205 int iColumn1=thisColumn[1];
01206 int iColumn2=thisColumn[2];
01207 double value0=region[iColumn0];
01208 double value1=region[iColumn1];
01209 double value2=region[iColumn2];
01210 value0 OPERATION multiplier*element[0];
01211 value1 OPERATION multiplier*element[1];
01212 value2 OPERATION multiplier*element[2];
01213 region[iColumn0]=value0;
01214 region[iColumn1]=value1;
01215 region[iColumn2]=value2;
01216 #endif
01217 }
01218 SCATTER_ATTRIBUTE void functionName(ScatterUpdate8)(int numberIn, CoinFactorizationDouble multiplier,
01219 const CoinFactorizationDouble * COIN_RESTRICT element,
01220 CoinFactorizationDouble * COIN_RESTRICT region)
01221 {
01222 #ifndef NDEBUG
01223 assert (numberIn==8);
01224 #endif
01225 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+8);
01226 int nFull=8&(~(NEW_CHUNK_SIZE-1));
01227 #if AVX2==1
01228 double temp[4];
01229 #endif
01230 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
01231 coin_prefetch_const(element+6);
01232 #if NEW_CHUNK_SIZE==2
01233 int iColumn0=thisColumn[0];
01234 int iColumn1=thisColumn[1];
01235 double value0=region[iColumn0];
01236 double value1=region[iColumn1];
01237 value0 OPERATION multiplier*element[0];
01238 value1 OPERATION multiplier*element[1];
01239 region[iColumn0]=value0;
01240 region[iColumn1]=value1;
01241 #elif NEW_CHUNK_SIZE==4
01242 int iColumn0=thisColumn[0];
01243 int iColumn1=thisColumn[1];
01244 int iColumn2=thisColumn[2];
01245 int iColumn3=thisColumn[3];
01246 #if AVX2==1
01247 __v2df bb;
01248 set_const_v2df(bb,multiplier);
01249 temp[0]=region[iColumn0];
01250 temp[1]=region[iColumn1];
01251 temp[2]=region[iColumn2];
01252 temp[3]=region[iColumn3];
01253 __v2df v0 = __builtin_ia32_loadupd (temp);
01254 __v2df v1 = __builtin_ia32_loadupd (temp+2);
01255 __v2df a = __builtin_ia32_loadupd (element);
01256 a *= bb;
01257 v0 OPERATION a;
01258 a = __builtin_ia32_loadupd (element+2);
01259 a *= bb;
01260 v1 OPERATION a;
01261 __builtin_ia32_storeupd (temp, v0);
01262 __builtin_ia32_storeupd (temp+2, v1);
01263 region[iColumn0]=temp[0];
01264 region[iColumn1]=temp[1];
01265 region[iColumn2]=temp[2];
01266 region[iColumn3]=temp[3];
01267 #else
01268 double value0=region[iColumn0];
01269 double value1=region[iColumn1];
01270 double value2=region[iColumn2];
01271 double value3=region[iColumn3];
01272 value0 OPERATION multiplier*element[0];
01273 value1 OPERATION multiplier*element[1];
01274 value2 OPERATION multiplier*element[2];
01275 value3 OPERATION multiplier*element[3];
01276 region[iColumn0]=value0;
01277 region[iColumn1]=value1;
01278 region[iColumn2]=value2;
01279 region[iColumn3]=value3;
01280 #endif
01281 #else
01282 abort();
01283 #endif
01284 element+=NEW_CHUNK_SIZE;
01285 thisColumn += NEW_CHUNK_SIZE;
01286 }
01287 }
01288 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4N)(int numberIn, CoinFactorizationDouble multiplier,
01289 const CoinFactorizationDouble * COIN_RESTRICT element,
01290 CoinFactorizationDouble * COIN_RESTRICT region)
01291 {
01292 assert ((numberIn&3)==0);
01293 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
01294 int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
01295 #if AVX2==1
01296 double temp[4];
01297 #endif
01298 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
01299 coin_prefetch_const(element+16);
01300 coin_prefetch_const(thisColumn+32);
01301 #if NEW_CHUNK_SIZE==2
01302 int iColumn0=thisColumn[0];
01303 int iColumn1=thisColumn[1];
01304 double value0=region[iColumn0];
01305 double value1=region[iColumn1];
01306 value0 OPERATION multiplier*element[0];
01307 value1 OPERATION multiplier*element[1];
01308 region[iColumn0]=value0;
01309 region[iColumn1]=value1;
01310 #elif NEW_CHUNK_SIZE==4
01311 int iColumn0=thisColumn[0];
01312 int iColumn1=thisColumn[1];
01313 int iColumn2=thisColumn[2];
01314 int iColumn3=thisColumn[3];
01315 #if AVX2==1
01316 __v2df bb;
01317 set_const_v2df(bb,multiplier);
01318 temp[0]=region[iColumn0];
01319 temp[1]=region[iColumn1];
01320 temp[2]=region[iColumn2];
01321 temp[3]=region[iColumn3];
01322 __v2df v0 = __builtin_ia32_loadupd (temp);
01323 __v2df v1 = __builtin_ia32_loadupd (temp+2);
01324 __v2df a = __builtin_ia32_loadupd (element);
01325 a *= bb;
01326 v0 OPERATION a;
01327 a = __builtin_ia32_loadupd (element+2);
01328 a *= bb;
01329 v1 OPERATION a;
01330 __builtin_ia32_storeupd (temp, v0);
01331 __builtin_ia32_storeupd (temp+2, v1);
01332 region[iColumn0]=temp[0];
01333 region[iColumn1]=temp[1];
01334 region[iColumn2]=temp[2];
01335 region[iColumn3]=temp[3];
01336 #else
01337 double value0=region[iColumn0];
01338 double value1=region[iColumn1];
01339 double value2=region[iColumn2];
01340 double value3=region[iColumn3];
01341 value0 OPERATION multiplier*element[0];
01342 value1 OPERATION multiplier*element[1];
01343 value2 OPERATION multiplier*element[2];
01344 value3 OPERATION multiplier*element[3];
01345 region[iColumn0]=value0;
01346 region[iColumn1]=value1;
01347 region[iColumn2]=value2;
01348 region[iColumn3]=value3;
01349 #endif
01350 #else
01351 abort();
01352 #endif
01353 element+=NEW_CHUNK_SIZE;
01354 thisColumn += NEW_CHUNK_SIZE;
01355 }
01356 }
01357 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus1)(int numberIn, CoinFactorizationDouble multiplier,
01358 const CoinFactorizationDouble * COIN_RESTRICT element,
01359 CoinFactorizationDouble * COIN_RESTRICT region)
01360 {
01361 assert ((numberIn&3)==1);
01362 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
01363 int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
01364 #if AVX2==1
01365 double temp[4];
01366 #endif
01367 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
01368 coin_prefetch_const(element+16);
01369 coin_prefetch_const(thisColumn+32);
01370 #if NEW_CHUNK_SIZE==2
01371 int iColumn0=thisColumn[0];
01372 int iColumn1=thisColumn[1];
01373 double value0=region[iColumn0];
01374 double value1=region[iColumn1];
01375 value0 OPERATION multiplier*element[0];
01376 value1 OPERATION multiplier*element[1];
01377 region[iColumn0]=value0;
01378 region[iColumn1]=value1;
01379 #elif NEW_CHUNK_SIZE==4
01380 int iColumn0=thisColumn[0];
01381 int iColumn1=thisColumn[1];
01382 int iColumn2=thisColumn[2];
01383 int iColumn3=thisColumn[3];
01384 #if AVX2==1
01385 __v2df bb;
01386 set_const_v2df(bb,multiplier);
01387 temp[0]=region[iColumn0];
01388 temp[1]=region[iColumn1];
01389 temp[2]=region[iColumn2];
01390 temp[3]=region[iColumn3];
01391 __v2df v0 = __builtin_ia32_loadupd (temp);
01392 __v2df v1 = __builtin_ia32_loadupd (temp+2);
01393 __v2df a = __builtin_ia32_loadupd (element);
01394 a *= bb;
01395 v0 OPERATION a;
01396 a = __builtin_ia32_loadupd (element+2);
01397 a *= bb;
01398 v1 OPERATION a;
01399 __builtin_ia32_storeupd (temp, v0);
01400 __builtin_ia32_storeupd (temp+2, v1);
01401 region[iColumn0]=temp[0];
01402 region[iColumn1]=temp[1];
01403 region[iColumn2]=temp[2];
01404 region[iColumn3]=temp[3];
01405 #else
01406 double value0=region[iColumn0];
01407 double value1=region[iColumn1];
01408 double value2=region[iColumn2];
01409 double value3=region[iColumn3];
01410 value0 OPERATION multiplier*element[0];
01411 value1 OPERATION multiplier*element[1];
01412 value2 OPERATION multiplier*element[2];
01413 value3 OPERATION multiplier*element[3];
01414 region[iColumn0]=value0;
01415 region[iColumn1]=value1;
01416 region[iColumn2]=value2;
01417 region[iColumn3]=value3;
01418 #endif
01419 #else
01420 abort();
01421 #endif
01422 element+=NEW_CHUNK_SIZE;
01423 thisColumn += NEW_CHUNK_SIZE;
01424 }
01425 int iColumn0=thisColumn[0];
01426 double value0=region[iColumn0];
01427 value0 OPERATION multiplier*element[0];
01428 region[iColumn0]=value0;
01429 }
01430 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus2)(int numberIn, CoinFactorizationDouble multiplier,
01431 const CoinFactorizationDouble * COIN_RESTRICT element,
01432 CoinFactorizationDouble * COIN_RESTRICT region)
01433 {
01434 assert ((numberIn&3)==2);
01435 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
01436 int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
01437 #if AVX2==1
01438 double temp[4];
01439 #endif
01440 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
01441 coin_prefetch_const(element+16);
01442 coin_prefetch_const(thisColumn+32);
01443 #if NEW_CHUNK_SIZE==2
01444 int iColumn0=thisColumn[0];
01445 int iColumn1=thisColumn[1];
01446 double value0=region[iColumn0];
01447 double value1=region[iColumn1];
01448 value0 OPERATION multiplier*element[0];
01449 value1 OPERATION multiplier*element[1];
01450 region[iColumn0]=value0;
01451 region[iColumn1]=value1;
01452 #elif NEW_CHUNK_SIZE==4
01453 int iColumn0=thisColumn[0];
01454 int iColumn1=thisColumn[1];
01455 int iColumn2=thisColumn[2];
01456 int iColumn3=thisColumn[3];
01457 #if AVX2==1
01458 __v2df bb;
01459 set_const_v2df(bb,multiplier);
01460 temp[0]=region[iColumn0];
01461 temp[1]=region[iColumn1];
01462 temp[2]=region[iColumn2];
01463 temp[3]=region[iColumn3];
01464 __v2df v0 = __builtin_ia32_loadupd (temp);
01465 __v2df v1 = __builtin_ia32_loadupd (temp+2);
01466 __v2df a = __builtin_ia32_loadupd (element);
01467 a *= bb;
01468 v0 OPERATION a;
01469 a = __builtin_ia32_loadupd (element+2);
01470 a *= bb;
01471 v1 OPERATION a;
01472 __builtin_ia32_storeupd (temp, v0);
01473 __builtin_ia32_storeupd (temp+2, v1);
01474 region[iColumn0]=temp[0];
01475 region[iColumn1]=temp[1];
01476 region[iColumn2]=temp[2];
01477 region[iColumn3]=temp[3];
01478 #else
01479 double value0=region[iColumn0];
01480 double value1=region[iColumn1];
01481 double value2=region[iColumn2];
01482 double value3=region[iColumn3];
01483 value0 OPERATION multiplier*element[0];
01484 value1 OPERATION multiplier*element[1];
01485 value2 OPERATION multiplier*element[2];
01486 value3 OPERATION multiplier*element[3];
01487 region[iColumn0]=value0;
01488 region[iColumn1]=value1;
01489 region[iColumn2]=value2;
01490 region[iColumn3]=value3;
01491 #endif
01492 #else
01493 abort();
01494 #endif
01495 element+=NEW_CHUNK_SIZE;
01496 thisColumn += NEW_CHUNK_SIZE;
01497 }
01498 #if NEW_CHUNK_SIZE==4
01499 int iColumn0=thisColumn[0];
01500 int iColumn1=thisColumn[1];
01501 double value0=region[iColumn0];
01502 double value1=region[iColumn1];
01503 value0 OPERATION multiplier*element[0];
01504 value1 OPERATION multiplier*element[1];
01505 region[iColumn0]=value0;
01506 region[iColumn1]=value1;
01507 #endif
01508 }
01509 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus3)(int numberIn, CoinFactorizationDouble multiplier,
01510 const CoinFactorizationDouble * COIN_RESTRICT element,
01511 CoinFactorizationDouble * COIN_RESTRICT region)
01512 {
01513 assert ((numberIn&3)==3);
01514 const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
01515 int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
01516 #if AVX2==1
01517 double temp[4];
01518 #endif
01519 for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
01520 coin_prefetch_const(element+16);
01521 coin_prefetch_const(thisColumn+32);
01522 #if NEW_CHUNK_SIZE==2
01523 int iColumn0=thisColumn[0];
01524 int iColumn1=thisColumn[1];
01525 double value0=region[iColumn0];
01526 double value1=region[iColumn1];
01527 value0 OPERATION multiplier*element[0];
01528 value1 OPERATION multiplier*element[1];
01529 region[iColumn0]=value0;
01530 region[iColumn1]=value1;
01531 #elif NEW_CHUNK_SIZE==4
01532 int iColumn0=thisColumn[0];
01533 int iColumn1=thisColumn[1];
01534 int iColumn2=thisColumn[2];
01535 int iColumn3=thisColumn[3];
01536 #if AVX2==1
01537 __v2df bb;
01538 set_const_v2df(bb,multiplier);
01539 temp[0]=region[iColumn0];
01540 temp[1]=region[iColumn1];
01541 temp[2]=region[iColumn2];
01542 temp[3]=region[iColumn3];
01543 __v2df v0 = __builtin_ia32_loadupd (temp);
01544 __v2df v1 = __builtin_ia32_loadupd (temp+2);
01545 __v2df a = __builtin_ia32_loadupd (element);
01546 a *= bb;
01547 v0 OPERATION a;
01548 a = __builtin_ia32_loadupd (element+2);
01549 a *= bb;
01550 v1 OPERATION a;
01551 __builtin_ia32_storeupd (temp, v0);
01552 __builtin_ia32_storeupd (temp+2, v1);
01553 region[iColumn0]=temp[0];
01554 region[iColumn1]=temp[1];
01555 region[iColumn2]=temp[2];
01556 region[iColumn3]=temp[3];
01557 #else
01558 double value0=region[iColumn0];
01559 double value1=region[iColumn1];
01560 double value2=region[iColumn2];
01561 double value3=region[iColumn3];
01562 value0 OPERATION multiplier*element[0];
01563 value1 OPERATION multiplier*element[1];
01564 value2 OPERATION multiplier*element[2];
01565 value3 OPERATION multiplier*element[3];
01566 region[iColumn0]=value0;
01567 region[iColumn1]=value1;
01568 region[iColumn2]=value2;
01569 region[iColumn3]=value3;
01570 #endif
01571 #else
01572 abort();
01573 #endif
01574 element+=NEW_CHUNK_SIZE;
01575 thisColumn += NEW_CHUNK_SIZE;
01576 }
01577 #if NEW_CHUNK_SIZE==2
01578 int iColumn0=thisColumn[0];
01579 double value0=region[iColumn0];
01580 value0 OPERATION multiplier*element[0];
01581 region[iColumn0]=value0;
01582 #else
01583 int iColumn0=thisColumn[0];
01584 int iColumn1=thisColumn[1];
01585 int iColumn2=thisColumn[2];
01586 double value0=region[iColumn0];
01587 double value1=region[iColumn1];
01588 double value2=region[iColumn2];
01589 value0 OPERATION multiplier*element[0];
01590 value1 OPERATION multiplier*element[1];
01591 value2 OPERATION multiplier*element[2];
01592 region[iColumn0]=value0;
01593 region[iColumn1]=value1;
01594 region[iColumn2]=value2;
01595 #endif
01596 }
01597 #endif