CoinAbcHelperFunctions.hpp

Go to the documentation of this file.
00001 /* $Id: CoinAbcHelperFunctions.hpp 2042 2014-07-22 10:38:37Z forrest $ */
00002 // Copyright (C) 2003, International Business Machines
00003 // Corporation and others, Copyright (C) 2012, FasterCoin.  All Rights Reserved.
00004 // This code is licensed under the terms of the Eclipse Public License (EPL).
00005 
00006 #ifndef CoinAbcHelperFunctions_H
00007 #define CoinAbcHelperFunctions_H
00008 
00009 #include "ClpConfig.h"
00010 #ifdef HAVE_CMATH
00011 # include <cmath>
00012 #else
00013 # ifdef HAVE_MATH_H
00014 #  include <math.h>
00015 # else
00016 # include <cmath>
00017 # endif
00018 #endif
00019 #include "CoinAbcCommon.hpp"
#ifndef abc_assert
// Lightweight assertion used by the Abc code.
// When `condition` evaluates to false it prints the file name, the line
// number and the text of the condition, then calls abort().
// The argument is parenthesised before it is negated so that compound
// expressions (a == b, x || y, ...) are tested as a whole, and its text is
// produced with the standard `#` operator instead of the glibc-only __STRING.
// The brace form (rather than do/while) is kept so existing call sites,
// with or without a trailing semicolon, continue to compile.
#define abc_assert(condition)                                                   \
  { if (!(condition)) {printf("abc_assert in %s at line %d - %s is false\n", \
                            __FILE__, __LINE__, #condition); abort();} }
#endif
00025 // cilk_for granularity.
00026 #define CILK_FOR_GRAINSIZE 128
00027 //#define AVX2 2
00028 #if AVX2==1
00029 #include "emmintrin.h"
00030 #elif AVX2==2
00031 #include <immintrin.h>
00032 #elif AVX2==3
00033 #include "avx2intrin.h"
00034 #endif
00035 //#define __AVX__ 1
00036 //#define __AVX2__ 1
00043 #define UNROLL_SCATTER 2
00044 #define INLINE_SCATTER 1
00045 #if INLINE_SCATTER==0
00046 void CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00047                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00048                           const int *  COIN_RESTRICT thisIndex,
00049                           CoinFactorizationDouble * COIN_RESTRICT region);
00050 #else
00051 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00052                                             const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00053                                             const int *  COIN_RESTRICT thisIndex,
00054                                             CoinFactorizationDouble * COIN_RESTRICT region)
00055 {
00056 #if UNROLL_SCATTER==0
00057   for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
00058     CoinSimplexInt iRow = thisIndex[j];
00059     CoinFactorizationDouble regionValue = region[iRow];
00060     CoinFactorizationDouble value = thisElement[j];
00061     assert (value);
00062     region[iRow] = regionValue - value * pivotValue;
00063   }
00064 #elif UNROLL_SCATTER==1
00065   if ((number&1)!=0) {
00066     number--;
00067     CoinSimplexInt iRow = thisIndex[number];
00068     CoinFactorizationDouble regionValue = region[iRow];
00069     CoinFactorizationDouble value = thisElement[number];
00070     region[iRow] = regionValue - value * pivotValue;
00071   }
00072   for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
00073     CoinSimplexInt iRow0 = thisIndex[j];
00074     CoinSimplexInt iRow1 = thisIndex[j-1];
00075     CoinFactorizationDouble regionValue0 = region[iRow0];
00076     CoinFactorizationDouble regionValue1 = region[iRow1];
00077     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00078     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00079   }
00080 #elif UNROLL_SCATTER==2
00081   if ((number&1)!=0) {
00082     number--;
00083     CoinSimplexInt iRow = thisIndex[number];
00084     CoinFactorizationDouble regionValue = region[iRow];
00085     CoinFactorizationDouble value = thisElement[number]; 
00086     region[iRow] = regionValue - value * pivotValue;
00087   }
00088   if ((number&2)!=0) {
00089     CoinSimplexInt iRow0 = thisIndex[number-1];
00090     CoinFactorizationDouble regionValue0 = region[iRow0];
00091     CoinFactorizationDouble value0 = thisElement[number-1]; 
00092     CoinSimplexInt iRow1 = thisIndex[number-2];
00093     CoinFactorizationDouble regionValue1 = region[iRow1];
00094     CoinFactorizationDouble value1 = thisElement[number-2]; 
00095     region[iRow0] = regionValue0 - value0 * pivotValue;
00096     region[iRow1] = regionValue1 - value1 * pivotValue;
00097     number-=2;
00098   } 
00099 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
00100   cilk_for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
00101     CoinSimplexInt iRow0 = thisIndex[j];
00102     CoinSimplexInt iRow1 = thisIndex[j-1];
00103     CoinFactorizationDouble regionValue0 = region[iRow0];
00104     CoinFactorizationDouble regionValue1 = region[iRow1];
00105     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00106     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00107     CoinSimplexInt iRow2 = thisIndex[j-2];
00108     CoinSimplexInt iRow3 = thisIndex[j-3];
00109     CoinFactorizationDouble regionValue2 = region[iRow2];
00110     CoinFactorizationDouble regionValue3 = region[iRow3];
00111     region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
00112     region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
00113   }
00114 #elif UNROLL_SCATTER==3
00115   CoinSimplexInt iRow0;
00116   CoinSimplexInt iRow1;
00117   CoinFactorizationDouble regionValue0;
00118   CoinFactorizationDouble regionValue1;
00119   switch(static_cast<unsigned int>(number)) {
00120   case 0:
00121     break;
00122   case 1:
00123     iRow0 = thisIndex[0];
00124     regionValue0 = region[iRow0];
00125     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00126     break;
00127   case 2:
00128     iRow0 = thisIndex[0];
00129     iRow1 = thisIndex[1];
00130     regionValue0 = region[iRow0];
00131     regionValue1 = region[iRow1];
00132     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00133     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00134     break;
00135   case 3:
00136     iRow0 = thisIndex[0];
00137     iRow1 = thisIndex[1];
00138     regionValue0 = region[iRow0];
00139     regionValue1 = region[iRow1];
00140     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00141     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00142     iRow0 = thisIndex[2];
00143     regionValue0 = region[iRow0];
00144     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00145     break;
00146   case 4:
00147     iRow0 = thisIndex[0];
00148     iRow1 = thisIndex[1];
00149     regionValue0 = region[iRow0];
00150     regionValue1 = region[iRow1];
00151     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00152     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00153     iRow0 = thisIndex[2];
00154     iRow1 = thisIndex[3];
00155     regionValue0 = region[iRow0];
00156     regionValue1 = region[iRow1];
00157     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00158     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00159     break;
00160   case 5:
00161     iRow0 = thisIndex[0];
00162     iRow1 = thisIndex[1];
00163     regionValue0 = region[iRow0];
00164     regionValue1 = region[iRow1];
00165     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00166     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00167     iRow0 = thisIndex[2];
00168     iRow1 = thisIndex[3];
00169     regionValue0 = region[iRow0];
00170     regionValue1 = region[iRow1];
00171     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00172     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00173     iRow0 = thisIndex[4];
00174     regionValue0 = region[iRow0];
00175     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00176     break;
00177   case 6:
00178     iRow0 = thisIndex[0];
00179     iRow1 = thisIndex[1];
00180     regionValue0 = region[iRow0];
00181     regionValue1 = region[iRow1];
00182     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00183     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00184     iRow0 = thisIndex[2];
00185     iRow1 = thisIndex[3];
00186     regionValue0 = region[iRow0];
00187     regionValue1 = region[iRow1];
00188     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00189     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00190     iRow0 = thisIndex[4];
00191     iRow1 = thisIndex[5];
00192     regionValue0 = region[iRow0];
00193     regionValue1 = region[iRow1];
00194     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00195     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00196     break;
00197   case 7:
00198     iRow0 = thisIndex[0];
00199     iRow1 = thisIndex[1];
00200     regionValue0 = region[iRow0];
00201     regionValue1 = region[iRow1];
00202     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00203     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00204     iRow0 = thisIndex[2];
00205     iRow1 = thisIndex[3];
00206     regionValue0 = region[iRow0];
00207     regionValue1 = region[iRow1];
00208     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00209     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00210     iRow0 = thisIndex[4];
00211     iRow1 = thisIndex[5];
00212     regionValue0 = region[iRow0];
00213     regionValue1 = region[iRow1];
00214     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00215     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00216     iRow0 = thisIndex[6];
00217     regionValue0 = region[iRow0];
00218     region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
00219     break;
00220   case 8:
00221     iRow0 = thisIndex[0];
00222     iRow1 = thisIndex[1];
00223     regionValue0 = region[iRow0];
00224     regionValue1 = region[iRow1];
00225     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00226     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00227     iRow0 = thisIndex[2];
00228     iRow1 = thisIndex[3];
00229     regionValue0 = region[iRow0];
00230     regionValue1 = region[iRow1];
00231     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00232     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00233     iRow0 = thisIndex[4];
00234     iRow1 = thisIndex[5];
00235     regionValue0 = region[iRow0];
00236     regionValue1 = region[iRow1];
00237     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00238     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00239     iRow0 = thisIndex[6];
00240     iRow1 = thisIndex[7];
00241     regionValue0 = region[iRow0];
00242     regionValue1 = region[iRow1];
00243     region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
00244     region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
00245     break;
00246   default:
00247     if ((number&1)!=0) {
00248       number--;
00249       CoinSimplexInt iRow = thisIndex[number];
00250       CoinFactorizationDouble regionValue = region[iRow];
00251       CoinFactorizationDouble value = thisElement[number];
00252       region[iRow] = regionValue - value * pivotValue;
00253     }
00254     for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
00255       CoinSimplexInt iRow0 = thisIndex[j];
00256       CoinSimplexInt iRow1 = thisIndex[j-1];
00257       CoinFactorizationDouble regionValue0 = region[iRow0];
00258       CoinFactorizationDouble regionValue1 = region[iRow1];
00259       region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00260       region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00261     }
00262     break;
00263   }
00264 #endif
00265 }
00266 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00267                                             const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00268                                             CoinFactorizationDouble * COIN_RESTRICT region)
00269 {
00270 #if UNROLL_SCATTER==0
00271   const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
00272   for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
00273     CoinSimplexInt iRow = thisIndex[j];
00274     CoinFactorizationDouble regionValue = region[iRow];
00275     CoinFactorizationDouble value = thisElement[j];
00276     assert (value);
00277     region[iRow] = regionValue - value * pivotValue;
00278   }
00279 #elif UNROLL_SCATTER==1
00280   const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
00281   if ((number&1)!=0) {
00282     number--;
00283     CoinSimplexInt iRow = thisIndex[number];
00284     CoinFactorizationDouble regionValue = region[iRow];
00285     CoinFactorizationDouble value = thisElement[number];
00286     region[iRow] = regionValue - value * pivotValue;
00287   }
00288   for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
00289     CoinSimplexInt iRow0 = thisIndex[j];
00290     CoinSimplexInt iRow1 = thisIndex[j-1];
00291     CoinFactorizationDouble regionValue0 = region[iRow0];
00292     CoinFactorizationDouble regionValue1 = region[iRow1];
00293     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00294     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00295   }
00296 #elif UNROLL_SCATTER==2
00297   const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
00298   if ((number&1)!=0) {
00299     number--;
00300     CoinSimplexInt iRow = thisIndex[number];
00301     CoinFactorizationDouble regionValue = region[iRow];
00302     CoinFactorizationDouble value = thisElement[number]; 
00303     region[iRow] = regionValue - value * pivotValue;
00304   }
00305   if ((number&2)!=0) {
00306     CoinSimplexInt iRow0 = thisIndex[number-1];
00307     CoinFactorizationDouble regionValue0 = region[iRow0];
00308     CoinFactorizationDouble value0 = thisElement[number-1]; 
00309     CoinSimplexInt iRow1 = thisIndex[number-2];
00310     CoinFactorizationDouble regionValue1 = region[iRow1];
00311     CoinFactorizationDouble value1 = thisElement[number-2]; 
00312     region[iRow0] = regionValue0 - value0 * pivotValue;
00313     region[iRow1] = regionValue1 - value1 * pivotValue;
00314     number-=2;
00315   }
00316 #if AVX2==22
00317   CoinFactorizationDouble temp[4] __attribute__ ((aligned (32)));
00318   __m256d pv = _mm256_broadcast_sd(&pivotValue);
00319   for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
00320     __m256d elements=_mm256_loadu_pd(thisElement+j-3);
00321     CoinSimplexInt iRow0 = thisIndex[j-3];
00322     CoinSimplexInt iRow1 = thisIndex[j-2];
00323     CoinSimplexInt iRow2 = thisIndex[j-1];
00324     CoinSimplexInt iRow3 = thisIndex[j-0];
00325     temp[0] = region[iRow0];
00326     temp[1] = region[iRow1];
00327     temp[2] = region[iRow2];
00328     temp[3] = region[iRow3];
00329     __m256d t0=_mm256_load_pd(temp);
00330     t0 -= pv*elements;
00331     _mm256_store_pd (temp, t0);
00332     region[iRow0] = temp[0];
00333     region[iRow1] = temp[1];
00334     region[iRow2] = temp[2];
00335     region[iRow3] = temp[3];
00336   }
00337 #else
00338 #pragma cilk grainsize=CILK_FOR_GRAINSIZE
00339   cilk_for (CoinBigIndex j=number-1 ; j >=0; j-=4 ) {
00340     CoinSimplexInt iRow0 = thisIndex[j];
00341     CoinSimplexInt iRow1 = thisIndex[j-1];
00342     CoinFactorizationDouble regionValue0 = region[iRow0];
00343     CoinFactorizationDouble regionValue1 = region[iRow1];
00344     region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00345     region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00346     CoinSimplexInt iRow2 = thisIndex[j-2];
00347     CoinSimplexInt iRow3 = thisIndex[j-3];
00348     CoinFactorizationDouble regionValue2 = region[iRow2];
00349     CoinFactorizationDouble regionValue3 = region[iRow3];
00350     region[iRow2] = regionValue2 - thisElement[j-2] * pivotValue;
00351     region[iRow3] = regionValue3 - thisElement[j-3] * pivotValue;
00352   }
00353 #endif
00354 #elif UNROLL_SCATTER==3
00355   const int * COIN_RESTRICT thisIndex = reinterpret_cast<const int *>(thisElement+number);
00356   CoinSimplexInt iRow0;
00357   CoinSimplexInt iRow1;
00358   CoinFactorizationDouble regionValue0;
00359   CoinFactorizationDouble regionValue1;
00360   switch(static_cast<unsigned int>(number)) {
00361   case 0:
00362     break;
00363   case 1:
00364     iRow0 = thisIndex[0];
00365     regionValue0 = region[iRow0];
00366     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00367     break;
00368   case 2:
00369     iRow0 = thisIndex[0];
00370     iRow1 = thisIndex[1];
00371     regionValue0 = region[iRow0];
00372     regionValue1 = region[iRow1];
00373     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00374     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00375     break;
00376   case 3:
00377     iRow0 = thisIndex[0];
00378     iRow1 = thisIndex[1];
00379     regionValue0 = region[iRow0];
00380     regionValue1 = region[iRow1];
00381     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00382     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00383     iRow0 = thisIndex[2];
00384     regionValue0 = region[iRow0];
00385     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00386     break;
00387   case 4:
00388     iRow0 = thisIndex[0];
00389     iRow1 = thisIndex[1];
00390     regionValue0 = region[iRow0];
00391     regionValue1 = region[iRow1];
00392     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00393     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00394     iRow0 = thisIndex[2];
00395     iRow1 = thisIndex[3];
00396     regionValue0 = region[iRow0];
00397     regionValue1 = region[iRow1];
00398     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00399     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00400     break;
00401   case 5:
00402     iRow0 = thisIndex[0];
00403     iRow1 = thisIndex[1];
00404     regionValue0 = region[iRow0];
00405     regionValue1 = region[iRow1];
00406     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00407     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00408     iRow0 = thisIndex[2];
00409     iRow1 = thisIndex[3];
00410     regionValue0 = region[iRow0];
00411     regionValue1 = region[iRow1];
00412     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00413     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00414     iRow0 = thisIndex[4];
00415     regionValue0 = region[iRow0];
00416     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00417     break;
00418   case 6:
00419     iRow0 = thisIndex[0];
00420     iRow1 = thisIndex[1];
00421     regionValue0 = region[iRow0];
00422     regionValue1 = region[iRow1];
00423     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00424     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00425     iRow0 = thisIndex[2];
00426     iRow1 = thisIndex[3];
00427     regionValue0 = region[iRow0];
00428     regionValue1 = region[iRow1];
00429     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00430     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00431     iRow0 = thisIndex[4];
00432     iRow1 = thisIndex[5];
00433     regionValue0 = region[iRow0];
00434     regionValue1 = region[iRow1];
00435     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00436     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00437     break;
00438   case 7:
00439     iRow0 = thisIndex[0];
00440     iRow1 = thisIndex[1];
00441     regionValue0 = region[iRow0];
00442     regionValue1 = region[iRow1];
00443     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00444     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00445     iRow0 = thisIndex[2];
00446     iRow1 = thisIndex[3];
00447     regionValue0 = region[iRow0];
00448     regionValue1 = region[iRow1];
00449     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00450     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00451     iRow0 = thisIndex[4];
00452     iRow1 = thisIndex[5];
00453     regionValue0 = region[iRow0];
00454     regionValue1 = region[iRow1];
00455     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00456     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00457     iRow0 = thisIndex[6];
00458     regionValue0 = region[iRow0];
00459     region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
00460     break;
00461   case 8:
00462     iRow0 = thisIndex[0];
00463     iRow1 = thisIndex[1];
00464     regionValue0 = region[iRow0];
00465     regionValue1 = region[iRow1];
00466     region[iRow0] = regionValue0 - thisElement[0] * pivotValue;
00467     region[iRow1] = regionValue1 - thisElement[1] * pivotValue;
00468     iRow0 = thisIndex[2];
00469     iRow1 = thisIndex[3];
00470     regionValue0 = region[iRow0];
00471     regionValue1 = region[iRow1];
00472     region[iRow0] = regionValue0 - thisElement[2] * pivotValue;
00473     region[iRow1] = regionValue1 - thisElement[3] * pivotValue;
00474     iRow0 = thisIndex[4];
00475     iRow1 = thisIndex[5];
00476     regionValue0 = region[iRow0];
00477     regionValue1 = region[iRow1];
00478     region[iRow0] = regionValue0 - thisElement[4] * pivotValue;
00479     region[iRow1] = regionValue1 - thisElement[5] * pivotValue;
00480     iRow0 = thisIndex[6];
00481     iRow1 = thisIndex[7];
00482     regionValue0 = region[iRow0];
00483     regionValue1 = region[iRow1];
00484     region[iRow0] = regionValue0 - thisElement[6] * pivotValue;
00485     region[iRow1] = regionValue1 - thisElement[7] * pivotValue;
00486     break;
00487   default:
00488     if ((number&1)!=0) {
00489       number--;
00490       CoinSimplexInt iRow = thisIndex[number];
00491       CoinFactorizationDouble regionValue = region[iRow];
00492       CoinFactorizationDouble value = thisElement[number];
00493       region[iRow] = regionValue - value * pivotValue;
00494     }
00495     for (CoinBigIndex j=number-1 ; j >=0; j-=2 ) {
00496       CoinSimplexInt iRow0 = thisIndex[j];
00497       CoinSimplexInt iRow1 = thisIndex[j-1];
00498       CoinFactorizationDouble regionValue0 = region[iRow0];
00499       CoinFactorizationDouble regionValue1 = region[iRow1];
00500       region[iRow0] = regionValue0 - thisElement[j] * pivotValue;
00501       region[iRow1] = regionValue1 - thisElement[j-1] * pivotValue;
00502     }
00503     break;
00504   }
00505 #endif
00506 }
00507 #endif
// Cache-prefetch hints.
// Uncomment the next line (or define COIN_PREFETCH on the command line) to
// make coin_prefetch / coin_prefetch_const emit an inline-asm prefetch of the
// byte their argument points at; otherwise both expand to nothing.
//#define COIN_PREFETCH
#ifdef COIN_PREFETCH
#if 1
// Variant in use: the `prefetchnta` instruction.
#define coin_prefetch(address) \
  __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<char *>(address))))
#define coin_prefetch_const(address) \
  __asm__ __volatile__ ("prefetchnta %0" : : "m" (*(reinterpret_cast<const char *>(address))))
#else
// Alternative kept for reference: the plain `prefetch` instruction.
#define coin_prefetch(address) \
  __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<char *>(address))))
#define coin_prefetch_const(address) \
  __asm__ __volatile__ ("prefetch %0" : : "m" (*(reinterpret_cast<const char *>(address))))
#endif
#else
// Prefetching disabled: the hints vanish at preprocessing time.
#define coin_prefetch(address)
#define coin_prefetch_const(address)
#endif
// Chunk-size constants.  With NEW_CHUNK_SIZE equal to 4 the increment
// evaluates to 6 and the offset to 2 (integer division).
// Each definition is a parenthesised expression with no trailing semicolon,
// so it can be used inside a larger expression as well as on its own.
#define NEW_CHUNK_SIZE 4
#define NEW_CHUNK_SIZE_INCREMENT (NEW_CHUNK_SIZE+NEW_CHUNK_SIZE/2)
#define NEW_CHUNK_SIZE_OFFSET (NEW_CHUNK_SIZE/2)
00529 // leaf, pure, nothrow and hot give warnings 
00530 // fastcall and sseregparm give wrong results
00531 //#define SCATTER_ATTRIBUTE __attribute__ ((leaf,fastcall,pure,sseregparm,nothrow,hot))
00532 #define SCATTER_ATTRIBUTE 
00533 typedef void (*scatterUpdate) (int,CoinFactorizationDouble,const CoinFactorizationDouble *, double *) SCATTER_ATTRIBUTE ;
00534 typedef struct {
00535   scatterUpdate functionPointer;
00536   CoinBigIndex offset;
00537   int number;
00538 } scatterStruct;
00539 void CoinAbcScatterUpdate0(int numberIn, CoinFactorizationDouble multiplier,
00540                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00541                           CoinFactorizationDouble * COIN_RESTRICT region)  SCATTER_ATTRIBUTE ;
00542 void CoinAbcScatterUpdate1(int numberIn, CoinFactorizationDouble multiplier,
00543                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00544                            CoinFactorizationDouble * COIN_RESTRICT region)  SCATTER_ATTRIBUTE ;
00545 void CoinAbcScatterUpdate2(int numberIn, CoinFactorizationDouble multiplier,
00546                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00547                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00548 void CoinAbcScatterUpdate3(int numberIn, CoinFactorizationDouble multiplier,
00549                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00550                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00551 void CoinAbcScatterUpdate4(int numberIn, CoinFactorizationDouble multiplier,
00552                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00553                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00554 void CoinAbcScatterUpdate5(int numberIn, CoinFactorizationDouble multiplier,
00555                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00556                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00557 void CoinAbcScatterUpdate6(int numberIn, CoinFactorizationDouble multiplier,
00558                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00559                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00560 void CoinAbcScatterUpdate7(int numberIn, CoinFactorizationDouble multiplier,
00561                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00562                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00563 void CoinAbcScatterUpdate8(int numberIn, CoinFactorizationDouble multiplier,
00564                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00565                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00566 void CoinAbcScatterUpdate4N(int numberIn, CoinFactorizationDouble multiplier,
00567                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00568                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00569 void CoinAbcScatterUpdate4NPlus1(int numberIn, CoinFactorizationDouble multiplier,
00570                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00571                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00572 void CoinAbcScatterUpdate4NPlus2(int numberIn, CoinFactorizationDouble multiplier,
00573                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00574                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00575 void CoinAbcScatterUpdate4NPlus3(int numberIn, CoinFactorizationDouble multiplier,
00576                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00577                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00578 void CoinAbcScatterUpdate1Subtract(int numberIn, CoinFactorizationDouble multiplier,
00579                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00580                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00581 void CoinAbcScatterUpdate2Subtract(int numberIn, CoinFactorizationDouble multiplier,
00582                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00583                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00584 void CoinAbcScatterUpdate3Subtract(int numberIn, CoinFactorizationDouble multiplier,
00585                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00586                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00587 void CoinAbcScatterUpdate4Subtract(int numberIn, CoinFactorizationDouble multiplier,
00588                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00589                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00590 void CoinAbcScatterUpdate5Subtract(int numberIn, CoinFactorizationDouble multiplier,
00591                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00592                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00593 void CoinAbcScatterUpdate6Subtract(int numberIn, CoinFactorizationDouble multiplier,
00594                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00595                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00596 void CoinAbcScatterUpdate7Subtract(int numberIn, CoinFactorizationDouble multiplier,
00597                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00598                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00599 void CoinAbcScatterUpdate8Subtract(int numberIn, CoinFactorizationDouble multiplier,
00600                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00601                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00602 void CoinAbcScatterUpdate4NSubtract(int numberIn, CoinFactorizationDouble multiplier,
00603                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00604                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
// Out-of-line scatter-update kernels: the last three "Subtract" variants,
// followed by the "Add" family (fixed counts 1..8, then 4N, 4N+1, 4N+2, 4N+3).
// Every kernel has the same signature: an entry count, a scalar multiplier,
// the element array and the dense region that is updated in place.
// SCATTER_ATTRIBUTE is a project macro defined outside this part of the file.
// NOTE(review): presumably Add/Subtract select region += / -= multiplier*element
// and the numeric suffix is the required value (or residue mod 4) of numberIn -
// confirm against the definitions built under ABC_CREATE_SCATTER_FUNCTION.
00605 void CoinAbcScatterUpdate4NPlus1Subtract(int numberIn, CoinFactorizationDouble multiplier,
00606                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00607                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00608 void CoinAbcScatterUpdate4NPlus2Subtract(int numberIn, CoinFactorizationDouble multiplier,
00609                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00610                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00611 void CoinAbcScatterUpdate4NPlus3Subtract(int numberIn, CoinFactorizationDouble multiplier,
00612                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00613                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
// "Add" family - fixed entry counts 1 to 8.
00614 void CoinAbcScatterUpdate1Add(int numberIn, CoinFactorizationDouble multiplier,
00615                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00616                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00617 void CoinAbcScatterUpdate2Add(int numberIn, CoinFactorizationDouble multiplier,
00618                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00619                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00620 void CoinAbcScatterUpdate3Add(int numberIn, CoinFactorizationDouble multiplier,
00621                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00622                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00623 void CoinAbcScatterUpdate4Add(int numberIn, CoinFactorizationDouble multiplier,
00624                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00625                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00626 void CoinAbcScatterUpdate5Add(int numberIn, CoinFactorizationDouble multiplier,
00627                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00628                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00629 void CoinAbcScatterUpdate6Add(int numberIn, CoinFactorizationDouble multiplier,
00630                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00631                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00632 void CoinAbcScatterUpdate7Add(int numberIn, CoinFactorizationDouble multiplier,
00633                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00634                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00635 void CoinAbcScatterUpdate8Add(int numberIn, CoinFactorizationDouble multiplier,
00636                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00637                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
// "Add" family - variable entry counts, one kernel per residue of numberIn mod 4.
00638 void CoinAbcScatterUpdate4NAdd(int numberIn, CoinFactorizationDouble multiplier,
00639                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00640                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00641 void CoinAbcScatterUpdate4NPlus1Add(int numberIn, CoinFactorizationDouble multiplier,
00642                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00643                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00644 void CoinAbcScatterUpdate4NPlus2Add(int numberIn, CoinFactorizationDouble multiplier,
00645                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00646                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
00647 void CoinAbcScatterUpdate4NPlus3Add(int numberIn, CoinFactorizationDouble multiplier,
00648                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00649                           CoinFactorizationDouble * COIN_RESTRICT region) SCATTER_ATTRIBUTE ;
// Six-argument overload of CoinAbcScatterUpdate (extra trailing "work" pointer).
// With INLINE_SCATTER==0 only the prototype below is declared. Otherwise the
// inline body that follows would be used, but it is wrapped in "#if 0", so in
// that configuration this section provides no six-argument overload at all.
00650 #if INLINE_SCATTER==0
00651 void CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00652                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00653                           const int *  COIN_RESTRICT thisIndex,
00654                           CoinFactorizationDouble * COIN_RESTRICT region,
00655                           double * COIN_RESTRICT work);
00656 #else
      // Disabled reference implementation:
      //   region[thisIndex[j]] -= thisElement[j] * pivotValue   for each of the number entries.
      // The work argument is unnamed and unused.
00657 #if 0
00658 void ABC_INLINE inline CoinAbcScatterUpdate(int number,CoinFactorizationDouble pivotValue,
00659                                             const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00660                                             const int *  COIN_RESTRICT thisIndex,
00661                                             CoinFactorizationDouble * COIN_RESTRICT region,
00662                                             double * COIN_RESTRICT /*work*/)
00663 {
00664 #if UNROLL_SCATTER==0
        // Plain loop, walking the entries from last to first.
00665   for (CoinBigIndex j=number-1 ; j >=0; j-- ) {
00666     CoinSimplexInt iRow = thisIndex[j];
00667     CoinFactorizationDouble regionValue = region[iRow];
00668     CoinFactorizationDouble value = thisElement[j];
          // Stored elements are expected to be nonzero.
00669     assert (value);
00670     region[iRow] = regionValue - value * pivotValue;
00671   }
00672 #elif UNROLL_SCATTER==1
        // Unrolled by two: peel one entry when the count is odd, then process pairs.
00673   if ((number&1)!=0) {
00674     CoinSimplexInt iRow = thisIndex[0];
00675     thisIndex++;
00676     CoinFactorizationDouble regionValue = region[iRow];
00677     CoinFactorizationDouble value = thisElement[0];
00678     thisElement++;
00679     region[iRow] = regionValue - value * pivotValue;
00680   }
        // number now counts pairs.
00681   number = number>>1;
00682   CoinFactorizationDouble work2[4];
00683   for ( ; number !=0; number-- ) {
00684     CoinSimplexInt iRow0 = thisIndex[0];
00685     CoinSimplexInt iRow1 = thisIndex[1];
00686     work2[0] = region[iRow0];
00687     work2[1] = region[iRow1];
          // Abandoned experiment with 256-bit vector loads; never compiled.
00688 #if 0
00689     work2[2] = region[iRow0];
00690     work2[3] = region[iRow1];
00691     //__v4df b = __builtin_ia32_maskloadpd256(work2);
00692     __v4df b = __builtin_ia32_loadupd256(work2);
00693     //__v4df b = _mm256_load_pd(work2);
00694 #endif
00695     work2[0] -= thisElement[0] * pivotValue;
00696     work2[1] -= thisElement[1] * pivotValue;
00697     region[iRow0] = work2[0];
00698     region[iRow1] = work2[1];
00699     thisIndex+=2;
00700     thisElement+=2;
00701   }
00702 #endif
        // NOTE(review): any other UNROLL_SCATTER value (the file sets 2) would leave
        // this body empty if the "#if 0" guard were ever removed.
00703 }
00704 #endif
00705 #endif
// CoinAbcGatherUpdate: indexed dot product with the sign flipped.
// Returns  -( sum over j in [0,number) of thisElement[j] * region[thisIndex[j]] ).
// INLINE_GATHER==0 only declares the function; otherwise it is defined inline here.
// UNROLL_GATHER selects the loop variant; only variant 0 exists, any other
// value stops compilation through #error.
00706 #define UNROLL_GATHER 0
00707 #define INLINE_GATHER 1
00708 #if INLINE_GATHER==0
00709 CoinFactorizationDouble CoinAbcGatherUpdate(CoinSimplexInt number,
00710                           const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00711                           const int *  COIN_RESTRICT thisIndex,
00712                            CoinFactorizationDouble * COIN_RESTRICT region);
00713 #else
00714 CoinFactorizationDouble ABC_INLINE inline CoinAbcGatherUpdate(CoinSimplexInt number,
00715                                              const CoinFactorizationDouble *  COIN_RESTRICT thisElement,
00716                                              const int *  COIN_RESTRICT thisIndex,
00717                                              CoinFactorizationDouble * COIN_RESTRICT region)
00718 {
00719 #if UNROLL_GATHER==0
        // Accumulate as a running subtraction, entries visited in ascending order.
00720   CoinFactorizationDouble pivotValue=0.0;
00721   for (CoinBigIndex j = 0; j < number; j ++ ) {
00722     CoinFactorizationDouble value = thisElement[j];
00723     CoinSimplexInt jRow = thisIndex[j];
          // region is only read here, although the pointer is not const.
00724     value *= region[jRow];
00725     pivotValue -= value;
00726   }
        // An empty list (number <= 0) yields 0.0.
00727   return pivotValue;
00728 #else
00729 #error code
00730 #endif
00731 }
00732 #endif
// CoinAbcMultiplyIndexed: with INLINE_MULTIPLY_INDEXED==0 (the setting below)
// two out-of-line overloads are declared, one for double and one for long double.
// NOTE(review): presumably multiplies the region entries selected by thisIndex
// by the matching multiplier entries - the implementation is not in this header.
00733 #define UNROLL_MULTIPLY_INDEXED 0
00734 #define INLINE_MULTIPLY_INDEXED 0
00735 #if INLINE_MULTIPLY_INDEXED==0
00736 void CoinAbcMultiplyIndexed(int number,
00737                             const double *  COIN_RESTRICT multiplier,
00738                             const int *  COIN_RESTRICT thisIndex,
00739                             CoinFactorizationDouble * COIN_RESTRICT region);
00740 void CoinAbcMultiplyIndexed(int number,
00741                             const long double *  COIN_RESTRICT multiplier,
00742                             const int *  COIN_RESTRICT thisIndex,
00743                             long double * COIN_RESTRICT region);
00744 #else
      // NOTE(review): the inline variant is an empty stub - it performs no work and
      // has no long double overload. Do not set INLINE_MULTIPLY_INDEXED to a
      // non-zero value without supplying a real body.
00745 void ABC_INLINE inline CoinAbcMultiplyIndexed(int number,
00746                             const double *  COIN_RESTRICT multiplier,
00747                             const int *  COIN_RESTRICT thisIndex,
00748                             CoinFactorizationDouble * COIN_RESTRICT region)
00749 {
00750 }
00751 #endif
// Declarations of array helper routines; their implementations are not part of
// this header section.
// NOTE(review): the grouping comments below are inferred from the function
// names and signatures only - confirm against the implementation file.

// Reductions and in-place scaling over a dense array of length size.
00752 double CoinAbcMaximumAbsElement(const double * region, int size);
00753 void CoinAbcMinMaxAbsElement(const double * region, int size,double & minimum , double & maximum);
00754 void CoinAbcMinMaxAbsNormalValues(const double * region, int size,double & minimum , double & maximum);
00755 void CoinAbcScale(double * region, double multiplier,int size);
00756 void CoinAbcScaleNormalValues(double * region, double multiplier,double killIfLessThanThis,int size);
00758 double CoinAbcMaximumAbsElementAndScale(double * region, double multiplier,int size);
00759 void CoinAbcSetElements(double * region, int size, double value);
00760 void CoinAbcMultiplyAdd(const double * region1, int size, double multiplier1,
00761                  double * regionChanged, double multiplier2);
00762 double CoinAbcInnerProduct(const double * region1, int size, const double * region2);
00763 void CoinAbcGetNorms(const double * region, int size, double & norm1, double & norm2);
// Index-driven copies between two arrays (number entries, positions given by index).
00765 void CoinAbcScatterTo(const double * regionFrom, double * regionTo, const int * index,int number);
00767 void CoinAbcGatherFrom(const double * regionFrom, double * regionTo, const int * index,int number);
00769 void CoinAbcScatterZeroTo(double * regionTo, const int * index,int number);
00771 void CoinAbcScatterToList(const double * regionFrom, double * regionTo, 
00772                    const int * indexList, const int * indexScatter ,int number);
// Element-wise transforms.
00774 void CoinAbcInverseSqrts(double * array, int n);
00775 void CoinAbcReciprocal(double * array, int n, const double *input);
// Bulk copy / zero / move, overloaded for double, int and unsigned char arrays.
00776 void CoinAbcMemcpyLong(double * array,const double * arrayFrom,int size);
00777 void CoinAbcMemcpyLong(int * array,const int * arrayFrom,int size);
00778 void CoinAbcMemcpyLong(unsigned char * array,const unsigned char * arrayFrom,int size);
00779 void CoinAbcMemset0Long(double * array,int size);
00780 void CoinAbcMemset0Long(int * array,int size);
00781 void CoinAbcMemset0Long(unsigned char * array,int size);
00782 void CoinAbcMemmove(double * array,const double * arrayFrom,int size);
00783 void CoinAbcMemmove(int * array,const int * arrayFrom,int size);
00784 void CoinAbcMemmove(unsigned char * array,const unsigned char * arrayFrom,int size);
00786 void CoinAbcMemmoveAndZero(double * array,double * arrayFrom,int size);
// Compaction of sectioned storage described by starts/lengths; returns an int.
00788 int CoinAbcCompact(int numberSections,int alreadyDone,double * array,const int * starts, const int * lengths); 
00790 int CoinAbcCompact(int numberSections,int alreadyDone,int * array,const int * starts, const int * lengths); 
00791 #endif
00792 #if ABC_CREATE_SCATTER_FUNCTION
// Scatter kernel for exactly one entry:
//   region[index0] OPERATION multiplier * element[0]
// The single int index is read from the memory directly after the one element
// value (element+1, reinterpreted as const int *).
// functionName() and OPERATION are macros supplied by the including file.
// NOTE(review): presumably OPERATION is += or -= and functionName() builds the
// CoinAbc...Add / ...Subtract names - confirm at the inclusion site.
00793 SCATTER_ATTRIBUTE void functionName(ScatterUpdate1)(int numberIn, CoinFactorizationDouble multiplier,
00794                           const CoinFactorizationDouble *  COIN_RESTRICT element,
00795                           CoinFactorizationDouble * COIN_RESTRICT region)
00796 {
00797 #ifndef NDEBUG
00798   assert (numberIn==1);
00799 #endif
00800   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+1);
00801   int iColumn0=thisColumn[0];
        // NOTE(review): the working value is a plain double although region holds
        // CoinFactorizationDouble; ScatterUpdate2 uses CoinFactorizationDouble here.
00802   double value0=region[iColumn0];
00803   value0 OPERATION multiplier*element[0];
00804   region[iColumn0]=value0;
00805 }
// Scatter kernel for exactly two entries:
//   region[index_k] OPERATION multiplier * element[k],  k = 0,1.
// The code path depends on NEW_CHUNK_SIZE (defined outside this section).
// NOTE(review): when NEW_CHUNK_SIZE is neither 2 nor 4 the body is empty and the
// call silently does nothing (the 4..8 kernels abort() instead) - confirm intent.
00806 SCATTER_ATTRIBUTE void functionName(ScatterUpdate2)(int numberIn, CoinFactorizationDouble multiplier,
00807                           const CoinFactorizationDouble *  COIN_RESTRICT element,
00808                           CoinFactorizationDouble * COIN_RESTRICT region)
00809 {
00810 #ifndef NDEBUG
00811   assert (numberIn==2);
00812 #endif
        // Indices start directly after the two element values.
00813   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+2);
00814 #if NEW_CHUNK_SIZE==2
        // Chunked path: nFull evaluates to 2, so the loop body runs once.
        // NOTE(review): this branch relies on NEW_CHUNK_SIZE_INCREMENT and
        // NEW_CHUNK_SIZE_OFFSET, which are not defined in this section, and re-derives
        // thisColumn from element after each chunk - a different layout from the
        // other kernels. Verify before enabling NEW_CHUNK_SIZE==2.
00815   int nFull=2&(~(NEW_CHUNK_SIZE-1));
00816   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
00817     coin_prefetch(element+NEW_CHUNK_SIZE_INCREMENT);
00818     int iColumn0=thisColumn[0];
00819     int iColumn1=thisColumn[1];
00820     CoinFactorizationDouble value0=region[iColumn0];
00821     CoinFactorizationDouble value1=region[iColumn1];
00822     value0 OPERATION multiplier*element[0+NEW_CHUNK_SIZE_OFFSET];
00823     value1 OPERATION multiplier*element[1+NEW_CHUNK_SIZE_OFFSET];
00824     region[iColumn0]=value0;
00825     region[iColumn1]=value1;
00826     element+=NEW_CHUNK_SIZE_INCREMENT;
00827     thisColumn = reinterpret_cast<const int *>(element);
00828   }
00829 #endif
00830 #if NEW_CHUNK_SIZE==4
        // Straight-line path: load both targets, update, store both.
00831   int iColumn0=thisColumn[0];
00832   int iColumn1=thisColumn[1];
00833   CoinFactorizationDouble value0=region[iColumn0];
00834   CoinFactorizationDouble value1=region[iColumn1];
00835   value0 OPERATION multiplier*element[0];
00836   value1 OPERATION multiplier*element[1];
00837   region[iColumn0]=value0;
00838   region[iColumn1]=value1;
00839 #endif
00840 }
00841 SCATTER_ATTRIBUTE void functionName(ScatterUpdate3)(int numberIn, CoinFactorizationDouble multiplier,
00842                           const CoinFactorizationDouble *  COIN_RESTRICT element,
00843                           CoinFactorizationDouble * COIN_RESTRICT region)
00844 {
00845 #ifndef NDEBUG
00846   assert (numberIn==3);
00847 #endif
00848   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+3);
00849 #if AVX2==1
00850   double temp[2];
00851 #endif
00852 #if NEW_CHUNK_SIZE==2
00853   int nFull=3&(~(NEW_CHUNK_SIZE-1));
00854   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
00855     //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
00856     int iColumn0=thisColumn[0];
00857     int iColumn1=thisColumn[1];
00858     CoinFactorizationDouble value0=region[iColumn0];
00859     CoinFactorizationDouble value1=region[iColumn1];
00860     value0 OPERATION multiplier*element[0];
00861     value1 OPERATION multiplier*element[1];
00862     region[iColumn0]=value0;
00863     region[iColumn1]=value1;
00864     element+=NEW_CHUNK_SIZE;
00865     thisColumn+ = NEW_CHUNK_SIZE;
00866   }
00867 #endif
00868 #if NEW_CHUNK_SIZE==2
00869   int iColumn0=thisColumn[0];
00870   double value0=region[iColumn0];
00871   value0 OPERATION multiplier*element[0];
00872   region[iColumn0]=value0;
00873 #else
00874   int iColumn0=thisColumn[0];
00875   int iColumn1=thisColumn[1];
00876   int iColumn2=thisColumn[2];
00877 #if AVX2==1
00878   __v2df bb;
00879   double value2=region[iColumn2];
00880   value2 OPERATION multiplier*element[2];
00881   set_const_v2df(bb,multiplier);
00882   temp[0]=region[iColumn0];
00883   temp[1]=region[iColumn1];
00884   region[iColumn2]=value2;
00885   __v2df v0 = __builtin_ia32_loadupd (temp);
00886   __v2df a = __builtin_ia32_loadupd (element);
00887   a *= bb;
00888   v0 OPERATION a;
00889   __builtin_ia32_storeupd (temp, v0);
00890   region[iColumn0]=temp[0];
00891   region[iColumn1]=temp[1];
00892 #else
00893   double value0=region[iColumn0];
00894   double value1=region[iColumn1];
00895   double value2=region[iColumn2];
00896   value0 OPERATION multiplier*element[0];
00897   value1 OPERATION multiplier*element[1];
00898   value2 OPERATION multiplier*element[2];
00899   region[iColumn0]=value0;
00900   region[iColumn1]=value1;
00901   region[iColumn2]=value2;
00902 #endif
00903 #endif
00904 }
// Scatter kernel for exactly four entries:
//   region[index_k] OPERATION multiplier * element[k],  k = 0..3.
// Indices are read from the memory directly after the four element values.
// Work is done in chunks of NEW_CHUNK_SIZE entries (2 or 4); any other chunk
// size reaches abort(). functionName() and OPERATION come from the including file.
00905 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4)(int numberIn, CoinFactorizationDouble multiplier,
00906                            const CoinFactorizationDouble *  COIN_RESTRICT element,
00907                            CoinFactorizationDouble * COIN_RESTRICT region)
00908 {
00909 #ifndef NDEBUG
00910   assert (numberIn==4);
00911 #endif
00912   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+4);
        // 4 rounded down to a multiple of the chunk size (4 for chunk sizes 2 and 4).
00913   int nFull=4&(~(NEW_CHUNK_SIZE-1));
00914 #if AVX2==1
        // Staging buffer for gathered region values.
00915   double temp[4];
00916 #endif
00917   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
00918     //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
00919 #if NEW_CHUNK_SIZE==2
00920     int iColumn0=thisColumn[0];
00921     int iColumn1=thisColumn[1];
00922     double value0=region[iColumn0];
00923     double value1=region[iColumn1];
00924     value0 OPERATION multiplier*element[0];
00925     value1 OPERATION multiplier*element[1];
00926     region[iColumn0]=value0;
00927     region[iColumn1]=value1;
00928 #elif NEW_CHUNK_SIZE==4
00929     int iColumn0=thisColumn[0];
00930     int iColumn1=thisColumn[1];
00931     int iColumn2=thisColumn[2];
00932     int iColumn3=thisColumn[3];
00933 #if AVX2==1
          // Vector path: gather four region values into temp, update them as two
          // 2-double vectors, then scatter the results back.
00934     __v2df bb;
00935     set_const_v2df(bb,multiplier);
00936     temp[0]=region[iColumn0];
00937     temp[1]=region[iColumn1];
00938     temp[2]=region[iColumn2];
00939     temp[3]=region[iColumn3];
00940     __v2df v0 = __builtin_ia32_loadupd (temp);
00941     __v2df v1 = __builtin_ia32_loadupd (temp+2);
00942     __v2df a = __builtin_ia32_loadupd (element);
00943     a *= bb;
00944     v0 OPERATION a;
00945     a = __builtin_ia32_loadupd (element+2);
00946     a *= bb;
00947     v1 OPERATION a;
00948     __builtin_ia32_storeupd (temp, v0);
00949     __builtin_ia32_storeupd (temp+2, v1);
00950     region[iColumn0]=temp[0];
00951     region[iColumn1]=temp[1];
00952     region[iColumn2]=temp[2];
00953     region[iColumn3]=temp[3];
00954 #else
          // Scalar path: all loads first, then the updates, then all stores.
00955     double value0=region[iColumn0];
00956     double value1=region[iColumn1];
00957     double value2=region[iColumn2];
00958     double value3=region[iColumn3];
00959     value0 OPERATION multiplier*element[0];
00960     value1 OPERATION multiplier*element[1];
00961     value2 OPERATION multiplier*element[2];
00962     value3 OPERATION multiplier*element[3];
00963     region[iColumn0]=value0;
00964     region[iColumn1]=value1;
00965     region[iColumn2]=value2;
00966     region[iColumn3]=value3;
00967 #endif
00968 #else
          // Unsupported chunk size.
00969     abort();
00970 #endif
00971     element+=NEW_CHUNK_SIZE;
00972     thisColumn += NEW_CHUNK_SIZE;
00973   }
00974 }
// Scatter kernel for exactly five entries:
//   region[index_k] OPERATION multiplier * element[k],  k = 0..4.
// Indices are read from the memory directly after the five element values.
// Four entries go through the chunk loop (NEW_CHUNK_SIZE 2 or 4, anything else
// reaches abort()); the fifth is handled by the scalar tail after the loop.
00975 SCATTER_ATTRIBUTE void functionName(ScatterUpdate5)(int numberIn, CoinFactorizationDouble multiplier,
00976                            const CoinFactorizationDouble *  COIN_RESTRICT element,
00977                            CoinFactorizationDouble * COIN_RESTRICT region)
00978 {
00979 #ifndef NDEBUG
00980   assert (numberIn==5);
00981 #endif
00982   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+5);
        // 5 rounded down to a multiple of the chunk size (4 for chunk sizes 2 and 4).
00983   int nFull=5&(~(NEW_CHUNK_SIZE-1));
00984 #if AVX2==1
00985   double temp[4];
00986 #endif
00987   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
00988     //coin_prefetch_const(element+NEW_CHUNK_SIZE_INCREMENT);
00989 #if NEW_CHUNK_SIZE==2
00990     int iColumn0=thisColumn[0];
00991     int iColumn1=thisColumn[1];
00992     double value0=region[iColumn0];
00993     double value1=region[iColumn1];
00994     value0 OPERATION multiplier*element[0];
00995     value1 OPERATION multiplier*element[1];
00996     region[iColumn0]=value0;
00997     region[iColumn1]=value1;
00998 #elif NEW_CHUNK_SIZE==4
00999     int iColumn0=thisColumn[0];
01000     int iColumn1=thisColumn[1];
01001     int iColumn2=thisColumn[2];
01002     int iColumn3=thisColumn[3];
01003 #if AVX2==1
          // Vector path: gather into temp, update as two 2-double vectors, scatter back.
01004     __v2df bb;
01005     set_const_v2df(bb,multiplier);
01006     temp[0]=region[iColumn0];
01007     temp[1]=region[iColumn1];
01008     temp[2]=region[iColumn2];
01009     temp[3]=region[iColumn3];
01010     __v2df v0 = __builtin_ia32_loadupd (temp);
01011     __v2df v1 = __builtin_ia32_loadupd (temp+2);
01012     __v2df a = __builtin_ia32_loadupd (element);
01013     a *= bb;
01014     v0 OPERATION a;
01015     a = __builtin_ia32_loadupd (element+2);
01016     a *= bb;
01017     v1 OPERATION a;
01018     __builtin_ia32_storeupd (temp, v0);
01019     __builtin_ia32_storeupd (temp+2, v1);
01020     region[iColumn0]=temp[0];
01021     region[iColumn1]=temp[1];
01022     region[iColumn2]=temp[2];
01023     region[iColumn3]=temp[3];
01024 #else
          // Scalar path: all loads first, then the updates, then all stores.
01025     double value0=region[iColumn0];
01026     double value1=region[iColumn1];
01027     double value2=region[iColumn2];
01028     double value3=region[iColumn3];
01029     value0 OPERATION multiplier*element[0];
01030     value1 OPERATION multiplier*element[1];
01031     value2 OPERATION multiplier*element[2];
01032     value3 OPERATION multiplier*element[3];
01033     region[iColumn0]=value0;
01034     region[iColumn1]=value1;
01035     region[iColumn2]=value2;
01036     region[iColumn3]=value3;
01037 #endif
01038 #else
01039     abort();
01040 #endif
01041     element+=NEW_CHUNK_SIZE;
01042     thisColumn += NEW_CHUNK_SIZE;
01043   }
        // Tail: the fifth entry (element and thisColumn were advanced by the loop).
01044   int iColumn0=thisColumn[0];
01045   double value0=region[iColumn0];
01046   value0 OPERATION multiplier*element[0];
01047   region[iColumn0]=value0;
01048 }
// Scatter kernel for exactly six entries:
//   region[index_k] OPERATION multiplier * element[k],  k = 0..5.
// Indices are read from the memory directly after the six element values.
// With NEW_CHUNK_SIZE==2 the loop covers all six entries; with NEW_CHUNK_SIZE==4
// it covers four and the two-entry tail after the loop handles the rest.
// Any other chunk size reaches abort().
01049 SCATTER_ATTRIBUTE void functionName(ScatterUpdate6)(int numberIn, CoinFactorizationDouble multiplier,
01050                            const CoinFactorizationDouble *  COIN_RESTRICT element,
01051                            CoinFactorizationDouble * COIN_RESTRICT region)
01052 {
01053 #ifndef NDEBUG
01054   assert (numberIn==6);
01055 #endif
01056   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+6);
        // 6 rounded down to a multiple of the chunk size (6 for size 2, 4 for size 4).
01057   int nFull=6&(~(NEW_CHUNK_SIZE-1));
01058 #if AVX2==1
01059   double temp[4];
01060 #endif
01061   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
          // Prefetch hint six elements ahead of the current element pointer.
01062     coin_prefetch_const(element+6);
01063 #if NEW_CHUNK_SIZE==2
01064     int iColumn0=thisColumn[0];
01065     int iColumn1=thisColumn[1];
01066     double value0=region[iColumn0];
01067     double value1=region[iColumn1];
01068     value0 OPERATION multiplier*element[0];
01069     value1 OPERATION multiplier*element[1];
01070     region[iColumn0]=value0;
01071     region[iColumn1]=value1;
01072 #elif NEW_CHUNK_SIZE==4
01073     int iColumn0=thisColumn[0];
01074     int iColumn1=thisColumn[1];
01075     int iColumn2=thisColumn[2];
01076     int iColumn3=thisColumn[3];
01077 #if AVX2==1
          // Vector path: gather into temp, update as two 2-double vectors, scatter back.
01078     __v2df bb;
01079     set_const_v2df(bb,multiplier);
01080     temp[0]=region[iColumn0];
01081     temp[1]=region[iColumn1];
01082     temp[2]=region[iColumn2];
01083     temp[3]=region[iColumn3];
01084     __v2df v0 = __builtin_ia32_loadupd (temp);
01085     __v2df v1 = __builtin_ia32_loadupd (temp+2);
01086     __v2df a = __builtin_ia32_loadupd (element);
01087     a *= bb;
01088     v0 OPERATION a;
01089     a = __builtin_ia32_loadupd (element+2);
01090     a *= bb;
01091     v1 OPERATION a;
01092     __builtin_ia32_storeupd (temp, v0);
01093     __builtin_ia32_storeupd (temp+2, v1);
01094     region[iColumn0]=temp[0];
01095     region[iColumn1]=temp[1];
01096     region[iColumn2]=temp[2];
01097     region[iColumn3]=temp[3];
01098 #else
          // Scalar path: all loads first, then the updates, then all stores.
01099     double value0=region[iColumn0];
01100     double value1=region[iColumn1];
01101     double value2=region[iColumn2];
01102     double value3=region[iColumn3];
01103     value0 OPERATION multiplier*element[0];
01104     value1 OPERATION multiplier*element[1];
01105     value2 OPERATION multiplier*element[2];
01106     value3 OPERATION multiplier*element[3];
01107     region[iColumn0]=value0;
01108     region[iColumn1]=value1;
01109     region[iColumn2]=value2;
01110     region[iColumn3]=value3;
01111 #endif
01112 #else
01113     abort();
01114 #endif
01115     element+=NEW_CHUNK_SIZE;
01116     thisColumn += NEW_CHUNK_SIZE;
01117   }
01118 #if NEW_CHUNK_SIZE==4
        // Tail for chunk size 4: the two entries left after one full chunk.
01119   int iColumn0=thisColumn[0];
01120   int iColumn1=thisColumn[1];
01121   double value0=region[iColumn0];
01122   double value1=region[iColumn1];
01123   value0 OPERATION multiplier*element[0];
01124   value1 OPERATION multiplier*element[1];
01125   region[iColumn0]=value0;
01126   region[iColumn1]=value1;
01127 #endif
01128 }
// Scatter kernel for exactly seven entries:
//   region[index_k] OPERATION multiplier * element[k],  k = 0..6.
// Indices are read from the memory directly after the seven element values.
// With NEW_CHUNK_SIZE==2 the loop covers six entries and the tail one; with
// NEW_CHUNK_SIZE==4 the loop covers four and the tail three. Any other chunk
// size reaches abort() inside the loop.
01129 SCATTER_ATTRIBUTE void functionName(ScatterUpdate7)(int numberIn, CoinFactorizationDouble multiplier,
01130                            const CoinFactorizationDouble *  COIN_RESTRICT element,
01131                            CoinFactorizationDouble * COIN_RESTRICT region)
01132 {
01133 #ifndef NDEBUG
01134   assert (numberIn==7);
01135 #endif
01136   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+7);
        // 7 rounded down to a multiple of the chunk size (6 for size 2, 4 for size 4).
01137   int nFull=7&(~(NEW_CHUNK_SIZE-1));
01138 #if AVX2==1
01139   double temp[4];
01140 #endif
01141   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
          // NOTE(review): prefetch distance is element+6, the same as in ScatterUpdate6 - confirm intended.
01142     coin_prefetch_const(element+6);
01143 #if NEW_CHUNK_SIZE==2
01144     int iColumn0=thisColumn[0];
01145     int iColumn1=thisColumn[1];
01146     double value0=region[iColumn0];
01147     double value1=region[iColumn1];
01148     value0 OPERATION multiplier*element[0];
01149     value1 OPERATION multiplier*element[1];
01150     region[iColumn0]=value0;
01151     region[iColumn1]=value1;
01152 #elif NEW_CHUNK_SIZE==4
01153     int iColumn0=thisColumn[0];
01154     int iColumn1=thisColumn[1];
01155     int iColumn2=thisColumn[2];
01156     int iColumn3=thisColumn[3];
01157 #if AVX2==1
          // Vector path: gather into temp, update as two 2-double vectors, scatter back.
01158     __v2df bb;
01159     set_const_v2df(bb,multiplier);
01160     temp[0]=region[iColumn0];
01161     temp[1]=region[iColumn1];
01162     temp[2]=region[iColumn2];
01163     temp[3]=region[iColumn3];
01164     __v2df v0 = __builtin_ia32_loadupd (temp);
01165     __v2df v1 = __builtin_ia32_loadupd (temp+2);
01166     __v2df a = __builtin_ia32_loadupd (element);
01167     a *= bb;
01168     v0 OPERATION a;
01169     a = __builtin_ia32_loadupd (element+2);
01170     a *= bb;
01171     v1 OPERATION a;
01172     __builtin_ia32_storeupd (temp, v0);
01173     __builtin_ia32_storeupd (temp+2, v1);
01174     region[iColumn0]=temp[0];
01175     region[iColumn1]=temp[1];
01176     region[iColumn2]=temp[2];
01177     region[iColumn3]=temp[3];
01178 #else
          // Scalar path: all loads first, then the updates, then all stores.
01179     double value0=region[iColumn0];
01180     double value1=region[iColumn1];
01181     double value2=region[iColumn2];
01182     double value3=region[iColumn3];
01183     value0 OPERATION multiplier*element[0];
01184     value1 OPERATION multiplier*element[1];
01185     value2 OPERATION multiplier*element[2];
01186     value3 OPERATION multiplier*element[3];
01187     region[iColumn0]=value0;
01188     region[iColumn1]=value1;
01189     region[iColumn2]=value2;
01190     region[iColumn3]=value3;
01191 #endif
01192 #else
01193     abort();
01194 #endif
01195     element+=NEW_CHUNK_SIZE;
01196     thisColumn += NEW_CHUNK_SIZE;
01197   }
01198 #if NEW_CHUNK_SIZE==2
        // Tail for chunk size 2: the seventh entry.
01199   int iColumn0=thisColumn[0];
01200   double value0=region[iColumn0];
01201   value0 OPERATION multiplier*element[0];
01202   region[iColumn0]=value0;
01203 #else
        // Tail otherwise: the last three entries, in scalar code.
01204   int iColumn0=thisColumn[0];
01205   int iColumn1=thisColumn[1];
01206   int iColumn2=thisColumn[2];
01207   double value0=region[iColumn0];
01208   double value1=region[iColumn1];
01209   double value2=region[iColumn2];
01210   value0 OPERATION multiplier*element[0];
01211   value1 OPERATION multiplier*element[1];
01212   value2 OPERATION multiplier*element[2];
01213   region[iColumn0]=value0;
01214   region[iColumn1]=value1;
01215   region[iColumn2]=value2;
01216 #endif
01217 }
// Scatter kernel for exactly eight entries:
//   region[index_k] OPERATION multiplier * element[k],  k = 0..7.
// Indices are read from the memory directly after the eight element values.
// All eight entries are handled by the chunk loop (NEW_CHUNK_SIZE 2 or 4); any
// other chunk size reaches abort(). There is no tail.
01218 SCATTER_ATTRIBUTE void functionName(ScatterUpdate8)(int numberIn, CoinFactorizationDouble multiplier,
01219                            const CoinFactorizationDouble *  COIN_RESTRICT element,
01220                            CoinFactorizationDouble * COIN_RESTRICT region)
01221 {
01222 #ifndef NDEBUG
01223   assert (numberIn==8);
01224 #endif
01225   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+8);
        // 8 rounded down to a multiple of the chunk size (8 for chunk sizes 2 and 4).
01226   int nFull=8&(~(NEW_CHUNK_SIZE-1));
01227 #if AVX2==1
01228   double temp[4];
01229 #endif
01230   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
          // NOTE(review): prefetch distance is element+6, the same as in ScatterUpdate6 - confirm intended.
01231     coin_prefetch_const(element+6);
01232 #if NEW_CHUNK_SIZE==2
01233     int iColumn0=thisColumn[0];
01234     int iColumn1=thisColumn[1];
01235     double value0=region[iColumn0];
01236     double value1=region[iColumn1];
01237     value0 OPERATION multiplier*element[0];
01238     value1 OPERATION multiplier*element[1];
01239     region[iColumn0]=value0;
01240     region[iColumn1]=value1;
01241 #elif NEW_CHUNK_SIZE==4
01242     int iColumn0=thisColumn[0];
01243     int iColumn1=thisColumn[1];
01244     int iColumn2=thisColumn[2];
01245     int iColumn3=thisColumn[3];
01246 #if AVX2==1
          // Vector path: gather into temp, update as two 2-double vectors, scatter back.
01247     __v2df bb;
01248     set_const_v2df(bb,multiplier);
01249     temp[0]=region[iColumn0];
01250     temp[1]=region[iColumn1];
01251     temp[2]=region[iColumn2];
01252     temp[3]=region[iColumn3];
01253     __v2df v0 = __builtin_ia32_loadupd (temp);
01254     __v2df v1 = __builtin_ia32_loadupd (temp+2);
01255     __v2df a = __builtin_ia32_loadupd (element);
01256     a *= bb;
01257     v0 OPERATION a;
01258     a = __builtin_ia32_loadupd (element+2);
01259     a *= bb;
01260     v1 OPERATION a;
01261     __builtin_ia32_storeupd (temp, v0);
01262     __builtin_ia32_storeupd (temp+2, v1);
01263     region[iColumn0]=temp[0];
01264     region[iColumn1]=temp[1];
01265     region[iColumn2]=temp[2];
01266     region[iColumn3]=temp[3];
01267 #else
          // Scalar path: all loads first, then the updates, then all stores.
01268     double value0=region[iColumn0];
01269     double value1=region[iColumn1];
01270     double value2=region[iColumn2];
01271     double value3=region[iColumn3];
01272     value0 OPERATION multiplier*element[0];
01273     value1 OPERATION multiplier*element[1];
01274     value2 OPERATION multiplier*element[2];
01275     value3 OPERATION multiplier*element[3];
01276     region[iColumn0]=value0;
01277     region[iColumn1]=value1;
01278     region[iColumn2]=value2;
01279     region[iColumn3]=value3;
01280 #endif
01281 #else
01282     abort();
01283 #endif
01284     element+=NEW_CHUNK_SIZE;
01285     thisColumn += NEW_CHUNK_SIZE;
01286   }
01287 }
// Scatter kernel for a variable entry count that is a multiple of four:
//   region[index_k] OPERATION multiplier * element[k],  k = 0..numberIn-1.
// Indices are read from the memory directly after the numberIn element values.
// All entries are handled by the chunk loop (NEW_CHUNK_SIZE 2 or 4); any other
// chunk size reaches abort(). There is no tail.
01288 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4N)(int numberIn, CoinFactorizationDouble multiplier,
01289                             const CoinFactorizationDouble *  COIN_RESTRICT element,
01290                             CoinFactorizationDouble * COIN_RESTRICT region)
01291 {
        // Precondition: numberIn is a multiple of 4.
01292   assert ((numberIn&3)==0);
01293   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
        // numberIn rounded down to a multiple of the chunk size.
01294   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
01295 #if AVX2==1
01296   double temp[4];
01297 #endif
01298   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
          // Prefetch hints ahead of both the element and the index streams.
01299     coin_prefetch_const(element+16);
01300     coin_prefetch_const(thisColumn+32);
01301 #if NEW_CHUNK_SIZE==2
01302     int iColumn0=thisColumn[0];
01303     int iColumn1=thisColumn[1];
01304     double value0=region[iColumn0];
01305     double value1=region[iColumn1];
01306     value0 OPERATION multiplier*element[0];
01307     value1 OPERATION multiplier*element[1];
01308     region[iColumn0]=value0;
01309     region[iColumn1]=value1;
01310 #elif NEW_CHUNK_SIZE==4
01311     int iColumn0=thisColumn[0];
01312     int iColumn1=thisColumn[1];
01313     int iColumn2=thisColumn[2];
01314     int iColumn3=thisColumn[3];
01315 #if AVX2==1
          // Vector path: gather into temp, update as two 2-double vectors, scatter back.
01316     __v2df bb;
01317     set_const_v2df(bb,multiplier);
01318     temp[0]=region[iColumn0];
01319     temp[1]=region[iColumn1];
01320     temp[2]=region[iColumn2];
01321     temp[3]=region[iColumn3];
01322     __v2df v0 = __builtin_ia32_loadupd (temp);
01323     __v2df v1 = __builtin_ia32_loadupd (temp+2);
01324     __v2df a = __builtin_ia32_loadupd (element);
01325     a *= bb;
01326     v0 OPERATION a;
01327     a = __builtin_ia32_loadupd (element+2);
01328     a *= bb;
01329     v1 OPERATION a;
01330     __builtin_ia32_storeupd (temp, v0);
01331     __builtin_ia32_storeupd (temp+2, v1);
01332     region[iColumn0]=temp[0];
01333     region[iColumn1]=temp[1];
01334     region[iColumn2]=temp[2];
01335     region[iColumn3]=temp[3];
01336 #else
          // Scalar path: all loads first, then the updates, then all stores.
01337     double value0=region[iColumn0];
01338     double value1=region[iColumn1];
01339     double value2=region[iColumn2];
01340     double value3=region[iColumn3];
01341     value0 OPERATION multiplier*element[0];
01342     value1 OPERATION multiplier*element[1];
01343     value2 OPERATION multiplier*element[2];
01344     value3 OPERATION multiplier*element[3];
01345     region[iColumn0]=value0;
01346     region[iColumn1]=value1;
01347     region[iColumn2]=value2;
01348     region[iColumn3]=value3;
01349 #endif
01350 #else
01351     abort();
01352 #endif
01353     element+=NEW_CHUNK_SIZE;
01354     thisColumn += NEW_CHUNK_SIZE;
01355   }
01356 }
// Scatter kernel for a variable entry count of the form 4N+1:
//   region[index_k] OPERATION multiplier * element[k],  k = 0..numberIn-1.
// Indices are read from the memory directly after the numberIn element values.
// The chunk loop (NEW_CHUNK_SIZE 2 or 4, anything else reaches abort()) handles
// the first numberIn-1 entries; the scalar tail after the loop handles the last.
01357 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus1)(int numberIn, CoinFactorizationDouble multiplier,
01358                                  const CoinFactorizationDouble *  COIN_RESTRICT element,
01359                                  CoinFactorizationDouble * COIN_RESTRICT region)
01360 {
        // Precondition: numberIn leaves remainder 1 when divided by 4.
01361   assert ((numberIn&3)==1);
01362   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
        // numberIn rounded down to a multiple of the chunk size.
01363   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
01364 #if AVX2==1
01365   double temp[4];
01366 #endif
01367   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
          // Prefetch hints ahead of both the element and the index streams.
01368     coin_prefetch_const(element+16);
01369     coin_prefetch_const(thisColumn+32);
01370 #if NEW_CHUNK_SIZE==2
01371     int iColumn0=thisColumn[0];
01372     int iColumn1=thisColumn[1];
01373     double value0=region[iColumn0];
01374     double value1=region[iColumn1];
01375     value0 OPERATION multiplier*element[0];
01376     value1 OPERATION multiplier*element[1];
01377     region[iColumn0]=value0;
01378     region[iColumn1]=value1;
01379 #elif NEW_CHUNK_SIZE==4
01380     int iColumn0=thisColumn[0];
01381     int iColumn1=thisColumn[1];
01382     int iColumn2=thisColumn[2];
01383     int iColumn3=thisColumn[3];
01384 #if AVX2==1
          // Vector path: gather into temp, update as two 2-double vectors, scatter back.
01385     __v2df bb;
01386     set_const_v2df(bb,multiplier);
01387     temp[0]=region[iColumn0];
01388     temp[1]=region[iColumn1];
01389     temp[2]=region[iColumn2];
01390     temp[3]=region[iColumn3];
01391     __v2df v0 = __builtin_ia32_loadupd (temp);
01392     __v2df v1 = __builtin_ia32_loadupd (temp+2);
01393     __v2df a = __builtin_ia32_loadupd (element);
01394     a *= bb;
01395     v0 OPERATION a;
01396     a = __builtin_ia32_loadupd (element+2);
01397     a *= bb;
01398     v1 OPERATION a;
01399     __builtin_ia32_storeupd (temp, v0);
01400     __builtin_ia32_storeupd (temp+2, v1);
01401     region[iColumn0]=temp[0];
01402     region[iColumn1]=temp[1];
01403     region[iColumn2]=temp[2];
01404     region[iColumn3]=temp[3];
01405 #else
          // Scalar path: all loads first, then the updates, then all stores.
01406     double value0=region[iColumn0];
01407     double value1=region[iColumn1];
01408     double value2=region[iColumn2];
01409     double value3=region[iColumn3];
01410     value0 OPERATION multiplier*element[0];
01411     value1 OPERATION multiplier*element[1];
01412     value2 OPERATION multiplier*element[2];
01413     value3 OPERATION multiplier*element[3];
01414     region[iColumn0]=value0;
01415     region[iColumn1]=value1;
01416     region[iColumn2]=value2;
01417     region[iColumn3]=value3;
01418 #endif
01419 #else
01420     abort();
01421 #endif
01422     element+=NEW_CHUNK_SIZE;
01423     thisColumn += NEW_CHUNK_SIZE;
01424   }
        // Tail: the single entry left over after the full chunks.
01425   int iColumn0=thisColumn[0];
01426   double value0=region[iColumn0];
01427   value0 OPERATION multiplier*element[0];
01428   region[iColumn0]=value0;
01429 }
// Scatter update for a packed list of entries whose count has the form 4N+2.
//
// For every entry k in [0,numberIn) this performs
//   region[index[k]] OPERATION multiplier*element[k]
// OPERATION, functionName, SCATTER_ATTRIBUTE, NEW_CHUNK_SIZE and AVX2 are
// macros defined outside this function.
// NOTE(review): OPERATION is presumably a compound assignment such as += or -=
// - confirm at its definition.
//
// Parameters:
//   numberIn   - number of entries; asserted to satisfy (numberIn&3)==2.
//   multiplier - scalar that multiplies every element before OPERATION.
//   element    - the numberIn values. The int indices are read from the
//                memory that begins at element+numberIn.
//   region     - array updated in place at the positions named by the indices.
//
// NOTE(review): the scalar paths keep region values in plain double locals
// while region is CoinFactorizationDouble - confirm the two types agree.
01430 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus2)(int numberIn, CoinFactorizationDouble multiplier,
01431                                  const CoinFactorizationDouble *  COIN_RESTRICT element,
01432                                  CoinFactorizationDouble * COIN_RESTRICT region)
01433 {
01434   assert ((numberIn&3)==2);
        // The index array is read from directly behind the numberIn elements.
01435   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
        // numberIn rounded down to a multiple of NEW_CHUNK_SIZE (the mask
        // trick is valid for the power-of-two sizes 2 and 4 handled below).
01436   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
01437 #if AVX2==1
        // Staging buffer used to gather/scatter four region values.
01438   double temp[4];
01439 #endif
        // Main loop: NEW_CHUNK_SIZE entries per pass; element and thisColumn
        // are advanced at the bottom, j only counts entries consumed.
01440   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
          // NOTE(review): presumably prefetch hints for data a few passes
          // ahead - confirm against the definition of coin_prefetch_const.
01441     coin_prefetch_const(element+16);
01442     coin_prefetch_const(thisColumn+32);
01443 #if NEW_CHUNK_SIZE==2
          // Two entries per pass. Both region values are read before either
          // is written back.
01444     int iColumn0=thisColumn[0];
01445     int iColumn1=thisColumn[1];
01446     double value0=region[iColumn0];
01447     double value1=region[iColumn1];
01448     value0 OPERATION multiplier*element[0];
01449     value1 OPERATION multiplier*element[1];
01450     region[iColumn0]=value0;
01451     region[iColumn1]=value1;
01452 #elif NEW_CHUNK_SIZE==4
          // Four entries per pass.
01453     int iColumn0=thisColumn[0];
01454     int iColumn1=thisColumn[1];
01455     int iColumn2=thisColumn[2];
01456     int iColumn3=thisColumn[3];
01457 #if AVX2==1
          // Two-lane vector variant: gather the four region values into temp,
          // process them as two __v2df pairs, then scatter temp back.
          // NOTE(review): set_const_v2df presumably fills both lanes of bb
          // with multiplier - confirm at its definition.
01458     __v2df bb;
01459     set_const_v2df(bb,multiplier);
01460     temp[0]=region[iColumn0];
01461     temp[1]=region[iColumn1];
01462     temp[2]=region[iColumn2];
01463     temp[3]=region[iColumn3];
01464     __v2df v0 = __builtin_ia32_loadupd (temp);
01465     __v2df v1 = __builtin_ia32_loadupd (temp+2);
          // a is reused: first for element[0..1], then for element[2..3].
01466     __v2df a = __builtin_ia32_loadupd (element);
01467     a *= bb;
01468     v0 OPERATION a;
01469     a = __builtin_ia32_loadupd (element+2);
01470     a *= bb;
01471     v1 OPERATION a;
01472     __builtin_ia32_storeupd (temp, v0);
01473     __builtin_ia32_storeupd (temp+2, v1);
01474     region[iColumn0]=temp[0];
01475     region[iColumn1]=temp[1];
01476     region[iColumn2]=temp[2];
01477     region[iColumn3]=temp[3];
01478 #else
          // Scalar variant: read all four region values, update them, then
          // write all four back in index order 0..3.
01479     double value0=region[iColumn0];
01480     double value1=region[iColumn1];
01481     double value2=region[iColumn2];
01482     double value3=region[iColumn3];
01483     value0 OPERATION multiplier*element[0];
01484     value1 OPERATION multiplier*element[1];
01485     value2 OPERATION multiplier*element[2];
01486     value3 OPERATION multiplier*element[3];
01487     region[iColumn0]=value0;
01488     region[iColumn1]=value1;
01489     region[iColumn2]=value2;
01490     region[iColumn3]=value3;
01491 #endif
01492 #else
          // Only chunk sizes 2 and 4 are implemented.
01493     abort();
01494 #endif
01495     element+=NEW_CHUNK_SIZE;
01496     thisColumn += NEW_CHUNK_SIZE;
01497   }
        // Tail: with a chunk size of 4 two entries remain. With a chunk size
        // of 2 numberIn is even, so nFull equals numberIn and nothing is left.
01498 #if NEW_CHUNK_SIZE==4
01499   int iColumn0=thisColumn[0];
01500   int iColumn1=thisColumn[1];
01501   double value0=region[iColumn0];
01502   double value1=region[iColumn1];
01503   value0 OPERATION multiplier*element[0];
01504   value1 OPERATION multiplier*element[1];
01505   region[iColumn0]=value0;
01506   region[iColumn1]=value1;
01507 #endif
01508 }
// Scatter update for a packed list of entries whose count has the form 4N+3.
//
// For every entry k in [0,numberIn) this performs
//   region[index[k]] OPERATION multiplier*element[k]
// OPERATION, functionName, SCATTER_ATTRIBUTE, NEW_CHUNK_SIZE and AVX2 are
// macros defined outside this function.
// NOTE(review): OPERATION is presumably a compound assignment such as += or -=
// - confirm at its definition.
//
// Parameters:
//   numberIn   - number of entries; asserted to satisfy (numberIn&3)==3.
//   multiplier - scalar that multiplies every element before OPERATION.
//   element    - the numberIn values. The int indices are read from the
//                memory that begins at element+numberIn.
//   region     - array updated in place at the positions named by the indices.
//
// NOTE(review): the scalar paths keep region values in plain double locals
// while region is CoinFactorizationDouble - confirm the two types agree.
01509 SCATTER_ATTRIBUTE void functionName(ScatterUpdate4NPlus3)(int numberIn, CoinFactorizationDouble multiplier,
01510                                  const CoinFactorizationDouble *  COIN_RESTRICT element,
01511                                  CoinFactorizationDouble * COIN_RESTRICT region)
01512 {
01513   assert ((numberIn&3)==3);
        // The index array is read from directly behind the numberIn elements.
01514   const int * COIN_RESTRICT thisColumn = reinterpret_cast<const int *>(element+numberIn);
        // numberIn rounded down to a multiple of NEW_CHUNK_SIZE (the mask
        // trick is valid for the power-of-two sizes 2 and 4 handled below).
01515   int nFull=numberIn&(~(NEW_CHUNK_SIZE-1));
01516 #if AVX2==1
        // Staging buffer used to gather/scatter four region values.
01517   double temp[4];
01518 #endif
        // Main loop: NEW_CHUNK_SIZE entries per pass; element and thisColumn
        // are advanced at the bottom, j only counts entries consumed.
01519   for (int j=0;j<nFull;j+=NEW_CHUNK_SIZE) {
          // NOTE(review): presumably prefetch hints for data a few passes
          // ahead - confirm against the definition of coin_prefetch_const.
01520     coin_prefetch_const(element+16);
01521     coin_prefetch_const(thisColumn+32);
01522 #if NEW_CHUNK_SIZE==2
          // Two entries per pass. Both region values are read before either
          // is written back.
01523     int iColumn0=thisColumn[0];
01524     int iColumn1=thisColumn[1];
01525     double value0=region[iColumn0];
01526     double value1=region[iColumn1];
01527     value0 OPERATION multiplier*element[0];
01528     value1 OPERATION multiplier*element[1];
01529     region[iColumn0]=value0;
01530     region[iColumn1]=value1;
01531 #elif NEW_CHUNK_SIZE==4
          // Four entries per pass.
01532     int iColumn0=thisColumn[0];
01533     int iColumn1=thisColumn[1];
01534     int iColumn2=thisColumn[2];
01535     int iColumn3=thisColumn[3];
01536 #if AVX2==1
          // Two-lane vector variant: gather the four region values into temp,
          // process them as two __v2df pairs, then scatter temp back.
          // NOTE(review): set_const_v2df presumably fills both lanes of bb
          // with multiplier - confirm at its definition.
01537     __v2df bb;
01538     set_const_v2df(bb,multiplier);
01539     temp[0]=region[iColumn0];
01540     temp[1]=region[iColumn1];
01541     temp[2]=region[iColumn2];
01542     temp[3]=region[iColumn3];
01543     __v2df v0 = __builtin_ia32_loadupd (temp);
01544     __v2df v1 = __builtin_ia32_loadupd (temp+2);
          // a is reused: first for element[0..1], then for element[2..3].
01545     __v2df a = __builtin_ia32_loadupd (element);
01546     a *= bb;
01547     v0 OPERATION a;
01548     a = __builtin_ia32_loadupd (element+2);
01549     a *= bb;
01550     v1 OPERATION a;
01551     __builtin_ia32_storeupd (temp, v0);
01552     __builtin_ia32_storeupd (temp+2, v1);
01553     region[iColumn0]=temp[0];
01554     region[iColumn1]=temp[1];
01555     region[iColumn2]=temp[2];
01556     region[iColumn3]=temp[3];
01557 #else
          // Scalar variant: read all four region values, update them, then
          // write all four back in index order 0..3.
01558     double value0=region[iColumn0];
01559     double value1=region[iColumn1];
01560     double value2=region[iColumn2];
01561     double value3=region[iColumn3];
01562     value0 OPERATION multiplier*element[0];
01563     value1 OPERATION multiplier*element[1];
01564     value2 OPERATION multiplier*element[2];
01565     value3 OPERATION multiplier*element[3];
01566     region[iColumn0]=value0;
01567     region[iColumn1]=value1;
01568     region[iColumn2]=value2;
01569     region[iColumn3]=value3;
01570 #endif
01571 #else
          // Only chunk sizes 2 and 4 are implemented.
01572     abort();
01573 #endif
01574     element+=NEW_CHUNK_SIZE;
01575     thisColumn += NEW_CHUNK_SIZE;
01576   }
        // Tail: numberIn is odd, so a chunk size of 2 leaves exactly one entry.
01577 #if NEW_CHUNK_SIZE==2
01578   int iColumn0=thisColumn[0];
01579   double value0=region[iColumn0];
01580   value0 OPERATION multiplier*element[0];
01581   region[iColumn0]=value0;
01582 #else
        // Otherwise (chunk size 4) three entries remain. As in the main loop,
        // all three region values are read before any is written back.
01583   int iColumn0=thisColumn[0];
01584   int iColumn1=thisColumn[1];
01585   int iColumn2=thisColumn[2];
01586   double value0=region[iColumn0];
01587   double value1=region[iColumn1];
01588   double value2=region[iColumn2];
01589   value0 OPERATION multiplier*element[0];
01590   value1 OPERATION multiplier*element[1];
01591   value2 OPERATION multiplier*element[2];
01592   region[iColumn0]=value0;
01593   region[iColumn1]=value1;
01594   region[iColumn2]=value2;
01595 #endif
01596 }
01597 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 10 Mar 2015 for Clp by  doxygen 1.6.1