blitz/tuning.h

Go to the documentation of this file.
00001 /***************************************************************************
00002  * blitz/tuning.h      Platform-specific code tuning
00003  *
00004  * $Id: tuning.h,v 1.4 2003/01/14 11:29:18 patricg Exp $
00005  *
00006  * Copyright (C) 1997-2001 Todd Veldhuizen <tveldhui@oonumerics.org>
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License
00010  * as published by the Free Software Foundation; either version 2
00011  * of the License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * Suggestions:          blitz-dev@oonumerics.org
00019  * Bugs:                 blitz-bugs@oonumerics.org
00020  *
00021  * For more information, please see the Blitz++ Home Page:
00022  *    http://oonumerics.org/blitz/
00023  *
00024  ***************************************************************************/
00025 
00026 #ifndef BZ_TUNING_H
00027 #define BZ_TUNING_H
00028 
00029 // Estimated L1/L2 cache sizes (presumably in bytes -- TODO confirm).
00030 // Keep these conservative, i.e. underestimate the real cache sizes.
00031 #define BZ_L1_CACHE_ESTIMATED_SIZE    8192
00032 #define BZ_L2_CACHE_ESTIMATED_SIZE    65536
00033 // Default tuning switches.  Each #undef below also discards any
00034 // definition of that macro made before this header was included.
00035 #undef  BZ_PARTIAL_LOOP_UNROLL
00036 #define BZ_PASS_EXPR_BY_VALUE
00037 #undef  BZ_PTR_INC_FASTER_THAN_INDIRECTION
00038 #define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
00039 #undef  BZ_KCC_COPY_PROPAGATION_KLUDGE
00040 #undef  BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
00041 #undef  BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
00042 #define BZ_INLINE_GROUP1
00043 #define BZ_INLINE_GROUP2
00044 #define BZ_COLLAPSE_LOOPS
00045 #define BZ_USE_FAST_READ_ARRAY_EXPR
00046 #define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
00047 #undef  BZ_ARRAY_SPACE_FILLING_TRAVERSAL
00048 #undef  BZ_ARRAY_FAST_TRAVERSAL_UNROLL
00049 #undef  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
00050 #undef  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
00051 #define BZ_ARRAY_2D_STENCIL_TILING
00052 #define BZ_ARRAY_2D_STENCIL_TILE_SIZE       128
00053 #undef  BZ_INTERLACE_ARRAYS
00054 #undef  BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
00055 #define BZ_FAST_COMPILE   // also turns on BZ_ETPARMS_CONSTREF and BZ_NO_INLINE_ET below
00056 
00057 // BZ_NEW_EXPRESSION_TEMPLATES is defined unless BZ_DISABLE_NEW_ET is defined.
00058 #ifndef BZ_DISABLE_NEW_ET
00059  #define BZ_NEW_EXPRESSION_TEMPLATES
00060 #endif
00061 // BZ_FAST_COMPILE makes BZ_ETPARM(X) expand to `const X&` and _bz_inline_et expand to nothing (see below).
00062 #ifdef BZ_FAST_COMPILE
00063 #define BZ_ETPARMS_CONSTREF
00064 #define BZ_NO_INLINE_ET
00065 #endif
00066 
00067 /*
00068  * Platform-specific tuning: per-compiler overrides of the defaults above.
00069  */
00070 
00071 #ifdef _CRAYT3E
00072  // The backend compiler on the T3E does a better job of
00073  // loop unrolling, so the Blitz++ unrolling switches are turned off.
00074  #undef BZ_PARTIAL_LOOP_UNROLL
00075  #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
00076  #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
00077 #endif
00078 
00079 #ifdef __GNUC__
00080  // The egcs compiler does a good job of loop unrolling if
00081  // -funroll-loops is used, so the unrolling switches are turned off.
00082  #undef BZ_PARTIAL_LOOP_UNROLL
00083  #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
00084  #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
00085 #endif
00086 // Opt-out: defining BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE forces the kludge switch off.
00087 #ifdef  BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE
00088  #undef BZ_KCC_COPY_PROPAGATION_KLUDGE
00089 #endif
00090 // _bz_inline1 expands to `inline` only when BZ_INLINE_GROUP1 is defined.
00091 #ifdef  BZ_INLINE_GROUP1
00092  #define _bz_inline1 inline
00093 #else
00094  #define _bz_inline1
00095 #endif
00096 // _bz_inline2 expands to `inline` only when BZ_INLINE_GROUP2 is defined.
00097 #ifdef  BZ_INLINE_GROUP2
00098  #define _bz_inline2 inline
00099 #else
00100  #define _bz_inline2
00101 #endif
00102 // _bz_inline_et expands to `inline` unless BZ_NO_INLINE_ET is defined.
00103 #ifdef  BZ_NO_INLINE_ET
00104  #define _bz_inline_et 
00105 #else
00106  #define _bz_inline_et inline
00107 #endif
00108 // BZ_ETPARM(X) expands to `const X&` when BZ_ETPARMS_CONSTREF is defined, otherwise to plain `X`.
00109 #ifdef  BZ_ETPARMS_CONSTREF
00110  #define BZ_ETPARM(X) const X&
00111 #else
00112  #define BZ_ETPARM(X) X
00113 #endif
00114 
00115 #ifdef __DECCXX
00116  // The DEC cxx compiler has problems with loop unrolling
00117  // because of aliasing.  Loop unrolling and anti-aliasing
00118  // are therefore done by Blitz++ itself.
00119 
00120   #define  BZ_PARTIAL_LOOP_UNROLL
00121   #define  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
00122   #define  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
00123 #endif
00124 
00125 /*
00126  * BZ_NO_PROPAGATE(X) prevents the compiler from performing
00127  * copy propagation on a variable.  This is used for loop
00128  * unrolling to prevent KAI C++ from rearranging the
00129  * ordering of memory accesses.  By default it expands to X
00130  * unchanged; see the __KCC override below.
00131  */
00132 #define BZ_NO_PROPAGATE(X)   X
00133 
00134 #ifdef __KCC
00135 #ifdef BZ_USE_NO_PROPAGATE
00136     extern "C" int __kai_apply(const char*, ...);
00137 
00138     #undef  BZ_NO_PROPAGATE   // #undef takes the macro name only, no parameter list
00139     #define BZ_NO_PROPAGATE(X)  __kai_apply("(%a)",&(X))
00140 #endif
00141 #endif
00142 
00143 #endif // BZ_TUNING_H

Generated on Mon Dec 3 09:22:41 2007 for blitz by doxygen 1.5.1