Actual source code: sseenabled.c

  1: /* $Id: sseenabled.c,v 1.15 2001/07/20 21:03:24 buschelm Exp $ */
 2:  #include petscsys.h

  4: #ifdef PETSC_HAVE_SSE

  6: #include PETSC_HAVE_SSE
  7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  9: #include <string.h>

 11: #undef __FUNCT__
 13: int PetscSSEHardwareTest(PetscTruth *flag) {
 14:   int  ierr;
 15:   char *vendor;
 16:   char Intel[13]="GenuineIntel";
 17:   char AMD[13]  ="AuthenticAMD";

 20:   PetscMalloc(13*sizeof(char),&vendor);
 21:   strcpy(vendor,"************");
 22:   CPUID_GET_VENDOR(vendor);
 23:   if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
 24:     /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
 25:     /* to denote availability of SSE Support */
 26:     unsigned long myeax,myebx,myecx,myedx;
 27:     CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
 28:     if (myedx & SSE_FEATURE_FLAG) {
 29:       *flag = PETSC_TRUE;
 30:     } else {
 31:       *flag = PETSC_FALSE;
 32:     }
 33:   }
 34:   PetscFree(vendor);
 35:   return(0);
 36: }

 38: #ifdef PARCH_linux
#include <errno.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
/* 
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.  
   If it works, great!  If not, an illegal instruction signal will be raised,
   so catch it and return an error code. 
*/
 47: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 49: static void PetscSSEDisabledHandler(int sig) {
 50:   signal(SIGILL,SIG_IGN);
 51:   exit(-1);
 52: }

 54: #undef __FUNCT__
 56: int PetscSSEOSEnabledTest_Linux(PetscTruth *flag) {
 57:   int status, pid = 0;
 59:   signal(SIGILL,PetscSSEDisabledHandler);
 60:   pid = fork();
 61:   if (pid==0) {
 62:     SSE_SCOPE_BEGIN;
 63:       XOR_PS(XMM0,XMM0);
 64:     SSE_SCOPE_END;
 65:     exit(0);
 66:   } else {
 67:     wait(&status);
 68:   }
 69:   if (!status) {
 70:     *flag = PETSC_TRUE;
 71:   } else {
 72:     *flag = PETSC_FALSE;
 73:   }
 74:   return(0);
 75: }

 77: #endif
 78: #ifdef PARCH_win32
/* 
   Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
   Windows ME/2000 do not disable SSE hardware.
*/
 83: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 84: #endif 

 86: #undef __FUNCT__
 88: int PetscSSEOSEnabledTest_TRUE(PetscTruth *flag) {
 90:   if (flag) {
 91:     *flag = PETSC_TRUE;
 92:   }
 93:   return(0);
 94: }

 96: #else  /* Not defined PETSC_HAVE_SSE */

 98: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
 99: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

101: #undef __FUNCT__
103: int PetscSSEEnabledTest_FALSE(PetscTruth *flag) {
105:   if (flag) {
106:     *flag = PETSC_FALSE;
107:   }
108:   return(0);
109: }

111: #endif /* defined PETSC_HAVE_SSE */

113: #undef __FUNCT__
115: /*@C
116:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction 
117:      set can be used.  Some operating systems do not allow the use of these instructions despite
118:      hardware availability.

120:      Collective on MPI_Comm

122:      Input Parameter:
123: .    comm - the MPI Communicator

125:      Output Parameters:
126: .    lflag - Local Flag:  PETSC_TRUE if enabled in this process
127: .    gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm

129:      Notes:
130:      PETSC_NULL can be specified for lflag or gflag if either of these values are not desired.

132:      Options Database Keys:
133: .    -disable_sse - Disable use of hand tuned Intel SSE implementations
134: .    -enable_sse  - Enable use of hand tuned Intel SSE implementations

136:      Level: developer
137: @*/
138: static PetscTruth petsc_sse_local_is_untested  = PETSC_TRUE;
139: static PetscTruth petsc_sse_enabled_local      = PETSC_FALSE;
140: static PetscTruth petsc_sse_global_is_untested = PETSC_TRUE;
141: static PetscTruth petsc_sse_enabled_global     = PETSC_FALSE;
142: int PetscSSEIsEnabled(MPI_Comm comm,PetscTruth *lflag,PetscTruth *gflag) {
144:   PetscTruth disabled_option,enabled_option;


148:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
149:     disabled_option = PETSC_FALSE;
150:     enabled_option  = PETSC_FALSE;

152:     PetscOptionsName("-disable_sse",
153:                             "Disable use of hand tuned Intel SSE implementations.","PetscSSEIsEnabled",&disabled_option);
154:     if (disabled_option) {
155:       petsc_sse_local_is_untested  = PETSC_FALSE;
156:       petsc_sse_global_is_untested = PETSC_FALSE;
157:     }

159:     PetscOptionsName("-enable_sse",
160:                             "Enable use of hand tuned Intel SSE implementations.","PetscSSEIsEnabled",&enabled_option);
161:     if (enabled_option) {
162:       petsc_sse_local_is_untested  = PETSC_FALSE;
163:       petsc_sse_enabled_local      = PETSC_TRUE;
164:       petsc_sse_global_is_untested = PETSC_FALSE;
165:       petsc_sse_enabled_global     = PETSC_TRUE;
166:     }

168:     if (petsc_sse_local_is_untested) {
169:       PetscSSEHardwareTest(&petsc_sse_enabled_local);
170:       if (petsc_sse_enabled_local) {
171:         PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
172:       }
173:       petsc_sse_local_is_untested = PETSC_FALSE;
174:     }

176:     if (gflag && petsc_sse_global_is_untested) {
177:       MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPI_INT,MPI_LAND,comm);
178:       petsc_sse_global_is_untested = PETSC_FALSE;
179:     }
180:   }

182:   if (lflag) {
183:     *lflag = petsc_sse_enabled_local;
184:   }
185:   if (gflag) {
186:     *gflag = petsc_sse_enabled_global;
187:   }
188:   return(0);
189: }