Actual source code: sseenabled.c
1: /* $Id: sseenabled.c,v 1.15 2001/07/20 21:03:24 buschelm Exp $ */
#include "petscsys.h"

#ifdef PETSC_HAVE_SSE

#include PETSC_HAVE_SSE
#define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

#include <string.h>
#ifdef PARCH_linux
/* POSIX headers needed by the fork()/waitpid() based OS test further down */
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#endif
11: #undef __FUNCT__
13: int PetscSSEHardwareTest(PetscTruth *flag) {
14: int ierr;
15: char *vendor;
16: char Intel[13]="GenuineIntel";
17: char AMD[13] ="AuthenticAMD";
20: PetscMalloc(13*sizeof(char),&vendor);
21: strcpy(vendor,"************");
22: CPUID_GET_VENDOR(vendor);
23: if (!strcmp(vendor,Intel) || !strcmp(vendor,AMD)) {
24: /* Both Intel and AMD use bit 25 of CPUID_FEATURES */
25: /* to denote availability of SSE Support */
26: unsigned long myeax,myebx,myecx,myedx;
27: CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
28: if (myedx & SSE_FEATURE_FLAG) {
29: *flag = PETSC_TRUE;
30: } else {
31: *flag = PETSC_FALSE;
32: }
33: }
34: PetscFree(vendor);
35: return(0);
36: }
38: #ifdef PARCH_linux
39: #include <signal.h>
/*
   Early versions of the Linux kernel disable SSE hardware because
   they do not know how to preserve the SSE state at a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal instruction signal will be raised,
   so catch it and return an error code.
*/
47: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
   PetscSSEDisabledHandler - SIGILL handler installed by PetscSSEOSEnabledTest_Linux().

   Input Parameter:
.  sig - the signal number (unused)

   Sets SIGILL to be ignored and terminates the process with exit status -1
   (seen by a waiting parent as a nonzero status).

   _exit() is used instead of exit(): it is async-signal-safe, and it neither
   runs atexit() handlers nor flushes stdio buffers, both of which a forked
   child inherits from its parent.
*/
static void PetscSSEDisabledHandler(int sig) {
  (void)sig;
  signal(SIGILL,SIG_IGN);
  _exit(-1);
}
54: #undef __FUNCT__
56: int PetscSSEOSEnabledTest_Linux(PetscTruth *flag) {
57: int status, pid = 0;
59: signal(SIGILL,PetscSSEDisabledHandler);
60: pid = fork();
61: if (pid==0) {
62: SSE_SCOPE_BEGIN;
63: XOR_PS(XMM0,XMM0);
64: SSE_SCOPE_END;
65: exit(0);
66: } else {
67: wait(&status);
68: }
69: if (!status) {
70: *flag = PETSC_TRUE;
71: } else {
72: *flag = PETSC_FALSE;
73: }
74: return(0);
75: }
77: #endif
78: #ifdef PARCH_win32
/*
   Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
   Windows ME/2000 do not disable SSE hardware.
*/
83: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
84: #endif
86: #undef __FUNCT__
88: int PetscSSEOSEnabledTest_TRUE(PetscTruth *flag) {
90: if (flag) {
91: *flag = PETSC_TRUE;
92: }
93: return(0);
94: }
96: #else /* Not defined PETSC_HAVE_SSE */
98: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
99: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
101: #undef __FUNCT__
103: int PetscSSEEnabledTest_FALSE(PetscTruth *flag) {
105: if (flag) {
106: *flag = PETSC_FALSE;
107: }
108: return(0);
109: }
111: #endif /* defined PETSC_HAVE_SSE */
113: #undef __FUNCT__
115: /*@C
116: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
117: set can be used. Some operating systems do not allow the use of these instructions despite
118: hardware availability.
120: Collective on MPI_Comm
122: Input Parameter:
123: . comm - the MPI Communicator
125: Output Parameters:
126: . lflag - Local Flag: PETSC_TRUE if enabled in this process
127: . gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm
129: Notes:
130: PETSC_NULL can be specified for lflag or gflag if either of these values are not desired.
132: Options Database Keys:
133: . -disable_sse - Disable use of hand tuned Intel SSE implementations
134: . -enable_sse - Enable use of hand tuned Intel SSE implementations
136: Level: developer
137: @*/
138: static PetscTruth petsc_sse_local_is_untested = PETSC_TRUE;
139: static PetscTruth petsc_sse_enabled_local = PETSC_FALSE;
140: static PetscTruth petsc_sse_global_is_untested = PETSC_TRUE;
141: static PetscTruth petsc_sse_enabled_global = PETSC_FALSE;
142: int PetscSSEIsEnabled(MPI_Comm comm,PetscTruth *lflag,PetscTruth *gflag) {
144: PetscTruth disabled_option,enabled_option;
148: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
149: disabled_option = PETSC_FALSE;
150: enabled_option = PETSC_FALSE;
152: PetscOptionsName("-disable_sse",
153: "Disable use of hand tuned Intel SSE implementations.","PetscSSEIsEnabled",&disabled_option);
154: if (disabled_option) {
155: petsc_sse_local_is_untested = PETSC_FALSE;
156: petsc_sse_global_is_untested = PETSC_FALSE;
157: }
159: PetscOptionsName("-enable_sse",
160: "Enable use of hand tuned Intel SSE implementations.","PetscSSEIsEnabled",&enabled_option);
161: if (enabled_option) {
162: petsc_sse_local_is_untested = PETSC_FALSE;
163: petsc_sse_enabled_local = PETSC_TRUE;
164: petsc_sse_global_is_untested = PETSC_FALSE;
165: petsc_sse_enabled_global = PETSC_TRUE;
166: }
168: if (petsc_sse_local_is_untested) {
169: PetscSSEHardwareTest(&petsc_sse_enabled_local);
170: if (petsc_sse_enabled_local) {
171: PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
172: }
173: petsc_sse_local_is_untested = PETSC_FALSE;
174: }
176: if (gflag && petsc_sse_global_is_untested) {
177: MPI_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPI_INT,MPI_LAND,comm);
178: petsc_sse_global_is_untested = PETSC_FALSE;
179: }
180: }
182: if (lflag) {
183: *lflag = petsc_sse_enabled_local;
184: }
185: if (gflag) {
186: *gflag = petsc_sse_enabled_global;
187: }
188: return(0);
189: }