// Figure A: NativeSize.cpp

 //==================================================
 // NativeSize - Matt Pietrek 1998
 // Microsoft Systems Journal, July 1998
 // FILE: NativeSize.CPP
 // To compile: CL /O1 NativeSize.CPP
 //==================================================
 #include <windows.h>
 #include <stdio.h>
 
 // Iteration counts shared by both timed loop nests in main(). The inner bound
 // of 65535 is the largest value a 16-bit WORD counter can hold, so the WORD
 // version of the inner loop can still reach it and terminate.
 #define LOOP_ITERATIONS 10          // Outer loop count
 #define LOOP_ITERATIONS2 65535      // Inner loop count
 
 // Loop counters for the two timed variants: iWORD/iWORD2 are the outer/inner
 // counters of the WORD loops, iDWORD/iDWORD2 those of the DWORD loops.
 // They are declared as volatile globals so that the optimizer can't do fancy
 // tricks such as keeping them in registers: every read and write of a
 // counter has to go through memory at the counter's declared width.
 volatile WORD iWORD;
 volatile WORD iWORD2;
 volatile DWORD iDWORD;
 volatile DWORD iDWORD2;
 
 int main()
 {
     LARGE_INTEGER tBefore, tAfter, i64PerfFrequency;
     float tWORD, tDWORD;
 
     // Figure out how often the performance counter increments
     QueryPerformanceFrequency( &i64PerfFrequency );
 
     // Set this thread's priority as high as reasonably possible to prevent
     // timeslice interruptions
     SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
 
 
     // =======================================================================
     // First, calculate the time to do the nested loops using a WORD counter
     
     Sleep( 0 ); // Try to start on a fresh timeslice
         
     // Get performance counter before the loop
     QueryPerformanceCounter( &tBefore );
 
     for ( iWORD = 0; iWORD < LOOP_ITERATIONS; iWORD++ )
     { 
         for ( iWORD2 = 0; iWORD2 < LOOP_ITERATIONS2; iWORD2++ )
         {
             // The NOP fakes the optimizer out so that it doesn't toss
             // out the loop code entirely
             __asm NOP
         }
     }
     
     // Get performance counter after the loops
     QueryPerformanceCounter( &tAfter );
 
     tWORD = tAfter.QuadPart - tBefore.QuadPart;
     tWORD = tWORD / i64PerfFrequency.QuadPart;
             
     printf( "WORD version took %.4f seconds\n", tWORD );
 
     // =======================================================================
     // Next, calculate the time to do the same loops using a DWORD counter
     
     Sleep( 0 ); // Try to start on a fresh timeslice
         
     // Get performance counter before the loop
     QueryPerformanceCounter( &tBefore );
 
     for ( iDWORD = 0; iDWORD < LOOP_ITERATIONS; iDWORD++ )
     { 
         for ( iDWORD2 = 0; iDWORD2 < LOOP_ITERATIONS2; iDWORD2++ )
         {
             // The NOP fakes the optimizer out so that it doesn't toss
             // out the loop code entirely
             __asm NOP
         }
     }   
 
     // Get performance counter after the loops
     QueryPerformanceCounter( &tAfter );
 
     tDWORD = tAfter.QuadPart - tBefore.QuadPart;
     tDWORD = tDWORD / i64PerfFrequency.QuadPart;
 
     printf( "DWORD version took %.4f seconds\n", tDWORD );
 
     // Display the ratio of the WORD time to the DWORD time
     printf( "Relative performance: %.2f : 1\n", tWORD / tDWORD );
 
     return 0;
 }