Note: Forums will be making significant UX changes to address key usability improvements surrounding search, discoverability and navigation. To learn more about these changes please visit the announcement which can be found HERE.
Please help - Why is Windows multi-threaded data fetching IOPS so much faster than expected (beyond the HW max benchmark)?

Proposed Please help - Why is Windows multi-threaded data fetching IOPS so much faster than expected (beyond the HW max benchmark)?

  • 10. února 2012 7:03
     
      Obsahuje kód

    Greetings, 

          Can anybody help me a little out of my difficulty? 

          I have an SSD and I am trying to use it to measure my program's I/O performance; however, the IOPS calculated by my program is much, much higher than what IOMeter reports. 

          My SSD is PLEXTOR PX-128M3S, by IOMeter, its max 512B random read IOPS is around 94k (queue depth is 32). 
          However, my program (32 Windows threads) reaches around 500k 512B IOPS, about 5 times what IOMeter reports! I validated the data and found no errors in what was fetched. Is it because my reads are issued in sequential order?

          I have pasted my code below (it mainly fetches 512B from the file and releases it; I also used 4-byte (int) reads to validate the program logic and found no problem). Can anybody help me figure out where I went wrong? 

          Thanks so much in advance!!

    Nai Yan. 

    #include <stdio.h>
    #include <Windows.h>
    
    /* 
    **  Purpose: Verify file random read IOPS in comparison with IOMeter    
    **  Author:  Nai Yan
    **  Date:    Feb. 9th, 2012
    **/
    
    //Global variables
    //Number of successful reads; worker threads bump it with InterlockedIncrement and main divides it by the elapsed time
    long completeIOs = 0; 
    //Declared but not referenced by any code shown in this file
    long completeBytes = 0;
    //Number of worker threads FileRead starts; also divides resultArrayLen into per-thread read counts
    int  threadCount = 32;
    //Number of ints InitPool writes (an item count, not a byte count); 2^30 items of sizeof(int) bytes give the 4G test file when int is 4 bytes
    unsigned long long length = 1073741824;
    
    //Byte distance between consecutive read offsets in FileReadThreadEntry (offset = i * interval)
    int interval = 1024;
    
    //Total number of reads across all threads and the element count of result[]
    int resultArrayLen = 320000;
    
    //Destination of the 4-byte fetch mode that is commented out in FileReadThreadEntry; new int[] leaves the elements uninitialized
    int *result = new int[resultArrayLen];
    
    //Function declarations
    //Current high-resolution counter reading in seconds; main subtracts two readings to get a duration
    double GetSecs(void);
    //Fills the test file with sample data (arguments: item count, file path, sample value); main treats any return value other than 1 as failure
    int InitPool(long long,char*,int);
    //Runs the worker threads against the file at path, waits for them and returns the global result array
    int * FileRead(char * path);
    //Counts fetched entries that differ from sampleItem
    unsigned int DataVerification(int*, int sampleItem);
    
    /*
    **  Entry point: creates the test file, runs the multi-threaded read pass and
    **  prints the achieved IOPS (completed reads divided by elapsed seconds).
    **
    **  Returns 0 on success and 1 when the test file could not be initialized.
    */
    int main()
    {
    	int sampleItem = 0x1;

    	//Writable array rather than a string literal: InitPool and FileRead take a non-const char*
    	char fPath[] = "G:\\workspace\\4G.bin";

    	if (InitPool(length,fPath,sampleItem)!= 1)
    	{
    		//Without a valid test file the read measurement below would be meaningless
    		printf("File write err... \n");
    		return 1;
    	}

    	//Time the whole read pass over the initialized file
    	double start = GetSecs();

    	FileRead(fPath);

    	double end = GetSecs();
    	double elapsed = end - start;

    	if (elapsed > 0.0)
    	{
    		printf("File read IOPS is %.4f per second.. \n",completeIOs/elapsed);
    	}
    	else
    	{
    		//Avoid dividing by zero when the timer could not be read or did not advance
    		printf("File read took no measurable time; IOPS not computed. \n");
    	}

    	//Data validation with DataVerification() is only meaningful when the worker's
    	//4-byte fetch mode (the one that stores into result[]) is enabled, so it is not run here.

    	return 0;
    }
    
    
    
    /*
    **  Creates (or truncates) the file at `path` and fills it with `length`
    **  copies of the int `sample`.
    **
    **  length : number of ints to write (an item count, not a byte count)
    **  path   : file to create
    **  sample : value written repeatedly
    **
    **  Returns 1 on success. Returns 0 when the file cannot be opened, when a
    **  write fails, or when the final flush/close fails.
    */
    int InitPool(long long length, char* path, int sample)
    {
    	printf("Start initializing test data ... \n");

    	FILE * fp = fopen(path,"wb");

    	if (fp == NULL)
    	{
    		//Report through the return value so the caller decides how to react
    		printf("file open err... \n");
    		return 0;
    	}

    	int ok = 1;

    	//64-bit counter: an int counter would overflow before reaching a length above INT_MAX
    	for (long long i = 0; i < length; i++)
    	{
    		if (fwrite(&sample,sizeof(int),1,fp) != 1)
    		{
    			printf("file write err... \n");
    			ok = 0;
    			break;
    		}
    	}

    	//fclose flushes the remaining buffered data, so its result counts towards success
    	if (fclose(fp) != 0)
    	{
    		if (ok)
    		{
    			printf("file close err... \n");
    		}
    		ok = 0;
    	}

    	if (ok)
    	{
    		printf("Data initialization is complete...\n");
    	}

    	return ok;
    }
    
    double GetSecs(void)
    
    {
        LARGE_INTEGER frequency;
        LARGE_INTEGER start;
    
        if(! QueryPerformanceFrequency(&frequency)) 
            printf("QueryPerformanceFrequency Failed\n");
    
        if(! QueryPerformanceCounter(&start))
            printf("QueryPerformanceCounter Failed\n");
    	
    	return ((double)start.QuadPart/(double)frequency.QuadPart);
        
    }
    
    //Argument bundle that FileRead hands to each FileReadThreadEntry worker
    class input
    {
    public:
    	//File the worker opens for reading
    	char *path;
    	//Offset supplied by FileRead (i*slice); base index of the commented-out 4-byte fetch mode
    	int starting;

    	//Initializers are listed in member declaration order
    	input (int firstIndex, char * fileName)
    		: path(fileName),
    		  starting(firstIndex)
    	{
    	}
    };
    
    //Workers
    /*
    **  Thread entry point for one read worker.
    **
    **  lpThreadParameter : pointer to an `input`; only in->path is read here and the
    **                      object is not freed by this function.
    **
    **  Opens the file and performs resultArrayLen/threadCount reads of 512 bytes each,
    **  the i-th one at byte offset i * interval, incrementing completeIOs after every
    **  successful read. Every worker visits the same sequence of offsets.
    **
    **  Any open, seek or read failure prints a message and ends the whole process
    **  with exit(-1). Returns 0 once all reads have completed.
    */
    DWORD WINAPI FileReadThreadEntry(LPVOID lpThreadParameter)
    {
    	input * in = (input*) lpThreadParameter;

    	FILE * fp = fopen(in->path,"rb");

    	if (fp == NULL)
    	{
    		printf("File open err... \n");
    		exit(-1);
    	}

    	//One buffer reused for every read (the data itself is discarded); this avoids a
    	//heap allocation per I/O and the new[]/delete mismatch of the earlier version
    	unsigned char block[512];

    	const int reads = resultArrayLen/threadCount;

    	for (int i = 0; i < reads; i++)
    	{
    		//Widen before multiplying so the byte offset cannot overflow int
    		fpos_t pos = (fpos_t)i * interval;

    		//For 512 bytes fetch each time.
    		//The disabled 4-byte mode would instead read sizeof(int) bytes into result[in->starting + i].
    		if (fsetpos(fp,&pos) != 0 || fread(block,sizeof block,1,fp) != 1)
    		{
    			printf("file read err...\n");
    			exit(-1);
    		}

    		InterlockedIncrement(&completeIOs);
    	}

    	fclose(fp);

    	//A DWORD thread routine must return a value; 0 signals normal completion
    	return 0;
    }
    
    int * FileRead(char * p)
    {
    	printf("Starting reading file ... \n");
    	
    		
    	HANDLE mWorkThread[256];                      //max 256 threads
    	completeIOs = 0;
    		
    	int slice = int (resultArrayLen/threadCount);
    
    	for(int i = 0; i < threadCount; i++)
    	{
    		mWorkThread[i] = CreateThread(
    					NULL,
    					0,
    					FileReadThreadEntry,
    					(LPVOID)(new input(i*slice,p)),
    					0, 
    					NULL);
    	}
    
       WaitForMultipleObjects(threadCount, mWorkThread, TRUE, INFINITE);
       
       printf("File read complete... \n");
    
       return result;
    
    }
    
    /*
    **  Counts the entries of `result` that differ from `sampleItem`.
    **
    **  result     : fetched values; assumed to hold resultArrayLen entries, as the
    **               global result array does
    **  sampleItem : value every entry is expected to equal
    **
    **  Returns the number of mismatching entries (0 means all entries matched).
    */
    unsigned int DataVerification(int* result, int sampleItem)
    {
    	unsigned int invalid = 0;

    	//Every slot is checked. The bound is resultArrayLen, not resultArrayLen/interval:
    	//interval is a byte stride in the file and says nothing about how many results exist.
    	//NOTE(review): slots beyond threadCount * (resultArrayLen/threadCount) are never
    	//written when the division is not exact - confirm the constants keep it exact.
    	for (int i = 0; i < resultArrayLen; i++)
    	{
    		if (result[i] != sampleItem)
    		{
    			invalid++;
    		}
    	}

    	return invalid;
    }

    • Přesunutý Rahul V. Patil 23. května 2012 20:30 Related to where the other thread is (From:Parallel Computing in C++ and Native Code)
    •  

Všechny reakce

  • 17. února 2012 12:38
     
     Navržená odpověď

    You're doing cached I/O and thus you are effectively only measuring the system overhead.

    If you want to actually measure the underlying hardware you need to do uncached I/O using CreateFile/ReadFile with FILE_FLAG_NO_BUFFERING (also see http://msdn.microsoft.com/en-us/library/windows/desktop/cc644950(v=vs.85).aspx)

    /Stefan

  • 2. března 2012 8:46
     
     

    Thank you, Stefan. I just tried your suggestion. It seems that Windows ReadFile is close to reality (IOMeter). However, I am still looking for the root cause of why Windows ReadFile is so much slower than C++ fread - I am using another thread (http://social.msdn.microsoft.com/Forums/en-US/vcgeneral/thread/62c4583f-0c1d-4cf7-ba68-fd6ea684280a) to follow up on this question. Sorry, I forgot to update you on this.

    If you have further comments on this question, please help!

    Nai Yan.