为什么Windows C++多线程IOPS比IOMeter快得多?
问题描述:
我有一块SSD,我想用自己的程序来测试它的I/O性能,但是我的程序计算出的IOPS比IOMeter测得的结果高得多。我的SSD是PLEXTOR PX-128M3S,用IOMeter测得其512B随机读取的最大IOPS大约为94k(队列深度为32)。然而我的程序(32个Windows线程)可以达到500k左右的512B IOPS,大约是IOMeter结果的5倍!我做了数据验证,没有发现读取到的数据有任何错误。这是因为我是按顺序读取数据的吗?(我的程序主要是从文件中读取512B然后释放缓冲区;我也曾改用4个字节(一个int)来验证程序逻辑,并没有发现问题。)有谁可以帮我弄清楚我错在哪里?为什么Windows C++多线程IOPS比IOMeter快得多?
非常感谢!
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
// Global state shared by main and the worker threads.
long completeIOs = 0;   // count of successful reads; reset in FileRead, bumped by workers via InterlockedIncrement
long completeBytes = 0; // not referenced anywhere in the visible code
int threadCount = 32;   // number of worker threads FileRead starts
unsigned long long length = 1073741824; // number of int items InitPool writes (2^30 items; a 4 GiB file assuming 4-byte int)
int interval = 1024;    // multiplier applied to the loop index to get each read offset in the worker
int resultArrayLen = 320000; // element count of result[]; each worker performs resultArrayLen/threadCount reads
int *result = new int[resultArrayLen]; // returned by FileRead; only filled by the (currently commented-out) 4-byte fetch mode
// Function declarations
double GetSecs(void); // Current performance-counter reading, in seconds; call twice and subtract for a duration
int InitPool(long long,char*,int); // Initialize test data for testing, if successful, return 1; otherwise, return a non 1 value.
int * FileRead(char * path); // Run the multi-threaded read pass over the file at path; returns the global result array
unsigned int DataVerification(int*, int sampleItem); // Count entries of the given array that differ from sampleItem
/*
 * Benchmark driver: creates the test file, runs the multi-threaded read
 * pass, and prints the achieved I/O operations per second.
 *
 * Returns 0 on success, 1 if the test file could not be initialized.
 */
int main()
{
    const int sampleItem = 0x1;
    // A writable array rather than a pointer to a string literal: the
    // helpers take `char *`, and binding a literal to `char *` is not
    // valid C++11.
    char fPath[] = "G:\\workspace\\4G.bin";

    if (InitPool(length, fPath, sampleItem) != 1)
    {
        printf("File write err... \n");
        // Benchmarking a file that was not written correctly is meaningless.
        return 1;
    }

    // Time the whole read pass (thread start-up included).
    double start = GetSecs();
    int *fetchResult = FileRead(fPath);
    double end = GetSecs();
    printf("File read IOPS is %.4f per second.. \n", completeIOs / (end - start));

    // Data validation only applies when the workers run in 4-byte fetch mode
    // and store what they read into the result array. To enable it:
    //     unsigned int invalidIO = DataVerification(fetchResult, sampleItem);
    //     if (invalidIO != 0)
    //         printf("Total invalid data fetch IOs are %d", invalidIO);
    (void)fetchResult;

    return 0;
}
/*
 * Create (or truncate) the file at `path` and fill it with `length` copies
 * of the int `sample`.
 *
 * Returns 1 on success and 0 if writing or closing the file fails.
 * If the file cannot be opened the process is terminated with exit(-1).
 * A non-positive `length` leaves an empty file and still returns 1.
 */
int InitPool(long long length, char* path, int sample)
{
    enum { CHUNK_ITEMS = 4096 };
    int chunk[CHUNK_ITEMS];
    long long remaining;
    int i;

    printf("Start initializing test data ... \n");
    FILE * fp = fopen(path,"wb");
    if (fp == NULL)
    {
        printf("file open err... \n");
        exit (-1);
    }

    // Write in chunks instead of one fwrite call per item, and count with a
    // 64-bit counter so lengths above INT_MAX do not overflow the loop.
    for (i = 0; i < CHUNK_ITEMS; i++)
    {
        chunk[i] = sample;
    }
    for (remaining = length; remaining > 0; )
    {
        size_t batch = (remaining < CHUNK_ITEMS) ? (size_t)remaining : (size_t)CHUNK_ITEMS;
        if (fwrite(chunk, sizeof chunk[0], batch, fp) != batch)
        {
            // Disk full or I/O error: report failure to the caller.
            fclose(fp);
            return 0;
        }
        remaining -= (long long)batch;
    }

    // fclose flushes buffered data, so a failure here is also a write failure.
    if (fclose(fp) != 0)
    {
        return 0;
    }
    printf("Data initialization is complete...\n");
    return 1;
}
double GetSecs(void)
{
LARGE_INTEGER frequency;
LARGE_INTEGER start;
if(! QueryPerformanceFrequency(&frequency))
printf("QueryPerformanceFrequency Failed\n");
if(! QueryPerformanceCounter(&start))
printf("QueryPerformanceCounter Failed\n");
return ((double)start.QuadPart/(double)frequency.QuadPart);
}
// Argument bundle passed to each worker thread through CreateThread.
class input
{
public:
    char *path;    // file the worker opens
    int starting;  // start index of the worker's slice (FileRead passes i * slice)

    // Members are listed in declaration order, which is the order C++
    // initializes them in.
    input(int st, char *filePath)
        : path(filePath), starting(st)
    {
    }
};
//Workers
/*
 * Worker thread: opens the file named in the `input` argument and performs
 * resultArrayLen / threadCount reads of 512 bytes each, incrementing the
 * global completeIOs counter after every successful read.
 *
 * lpThreadParameter must point to a heap-allocated `input`; this function
 * takes ownership of it and deletes it.
 *
 * Returns 0 when all reads succeed. On an open, seek or read failure the
 * whole process is terminated with exit(-1).
 *
 * Note: the reads go through a buffered stdio stream, so they are not
 * guaranteed to reach the physical disk.
 */
DWORD WINAPI FileReadThreadEntry(LPVOID lpThreadParameter)
{
    input * in = (input*) lpThreadParameter;
    char* path = in->path;
    const int sPos = in->starting;
    // The creator allocated the argument with new and never touches it again.
    delete in;

    FILE * fp = fopen(path,"rb");
    if (fp == NULL)
    {
        printf("File open err... \n");
        exit(-1);
    }

    // One buffer for the whole run: no allocation inside the timed loop.
    unsigned char buffer[512];
    const int readsPerThread = resultArrayLen / threadCount;
    for (int i = 0; i < readsPerThread; i++)
    {
        // Offset is based on this thread's slice start so each thread covers
        // its own region instead of every thread re-reading the same blocks.
        // Computed in 64 bits so it cannot overflow int.
        const long long offset = ((long long)sPos + i) * interval;
        if (_fseeki64(fp, offset, SEEK_SET) != 0 ||
            fread(buffer, sizeof buffer, 1, fp) != 1)
        {
            printf("file read err...\n");
            exit(-1);
        }
        InterlockedIncrement(&completeIOs);
        // For the 4-byte fetch mode, read sizeof(int) bytes into
        // &result[sPos + i] instead of into buffer.
    }

    fclose(fp);
    return 0;
}
int * FileRead(char * p)
{
printf("Starting reading file ... \n");
HANDLE mWorkThread[256]; //max 256 threads
completeIOs = 0;
int slice = int (resultArrayLen/threadCount);
for(int i = 0; i < threadCount; i++)
{
mWorkThread[i] = CreateThread(
NULL,
0,
FileReadThreadEntry,
(LPVOID)(new input(i*slice,p)),
0,
NULL);
}
WaitForMultipleObjects(threadCount, mWorkThread, TRUE, INFINITE);
printf("File read complete... \n");
return result;
}
/*
 * Counts how many of the first resultArrayLen / interval entries of
 * `result` differ from `sampleItem` and returns that count.
 *
 * NOTE(review): only resultArrayLen / interval entries are inspected, not
 * the whole array - confirm this bound is intended.
 */
unsigned int DataVerification(int* result, int sampleItem)
{
    const int checked = resultArrayLen / interval;
    unsigned int mismatches = 0;
    int idx = 0;

    while (idx < checked)
    {
        mismatches += (result[idx] != sampleItem) ? 1u : 0u;
        ++idx;
    }
    return mismatches;
}
答
我没有足够仔细地查看代码,所以不能完全确定,但我没有看到任何代码把数据刷新到磁盘,和/或确保你的读取确实来自磁盘。既然如此,看来你所测量的主要是操作系统磁盘缓存的性能。虽然磁盘可能会对你测量到的性能有所贡献,但它可能只是一个很小的因素,其他因素占主导地位。由于代码显然是为Windows编写的,你可以考虑(例如)使用CreateFile打开文件,并在打开时传递FILE_FLAG_NO_BUFFERING标志。这将(至少在很大程度上)把操作系统缓存从等式中去掉,并强制每次读取或写入都直接访问磁盘本身。