728x90
샘플코드
#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <memory>
using namespace std;
using namespace chrono;
// Number of single-byte writes performed by each timed pass.
constexpr int MAGNITUDE = 100000;
// Distance in bytes between writes in the page-stride pass (0x1000).
constexpr int PAGE_SIZE = 4096;
// Distance in bytes between writes in the cache-line-stride pass (0x10).
// NOTE(review): 16 bytes is smaller than the 64-byte cache line found on most
// current x86-64 CPUs - confirm this matches the machine being measured.
constexpr int CACHE_LINE = 16;
namespace
{

/// Deleter that releases memory obtained from std::malloc.
struct FreeDeleter
{
    void operator()(void* block) const noexcept { std::free(block); }
};

/// Times a pass of single-byte writes spaced `stride` bytes apart and prints
/// the elapsed seconds to std::cout, prefixed by `label`.
///
/// @param label   Text printed immediately before the measured duration.
/// @param stride  Distance in bytes between consecutive writes; must be non-zero.
/// @param touches Number of writes to perform.
/// @return true when the pass ran; false when the buffer could not be allocated.
bool time_strided_writes(const char* label, std::size_t stride, std::size_t touches)
{
    // Size is computed in std::size_t so it cannot overflow `int` and the loop
    // bound has the same type as the loop index.
    const std::size_t bytes = stride * touches;

    // std::malloc is used on purpose: it does not initialize the memory,
    // whereas std::vector<char> would write every byte before timing begins.
    // The unique_ptr releases the buffer when this function returns.
    const std::unique_ptr<char, FreeDeleter> buffer{static_cast<char*>(std::malloc(bytes))};
    if (!buffer)
    {
        std::cerr << "failed to allocate " << bytes << " bytes\n";
        return false;
    }

    // Volatile stores are observable side effects, so the compiler has to emit
    // every write even though the buffer is never read back.
    volatile char* const sink = buffer.get();

    // steady_clock is monotonic, which is what interval measurement needs.
    const auto begin = std::chrono::steady_clock::now();
    for (std::size_t offset = 0; offset < bytes; offset += stride)
    {
        sink[offset] = 'a';
    }
    const auto end = std::chrono::steady_clock::now();

    const std::chrono::duration<double> elapsed = end - begin;
    std::cout << label << elapsed.count() << '\n';
    return true;
}

} // namespace

/// Runs the same number of single-byte writes at four different strides
/// (page, cache line, 1 byte, 3 bytes) and prints how long each pass took.
///
/// @return EXIT_SUCCESS when every pass ran, EXIT_FAILURE if an allocation failed.
int main()
{
    struct Pass
    {
        const char* label;
        std::size_t stride;
    };

    const Pass passes[] = {
        {"duration(per a page) :: ", static_cast<std::size_t>(PAGE_SIZE)},
        {"duration(per a cache line) :: ", static_cast<std::size_t>(CACHE_LINE)},
        {"duration (per a byte) :: ", 1},
        {"duration (per 3bytes) :: ", 3},
    };
    const std::size_t touches = static_cast<std::size_t>(MAGNITUDE);

    for (const Pass& pass : passes)
    {
        if (!time_strided_writes(pass.label, pass.stride, touches))
        {
            return EXIT_FAILURE;
        }
        // Blank line between results, as in the original output.
        std::cout << '\n';
    }
    return EXIT_SUCCESS;
}
결과
실제로 임의의 3바이트 간격으로 수행했을 때는 1바이트 간격으로 수행했을 때와 비교해 수행 속도에 별 차이가 없다. 하지만 캐시 라인이나 페이지 크기 간격으로 접근하여 매번 경계를 벗어나는 경우에는 확연한 속도 차이를 보여 준다.
728x90