OpenMP in Linux. What is wrong?

REASY · 1 ноя 2010

Доброго времени суток!
Недавно познакомился с библиотекой OpenMP. Написал последовательный и параллельный map. Вот код. Файл map_reduce_omp.hpp:

Код (Text):

/**
 * Configure the OpenMP runtime for the map benchmarks.
 *
 * @param max_threads  Number of threads to request. A value <= 0 selects
 *                     the processor count reported by omp_get_num_procs().
 * @param is_dynamic   Forwarded to omp_set_dynamic().
 * @return true when this translation unit was compiled with OpenMP enabled
 *         (the _OPENMP macro is defined); otherwise an error message is
 *         printed, the function waits for a key press and returns false.
 *
 * The _OPENMP check comes before any omp_* call, so a build without OpenMP
 * support reports the problem instead of touching the runtime. The function
 * is `inline` because it is defined in a header.
 */
inline bool init_omp(int max_threads, bool is_dynamic)
{
#ifdef _OPENMP
    if (max_threads <= 0)
        max_threads = omp_get_num_procs();

    omp_set_dynamic(is_dynamic);
    omp_set_num_threads(max_threads);
    return true;
#else
    // Nothing to configure without OpenMP; silence unused-parameter warnings.
    (void)max_threads;
    (void)is_dynamic;

    printf("-err. OPENMP not active!!!\n");
    getchar();  // keep the message visible until the user presses Enter
    return false;
#endif
}

/**
 * Sequential map: apply f to every element of array and time the loop.
 *
 * @param array  Input elements; at least `size` of them are read.
 * @param size   Number of elements to process.
 * @param f      Function applied to each element.
 * @param time   Receives the wall-clock duration of the loop in seconds, as
 *               measured with omp_get_wtime(). May be NULL when the caller
 *               does not need the timing.
 * @return A new[]-allocated array of `size` results that the caller must
 *         release with delete[], or NULL when size <= 0 or the allocation
 *         fails.
 */
template <typename T>
T* map_serial(T* array, int size, T (*f)(const T &), double *time)
{
    if (size <= 0)
        return NULL;

    // Plain new[] reports failure by throwing std::bad_alloc and never
    // yields NULL; the nothrow form makes the NULL result that callers
    // test for actually reachable.
    T* result = new (std::nothrow) T[size];
    if (result == NULL)
        return NULL;

    const double started = omp_get_wtime();
    for (int i = 0; i < size; ++i)
        result[i] = f(array[i]);
    const double elapsed = omp_get_wtime() - started;

    if (time != NULL)
        *time = elapsed;
    return result;
}

/**
 * Parallel map: apply f to every element of array inside an OpenMP
 * work-sharing loop and time the loop.
 *
 * @param array  Input elements; at least `size` of them are read.
 * @param size   Number of elements to process.
 * @param f      Function applied to each element; it is called from several
 *               threads at once.
 * @param time   Receives the wall-clock duration of the parallel loop in
 *               seconds, as measured with omp_get_wtime(). May be NULL when
 *               the caller does not need the timing.
 * @return A new[]-allocated array of `size` results that the caller must
 *         release with delete[], or NULL when size <= 0 or the allocation
 *         fails.
 */
template <typename T>
T* map_omp(T* array, int size, T (*f)(const T &), double *time)
{
    if (size <= 0)
        return NULL;

    // Plain new[] reports failure by throwing std::bad_alloc and never
    // yields NULL; the nothrow form makes the NULL result that callers
    // test for actually reachable.
    T* result = new (std::nothrow) T[size];
    if (result == NULL)
        return NULL;

    const double started = omp_get_wtime();

    // The loop index is declared in the for statement, so OpenMP makes it
    // private to each thread without an explicit private() clause.
    #pragma omp parallel for shared(array, result)
    for (int i = 0; i < size; ++i)
        result[i] = f(array[i]);

    const double elapsed = omp_get_wtime() - started;

    if (time != NULL)
        *time = elapsed;
    return result;
}

/**
 * Double a value.
 *
 * @param x  Value to double.
 * @return x * 2, converted to T.
 */
template <typename T>
T mul_2(const T &x)
{
    T doubled = x * 2;
    return doubled;
}

............................

Вот программа, тестирующая их. Файл test_map.cpp:

Код (Text):

//#include <windows.h>

#include <stdio.h>

#include <stdlib.h>

#include <memory.h>

#include <cmath>

#include <ctime>

#include <iostream>

#include <new>

#include "map_reduce_omp.hpp"

using namespace std;

// Smallest array size (in elements) used by the benchmark loop in main().
const int start = 1000000;

// Upper bound on the benchmarked array size: main() keeps doubling the size,
// beginning at `start`, while it stays <= end.
// NOTE(review): with `using namespace std;` in effect, an unqualified `end`
// can be ambiguous with std::end on C++11 and later compilers; consider
// renaming this constant or referring to it as ::end.
const int end = 128000000;

int main(int argc, char *argv[])

{

int i, sz;

int a[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

const int size = sizeof(a)/sizeof(int);

int* array = new int[size];

memcpy(array, a, sizeof(int)*size);

if(!init_omp(0, false))

return -1;

cout << "************Map Multiply************" << endl;

cout << "Array: " << endl;

PrintArray(array, size);

cout << endl;

double time = 0;

int* g = map_serial<int>(array, size, mul_2, &time);

cout << "***Serial*** Array[i] * 2 : " << endl;

PrintArray(g, size);

cout << endl;

int* b = map_omp<int>(array, size, mul_2, &time);

cout << "***Parallel*** Array[i] * 2 : " << endl;

PrintArray(b, size);

cout << endl;

delete[] b;

delete[] g;

delete[] array;

cout << endl;

cout << "******************************************" << endl;

cout << "Testing SERIAL and PARALLEL map." << endl;

printf("%3d <= array size <= %d\n",start, end);

cout << "******************************************" << endl;

// LARGE_INTEGER tStart, tEnd, freq;

// SetThreadAffinityMask(GetCurrentThread(), 1);

// QueryPerformanceFrequency(&freq);

printf("Accuracy: %lf\n", omp_get_wtick());

for (sz = start; sz <= end; sz *= 2)

{

array = new int[sz];

for (i = 0; i < sz; i++)

array[i] = rand();

cout << "Size = " << sz << endl;

cout << "map_serial() ";

int* t = map_serial<int>(array, sz, mul_2, &time);

if (t == NULL){

delete[] array;

cout << "-err. Can't allocate memory. Exiting..." << endl;

return -1;

}

printf("Time: %lf ms\n", time*1000);

#ifdef _TEST

cout << "Check calcs...";

for (i = 0; i < sz; i++)

if (t[i] != array[i]* 2){

cout << "-err. Something wrong!!!" << endl;

break;

}

if (i == sz)

cout << " OK" << endl;

#endif

delete[] t;

cout << "map_omp() ";

// QueryPerformanceCounter(&tStart);

int *r = map_omp<int>(array, sz, mul_2, &time);

if (r == NULL){

cout << "-err. Can't allocate memory. Exiting..." << endl;

delete[] array;

return -1;

}

printf("Time: %lf ms\n", time*1000);

/*

QueryPerformanceCounter(&tEnd);

tEnd.QuadPart -= tStart.QuadPart;

double span = 1000*(double) tEnd.QuadPart / freq.QuadPart;

cout << endl << "span = " << span << endl;

*/

#ifdef _TEST

cout << "Check calcs...";

for (i = 0; i < sz; i++)

if (r[i] != array[i]* 2){

cout << "-err. Something wrong!!!" << endl;

break;

}

if (i == sz)

cout << " OK" << endl;

#endif

cout << endl;

delete[] r;

delete[] array;

}

cout << "Press Enter to terminate.";

cin.get();

}

Теперь вопрос: почему, когда собираю в Linux (openSUSE 11.3), время параллельного выполнения больше последовательного?
CPU: AMD Athlon(tm) 64 X2 Dual Core Processor 6000+, 2 Gb RAM.
Вот результаты тестов на Windows XP; программа собрана с помощью MS Visual Studio 2008, конфигурация Release (Microsoft (R) C/C++ версии 15.00.30729.01). В Диспетчере задач видно, что при параллельном выполнении оба ядра загружены на 99%:
************Map Multiply************
Array:
1 2 3 4 5 6 7 8 9 10
***Serial*** Array[i] * 2 :
2 4 6 8 10 12 14 16 18 20
***Parallel*** Array[i] * 2 :
2 4 6 8 10 12 14 16 18 20

******************************************
Testing SERIAL and PARALLEL map.
1000000 <= array size <= 128000000
******************************************
Accuracy: 0.000000
Size = 1000000
map_serial() Time: 4.901182 ms
map_omp() Time: 3.589562 ms

Size = 2000000
map_serial() Time: 9.933665 ms
map_omp() Time: 7.087493 ms

Size = 4000000
map_serial() Time: 19.601095 ms
map_omp() Time: 14.547938 ms

Size = 8000000
map_serial() Time: 39.309465 ms
map_omp() Time: 28.374556 ms

Size = 16000000
map_serial() Time: 80.801331 ms
map_omp() Time: 59.188808 ms

Size = 32000000
map_serial() Time: 156.464299 ms
map_omp() Time: 112.683875 ms

Size = 64000000
map_serial() Time: 333.650785 ms
map_omp() Time: 230.298544 ms

Size = 128000000
map_serial() Time: 630.555839 ms
map_omp() Time: 542.296297 ms

А вот время работы той же программы, собранной в Linux (openSUSE 11.3, Linux linux-030z 2.6.34-12-desktop #1 SMP PREEMPT 2010-06-29 02:39:08 +0200 i686 athlon i386 GNU/Linux). В этом случае top показывает, что только одно ядро используется на 100%, а другое — нет. Собирал командой g++ test_map.cpp -o test_map -O3 --openmp:
************Map Multiply************
Array:
1 2 3 4 5 6 7 8 9 10
***Serial*** Array[i] * 2 :
2 4 6 8 10 12 14 16 18 20
***Parallel*** Array[i] * 2 :
2 4 6 8 10 12 14 16 18 20

******************************************
Testing SERIAL and PARALLEL map.
1000000 <= array size <= 128000000
******************************************
Accuracy: 0.000000
Size = 1000000
map_serial() Time: 4.824302 ms
map_omp() Time: 13.938827 ms

Size = 2000000
map_serial() Time: 9.297206 ms
map_omp() Time: 8.859497 ms

Size = 4000000
map_serial() Time: 18.562291 ms
map_omp() Time: 17.737430 ms

Size = 8000000
map_serial() Time: 37.429889 ms
map_omp() Time: 62.919832 ms

Size = 16000000
map_serial() Time: 74.855028 ms
map_omp() Time: 71.277933 ms

Size = 32000000
map_serial() Time: 149.903632 ms
map_omp() Time: 173.817039 ms

Size = 64000000
map_serial() Time: 312.852235 ms
map_omp() Time: 325.017878 ms

Size = 128000000
map_serial() Time: 548.998882 ms
map_omp() Time: 637.584637 ms

valterg · 8 ноя 2010

Возможно, дело в переменной окружения OMP_NUM_THREADS.
Видимо, автоматом она не задана. Соответственно, 2-е ядро не используется.
А вообще лучше такие вопросы на opennet.ru задавать. Там масса ребят мапы тестирует.

contopt · 2 дек 2010

valterg сказал(а):

Возможно, дело в переменной окружения OMP_NUM_THREADS.
Видимо, автоматом она не задана. Соответственно, 2-е ядро не используется.
А вообще лучше такие вопросы на opennet.ru задавать. Там масса ребят мапы тестирует.
Нажмите, чтобы раскрыть...

Там ребята не только map'ы тестируют

Войти или зарегистрироваться

OpenMP in Linux. What is wrong?

REASY New Member

valterg Active Member

contopt New Member

Войти или зарегистрироваться

OpenMP in Linux. What is wrong?

REASY New Member

valterg Active Member

contopt New Member

Быстрый поиск