double threshold; /* Порог для пороговой обработки коэффициентов */
double thresh_curr; /* Значение порога, смасштабированное для конкретного уровня разложения */
double len_curr; /* Длина отрезка массива коэеффициентов для конкретного уровня разложения */
int k; /* Счётчик цикла */
int i;
/* Инициализация переменных */
decomp_level = get_power_of_2(len); /* Вычисление уровня вейвлет-разложения */
if (decomp_level == -1) return -2; /* TODO: код ошибки: "массив данных имеет длину не равную степени 2-ки" */
threshold_rescales = (double*) malloc(decomp_level * sizeof(double));
if (threshold_rescales == 0) return -1; //TODO: код ошибки: "Не достаточно памяти"
wavelet = gsl_wavelet_alloc (gsl_wavelet_daubechies, 8);
work = gsl_wavelet_workspace_alloc (len);
waveletfloat.nc = wavelet->nc;
waveletfloat.type = wavelet->type;
waveletfloat.offset = wavelet->offset;
waveletfloat.g1 = (float*)malloc(wavelet->nc*sizeof(float));
waveletfloat.g2 = (float*)malloc(wavelet->nc*sizeof(float));
waveletfloat.h1 = (float*)malloc(wavelet->nc*sizeof(float));
waveletfloat.h2 = (float*)malloc(wavelet->nc*sizeof(float));
doubleCopyToFloat(wavelet->g1, (float*)waveletfloat.g1, wavelet->nc);
doubleCopyToFloat(wavelet->g2, (float*)waveletfloat.g2, wavelet->nc);
doubleCopyToFloat(wavelet->h1, (float*)waveletfloat.h1, wavelet->nc);
doubleCopyToFloat(wavelet->h2, (float*)waveletfloat.h2, wavelet->nc);
workfloat.n = work->n;
workfloat.scratch = (float*)malloc(sizeof(float)*work->n);
doubleCopyToFloat(work->scratch, workfloat.scratch, work->n);
initGPUDevice();
QTime FullFilterTime;
FullFilterTime.start();
/* Прямое вейвлет-преобразование */
doubleCopyToFloat(data, floatdata, len);
cudaWaveletTransform_forward (&waveletfloat, floatdata, 1, len, &workfloat);
/* ----------------------------------------- */
/* Начало фильтрации */
noise_estimation_float(floatdata, threshold_rescales, decomp_level); /* Вычисление масштабных коэфф-в для каждого уровня разложения */
threshold = select_threshold(len); /* Оценка значения базового порога */
i = len / 2;
len_curr = len / 2;
for (k = decomp_level - 1; k >= 0; k--)
{
thresh_curr = threshold * threshold_rescales[decomp_level - 1 - k];
cudaDenoise( &(floatdata[i]), len_curr, (float)thresh_curr);
len_curr /= 2;
i -= len_curr;
}
/* Конец фильтрации */
/* ----------------------------------------- */
/* Обратное вейвлет-преобразование (восстановление сигнала) */
cudaWaveletTransform_inverse (&waveletfloat, floatdata, 1, len, &workfloat);
floatCopyToDouble(floatdata, data, len);
exitGPUDevice();
qDebug("Cuda FullFilterTime: %d ms", FullFilterTime.elapsed());
free((void*)waveletfloat.g1);
free((void*)waveletfloat.g2);
free((void*)waveletfloat.h1);
free((void*)waveletfloat.h2);
free((void*)workfloat.scratch);
free (threshold_rescales);
gsl_wavelet_free (wavelet);
gsl_wavelet_workspace_free (work);
return 0;
}
Приложение Б. Характеристики сопроцессора на основе GPGPU
evgeniy@evgeniy-desktop:~/NVIDIA_GPU_Computing_SDK/C/bin/linux/release$ sudo ./deviceQuery
./deviceQuery Starting...
CUDA Device Query (Runtime API) version (CUDART static linking)
There is 1 device supporting CUDA
Device 0: "GeForce 9800 GT"
CUDA Driver Version: 3.0
CUDA Runtime Version: 3.0
CUDA Capability Major revision number: 1
CUDA Capability Minor revision number: 1
Total amount of global memory: 1073020928 bytes
Number of multiprocessors: 14
Number of cores: 112
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 16384 bytes
Total number of registers available per block: 8192
Warp size: 32
Maximum number of threads per block: 512
Maximum sizes of each dimension of a block: 512 x 512 x 64
Maximum sizes of each dimension of a grid: 65535 x 65535 x 1
Maximum memory pitch: 2147483647 bytes
Texture alignment: 256 bytes
Clock rate: 1.50 GHz
Concurrent copy and execution: Yes
Run time limit on kernels: Yes
Integrated: No
Support host page-locked memory mapping: No
Compute mode: Default (multiple host threads can use this device simultaneously)
deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 134564823, CUDA Runtime Version = 3.0, NumDevs = 1, Device = GeForce 9800 GT
Приложение В. Запуск теста вейвлет преобразования
evgeniy@evgeniy-desktop:~/work/dissertation/seismo_source/seismo_detector$ ./seismodetector
sample count: 65536
CPU FullFilterTime: 12 ms
Cuda FullFilterTime: 67 ms
sample count: 131072
CPU FullFilterTime: 26 ms
Cuda FullFilterTime: 67 ms
sample count: 262144
CPU FullFilterTime: 81 ms
Cuda FullFilterTime: 61 ms
sample count: 524288
CPU FullFilterTime: 159 ms
Cuda FullFilterTime: 73 ms
sample count: 1048576
CPU FullFilterTime: 319 ms
Cuda FullFilterTime: 85 ms
sample count: 2097152
CPU FullFilterTime: 677 ms
Cuda FullFilterTime: 97 ms
sample count: 4194304
CPU FullFilterTime: 1401 ms
Cuda FullFilterTime: 133 ms
sample count: 8388608
CPU FullFilterTime: 2855 ms
Cuda FullFilterTime: 209 ms
sample count: 16777216
CPU FullFilterTime: 5964 ms
Cuda FullFilterTime: 355 ms
sample count: 33554432
CPU FullFilterTime: 11979 ms
Cuda FullFilterTime: 638 ms
sample count: 67108864
CPU FullFilterTime: 31802 ms
Cuda FullFilterTime: 1350 ms