tmp.cols = filter.cols * gate.cols;
tmp.ptr = malloc(tmp.rows * (tmp.cols * 2) * sizeof(float));
-
- printf("%ix%i (knk)\n", tmp.rows, tmp.cols);
- unsigned long int us1, us2;
- us1 = get_time();
- cpx_mtx_knk_metal(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- us2 = get_time();
- printf("\tMetal: %lu\n", us2 - us1);
- us1 = get_time();
- cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- us2 = get_time();
- printf("\tThreads: %lu\n", us2 - us1);
- us1 = get_time();
- cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
- us2 = get_time();
- printf("\tBare: %lu\n", us2 - us1);
-
#ifdef SPEED_TEST
printf("%ix%i (knk)\n", tmp.rows, tmp.cols);
unsigned long int us1, us2;
us2 = get_time();
printf("\tBare: %lu\n", us2 - us1);
#else
+ if (USE_GPU && 0)
+ {
+ cpx_mtx_knk_metal(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+ }
+ else if (USE_THREADS)
+ {
+ cpx_mtx_knk_threads(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+ }
+ else
+ {
+ cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols);
+ }
#endif
void main(int argc, char** argv)
{
USE_GPU = cpx_mtx_begin();
- USE_GPU = 0;
- USE_THREADS = 0;
RANDOM_FILE = fopen("/dev/TrueRNG0", "r");
if (!RANDOM_FILE) RANDOM_FILE = fopen("/dev/random", "r");
err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(clSetKernelArg);
//Run the program
- err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR}, NULL, 0, NULL, NULL);
+ err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, (size_t[]){rowsR, colsR}, NULL, 0, NULL, NULL);
gpuerr(clEnqueueNDRangeKernel);
//Wait for completion