void cpx_mtx_knk(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB)
{
int rowsR = rowsA * rowsB;
- int colsR = colsA * colsB;
for (int i = 0; i < rowsR; i++)
{
GPU_GLOBAL_ID_0 = i;
- kernel_knk(ptrR, ptrA, ptrB, rowsR, colsR, rowsA, colsA, rowsB, colsB);
+ kernel_knk(ptrR, ptrA, ptrB, rowsA, colsA, rowsB, colsB);
}
}
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&memR); gpuerr(clSetKernelArg);
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 3, sizeof(int), &rowsR); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 4, sizeof(int), &colsR); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 5, sizeof(int), &rowsA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 6, sizeof(int), &colsA); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 7, sizeof(int), &rowsB); gpuerr(clSetKernelArg);
- err = clSetKernelArg(kernel, 8, sizeof(int), &colsB); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 4, sizeof(int), &colsA); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(clSetKernelArg);
+ err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(clSetKernelArg);
//Run the program
err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR}, NULL, 0, NULL, NULL);