From: miha-q <> Date: Thu, 7 Mar 2024 22:31:47 +0000 (-0500) Subject: Thu Mar 7 05:31:47 PM EST 2024 X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=1bf899b14f62d0b18ba7d69014bbbfe7c0cfdacb;p=QAnsel.git Thu Mar 7 05:31:47 PM EST 2024 --- diff --git a/src/.kernel.tmp.1 b/src/.kernel.tmp.1 index d4b4f0a..cf0af43 100644 --- a/src/.kernel.tmp.1 +++ b/src/.kernel.tmp.1 @@ -144,4 +144,5 @@ __kernel void kernel_knk_2x2 } } + } diff --git a/src/.kernel.tmp.2 b/src/.kernel.tmp.2 index 55338c1..b1fc0e0 100644 Binary files a/src/.kernel.tmp.2 and b/src/.kernel.tmp.2 differ diff --git a/src/QAnsel.c b/src/QAnsel.c index 773edf0..4743458 100644 --- a/src/QAnsel.c +++ b/src/QAnsel.c @@ -263,7 +263,7 @@ void qansel_instruction(cpx_mtx_t* stateVector, unsigned char qubitCount, QInstr } else { - cpx_mtx_knk(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); + cpx_mtx_knk_2x2(tmp.ptr, filter.ptr, gate.ptr, filter.rows, filter.cols, gate.rows, gate.cols); } #endif diff --git a/src/complex.c b/src/complex.c index b1d783f..0ab2a5c 100644 --- a/src/complex.c +++ b/src/complex.c @@ -144,6 +144,20 @@ void cpx_mtx_knk(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, in } } + +void cpx_mtx_knk_2x2(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB) +{ + int rowsR = rowsA * rowsB; + int colsR = colsA * colsB; + for (int i = 0; i < rowsR / 2; i++) + { + for (int j = 0; j < colsR / 2; j++) + { + kernel_knk_2x2(ptrR, ptrA, rowsA, colsA, ptrB[0], ptrB[1], ptrB[2], ptrB[3], ptrB[4], ptrB[5], ptrB[6], ptrB[7], i, j); + } + } +} + /*-----------------------------------------------------------------------------------*/ /*THREADED*/ /*-----------------------------------------------------------------------------------*/ @@ -621,7 +635,7 @@ void cpx_mtx_knk_metal_2x2(float* ptrR, float* ptrA, float* ptrB, int rowsA, int err = clSetKernelArg(kernel,11, sizeof(float), &gate7); gpuerr(clSetKernelArg); //Run the program - err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 1, NULL, (size_t[]){rowsR / 2, colsR / 2}, NULL, 0, NULL, NULL); + err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, (size_t[]){rowsR / 2, colsR / 2}, NULL, 0, NULL, NULL); gpuerr(clEnqueueNDRangeKernel); //Wait for completion diff --git a/src/kernel.cl b/src/kernel.cl index a7f2cc5..cf07f6c 100644 --- a/src/kernel.cl +++ b/src/kernel.cl @@ -154,4 +154,5 @@ __kernel void kernel_knk_2x2 } } + } \ No newline at end of file diff --git a/src/kernel_cpu.cl b/src/kernel_cpu.cl index ca2a9a8..54a0975 100644 --- a/src/kernel_cpu.cl +++ b/src/kernel_cpu.cl @@ -149,4 +149,5 @@ void kernel_knk_2x2 } } + } diff --git a/src/kernel_gpu.cl b/src/kernel_gpu.cl index 5525da9..9990611 100644 --- a/src/kernel_gpu.cl +++ b/src/kernel_gpu.cl @@ -354,6 +354,6 @@ unsigned char kernel_gpu[] = { 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x7d, 0x0a, 0x7d, 0x00 + 0x7d, 0x0a, 0x0a, 0x7d, 0x00 }; -unsigned int kernel_gpu_len = 4264; +unsigned int kernel_gpu_len = 4265;