From: miha-q <>
Date: Mon, 4 Mar 2024 17:12:35 +0000 (-0500)
Subject: Mon Mar  4 12:12:35 PM EST 2024
X-Git-Url: http://www.foleosoft.com/?a=commitdiff_plain;h=d06fd9e2dc741fb7270941a5b85644e1b59e920d;p=QAnsel.git

Mon Mar  4 12:12:35 PM EST 2024
---

diff --git a/src/complex.c b/src/complex.c
index f616e8d..d6e5610 100644
--- a/src/complex.c
+++ b/src/complex.c
@@ -236,10 +236,8 @@ void cpx_mtx_clean()
 	free(cpx_mtx_cache);
 }
 
-void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsB, int shared)
+void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int colsA, int rowsB, int colsB)
 {
-    int colsA = shared;
-    int rowsB = shared;
     int rowsR = rowsA;
     int colsR = colsB;
 
@@ -297,8 +295,9 @@ void cpx_mtx_dot_metal(float* ptrR, float* ptrA, float* ptrB, int rowsA, int col
 	err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&memA); gpuerr(clSetKernelArg);
 	err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&memB); gpuerr(clSetKernelArg);
 	err = clSetKernelArg(kernel, 3, sizeof(int), &rowsA); gpuerr(clSetKernelArg);
-	err = clSetKernelArg(kernel, 4, sizeof(int), &colsB); gpuerr(clSetKernelArg);
-	err = clSetKernelArg(kernel, 5, sizeof(int), &shared); gpuerr(clSetKernelArg);
+	err = clSetKernelArg(kernel, 4, sizeof(int), &colsA); gpuerr(clSetKernelArg);
+	err = clSetKernelArg(kernel, 5, sizeof(int), &rowsB); gpuerr(clSetKernelArg);
+	err = clSetKernelArg(kernel, 6, sizeof(int), &colsB); gpuerr(clSetKernelArg);
 
 	//Run the program
 	err = clEnqueueNDRangeKernel(cpx_mtx_command_queue, kernel, 2, NULL, (size_t[]){rowsR, colsR}, NULL, 0, NULL, NULL);
diff --git a/src/kernel.cl b/src/kernel.cl
index f72d87e..0d9c69a 100644
--- a/src/kernel.cl
+++ b/src/kernel.cl
@@ -4,12 +4,11 @@ __kernel void kernel_dot
     __global float* ptrA,
     __global float* ptrB,
     const int rowsA,
-    const int colsB,
-    const int shared
+    const int colsA,
+    const int rowsB,
+    const int colsB
 )
 {
-    const int colsA = shared;
-    const int rowsB = shared;
     const int rowsR = rowsA;
     const int colsR = colsB;
     const int rowR = get_global_id(0);
@@ -20,7 +19,7 @@ __kernel void kernel_dot
 
     const int posR = rowR * (colsR * 2) + (colR * 2);
 
-    for (int i = 0; i < shared; i++)
+    for (int i = 0; i < colsA; i++)
     {
         int posA = rowR * (colsA * 2) + (i * 2);
         int posB = i * (colsB * 2) + (colR * 2);