From d676a22a34ee549587e105da7bb1c43e562e0a79 Mon Sep 17 00:00:00 2001 From: psychocrypt Date: Mon, 3 Apr 2017 21:34:43 +0200 Subject: [PATCH] fix condition `global_worker_size` needs to be a multiple of `local_worker_size`; this is not always the case in the current code. Replace the bit magic with an easy-to-read `ceil`. --- amd_gpu/gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amd_gpu/gpu.c b/amd_gpu/gpu.c index 7571e5d..17c62db 100644 --- a/amd_gpu/gpu.c +++ b/amd_gpu/gpu.c @@ -675,7 +675,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput) return(ERR_OCL_API); } - BranchNonces[i] += BranchNonces[i] + (w_size - (BranchNonces[i] & (w_size - 1))); + BranchNonces[i] = ((size_t)ceil( (double)BranchNonces[i] / (double)w_size) ) * w_size; if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[i + 3], 1, &ctx->Nonce, BranchNonces + i, &w_size, 0, NULL, NULL)) != CL_SUCCESS) { printer_print_msg("Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), i + 3);