import pyopencl as cl
import numpy as np
import numpy.linalg as la
import pyopencl.array
import pyopencl.clrandom
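# Change the code below to:
#
# * Benchmark $1\times 1$ workgroups against $16\times 16$ workgroups.
# * Use `time()` from the `time` module (i.e. `import time`).
# * Use `queue.finish()` (kernel launches are asynchronous, so wait for the
#   queue to drain before reading the clock).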
# Create an OpenCL context and a command queue; all device work below is
# enqueued on this queue.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Host-side input: a 1024 x 1024 single-precision random matrix.
a = np.random.rand(1024, 1024).astype(np.float32)

# Kernel that doubles every element of a 2-D buffer in place, one work-item
# per element.  The flat index uses the launch's own extent along dimension 0
# as the stride, so the kernel is not tied to one particular array size; with
# the 1024 x 1024 launch below this is the same index as a literal 1024.
prg = cl.Program(ctx, """
__kernel void twice(__global float *a)
{
    int gid0 = get_global_id(0);
    int gid1 = get_global_id(1);
    int i = gid1 * get_global_size(0) + gid0;
    a[i] = 2*a[i];
}
""").build()
twice = prg.twice

# Copy the input to the device.
a_dev = cl.array.to_device(queue, a)

# Launch with global size equal to the array shape.  The local (work-group)
# size is None, i.e. no explicit work-group shape is requested here.
twice(queue, a_dev.shape, None, a_dev.data)

# Check the result against the host computation: the first number is the norm
# of the difference from 2*a, the second is the norm of the input for scale.
print(la.norm(a_dev.get() - 2*a), la.norm(a))