PyOpenCL: An exercise

In [1]:
import pyopencl as cl
import numpy as np
import numpy.linalg as la
import pyopencl.array
import pyopencl.clrandom

Change the code below to:

  • Compute $c_i = a_ib_i$
  • Use work groups of $16\times 16$ items
  • Benchmark $1\times 1$ workgroups against $16\times 16$ workgroups

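    • Use time() from the time module (i.e. import time).
    • Call queue.finish() before each time() reading, so that the measurement covers the kernel's execution and not just the time needed to enqueue it.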
In [2]:
# Obtain an OpenCL context from PyOpenCL.
# NOTE(review): create_some_context() presumably selects (or asks for) a
# platform/device on its own -- confirm how it behaves in this environment.
ctx = cl.create_some_context()
# Command queue bound to that context; later cells pass it to the
# host-to-device transfer and to the kernel launch.
queue = cl.CommandQueue(ctx)
In [3]:
# Host-side input: a 1024 x 1024 array of NumPy uniform random samples,
# cast to float32 to match the kernel's `float` buffer type.
a = np.random.random_sample((1024, 1024)).astype(np.float32)
In [4]:
# Build the OpenCL program containing the `twice` kernel, which doubles every
# element of a float buffer in place.  Each work item of a 2D launch handles
# one element.  The flat index uses get_global_size(0) as its stride rather
# than a hard-coded 1024, so the kernel touches each element exactly once for
# any 2D launch whose total size equals the buffer length (for the
# 1024 x 1024 launch used in this notebook the index is unchanged).
prg = cl.Program(ctx, """
    __kernel void twice(__global float *a)
    {
      int gid0 = get_global_id(0);
      int gid1 = get_global_id(1);
      int i = gid1 * get_global_size(0) + gid0;
      a[i] = 2*a[i];
    }
    """).build()
# Keep a handle to the compiled kernel so later cells can launch it directly.
twice = prg.twice
In [5]:
# Copy the host array `a` to the device through `queue`.
a_dev = cl.array.to_device(queue, a)
# Launch `twice` with a global size equal to the array's shape (one work item
# per element); the kernel overwrites the device buffer in place.
# The third argument is the local (work-group) size.
# NOTE(review): None presumably leaves the work-group size to the OpenCL
# implementation -- confirm against the PyOpenCL documentation.
# The call returns an Event, which the notebook displays as this cell's output.
twice(queue, a_dev.shape, None, a_dev.data)
Out[5]:
<pyopencl.cffi_cl.Event at 0x7f679a3145f8>
In [6]:
# Sanity check: fetch the device result and compare it with the doubling
# computed on the host.  Prints the norm of the difference, followed by the
# norm of the original input for scale.
device_result = a_dev.get()
host_expected = 2*a
print(la.norm(device_result - host_expected), la.norm(a))
0.0 591.347
In [ ]: