Loopy: Reductions

Setup code

In [1]:
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.clrandom
import loopy as lp
In [2]:
# Create an OpenCL context and a command queue bound to it; the queue is
# what the later cells pass to pyopencl and to the loopy kernel invocation.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
In [3]:
# Problem size: both operands are square n-by-n matrices.
n = 1024
matrix_shape = (n, n)
matrix_dtype = np.float32

# Two single-precision random input matrices, generated through `queue`.
a = cl.clrandom.rand(queue, matrix_shape, dtype=matrix_dtype)
b = cl.clrandom.rand(queue, matrix_shape, dtype=matrix_dtype)

Capturing matrix-matrix multiplication

In [4]:
# Loop domain: i, j and k each run over 0 .. n-1.
loop_domain = "{[i,j,k]: 0<=i,j,k<n}"

# Single instruction: c[i, j] is the reduction over k of a[i, k]*b[k, j],
# i.e. one entry of the matrix-matrix product.
matmul_insn = "c[i, j] = sum(k, a[i, k]*b[k, j])"

knl = lp.make_kernel(loop_domain, matmul_insn)
In [5]:
# Have loopy emit the OpenCL C it generates (shown in this cell's output).
knl = lp.set_options(knl, write_cl=True)

# Run the kernel on the two input matrices. Keep the output array as `c`
# rather than discarding it into `_`, which would also shadow IPython's
# last-output variable.
evt, (c,) = knl(queue, a=a, b=b)

# Sanity-check the result against NumPy on the host. The tolerance is loose
# because both sides accumulate n float32 products, possibly in a different
# order.
np.testing.assert_allclose(c.get(), np.dot(a.get(), b.get()), rtol=1e-3)
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))

__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global float const *restrict a, __global float const *restrict b, __global float *restrict c, int const n)
{
  float acc_k;

  for (int j = 0; j <= -1 + n; ++j)
    for (int i = 0; i <= -1 + n; ++i)
    {
      acc_k = 0.0f;
      for (int k = 0; k <= -1 + n; ++k)
        acc_k = acc_k + a[n * i + k] * b[n * k + j];
      c[n * i + j] = acc_k;
    }
}
/home/andreas/src/loopy/loopy/compiled.py:841: LoopyWarning: kernel scheduling was ambiguous--more than one schedule found, ignoring
  kernel = get_one_scheduled_kernel(kernel)
/home/andreas/src/loopy/loopy/diagnostic.py:60: LoopyAdvisory: No device parameter was passed to the PyOpenCLTarget. Perhaps you want to pass a device to benefit from additional checking. (add 'no_device_in_pre_codegen_checks' to silenced_warnings kernel argument to disable)
  warn(text, type)
In [ ]: