#!/usr/bin/env python3 from sympy import symbols, init_printing, simplify, diff from sympy.utilities.codegen import codegen import numpy as np import re import os import pyopencl as cl # https://docs.sympy.org/latest/tutorials/intro-tutorial/intro.html init_printing(use_unicode=True) class GradSolver(): def __init__(self, eq, variables, ranges): self.eq = eq self.variables = variables self.ranges = ranges def prepare(self): ## (self.eq - 0) ** 2 self.cost = self.eq ** 2 self.deq = [simplify(diff(self.cost, v)) for v in self.variables] print("eq:", self.eq) print("cost:", self.cost) print("dcost/dv:", self.deq, self.variables) self.x0 = np.array([0, 0]) self.pgrad = None def step(self, gamma=0.1, beta=0.9): x0 = self.x0 fx0 = self.eq.evalf(subs={"x":x0[0], "y": x0[1]}) print("x0: ", x0, "f(x0):", fx0) grad = np.array([df.evalf(subs={"x": x0[0], "y": x0[1]}) for df in self.deq]) print("grad: ", grad) # implementation of momentum if self.pgrad is None: self.pgrad = grad grad = beta * self.pgrad + (1-beta) * grad self.pgrad = grad x1 = x0 - gamma * grad for i in range(len(x1)): if x1[i] < self.ranges[i][0]: x1[i] = self.ranges[i][0] elif x1[i] > self.ranges[i][1]: x1[i] = self.ranges[i][1] self.x0 = x1 return self.eq.evalf(subs={"x": x1[0], "y": x1[1]}) class OpenCLGradSolver(GradSolver): def convert_opencl(self, code): pattern = r"^#include.*$" code = re.sub(pattern, '', code, flags=re.MULTILINE) code = code.replace("double", "float") return code def prepare(self, nthreads=32): super().prepare() self.nthreads = nthreads gen = codegen(("eq", self.eq), "C", "eq", header=False) code = self.convert_opencl(gen[0][1]) for f, v in zip(self.deq, self.variables): gen = codegen(("deq_d" + v.name, f), "C", "deq", header=False) code += self.convert_opencl(gen[0][1]) self.kernel_code = code self.kernel_code += "#define NVAR " + str(len(self.variables)) + "\n" kernel_file = os.path.dirname(os.path.realpath('__file__')) + "/gradient.cl" with open(kernel_file, "r") as f: self.kernel_code += f.read() self.kernel_code += "void eval_deq(vec grad, __global vec x0){\n" for i, v in zip(range(len(self.variables)), self.variables): self.kernel_code += " grad[%d] = deq_d%s("%(i, v.name) self.kernel_code += ','.join(["x0[%d]"%(j,) for j in range(len(self.variables))]) self.kernel_code += ");\n" self.kernel_code += "}\n" self.kernel_code += "float eval_eq(__global const vec x0){\n" self.kernel_code += " return eq(" self.kernel_code += ','.join(["x0[%d]"%(j,) for j in range(len(self.variables))]) self.kernel_code += ");\n" self.kernel_code += "}\n" print(self.kernel_code) # now with the gore opencl code self.ctx = cl.create_some_context(answers=[0]) self.queue = cl.CommandQueue(self.ctx) self.prg = cl.Program(self.ctx, self.kernel_code).build() mf = cl.mem_flags self.x0 = np.random.rand(nthreads, len(self.variables)).astype(np.float32) #print("x0", self.x0) self.pgrad = np.ndarray((nthreads, len(self.variables)), dtype=np.float32) self.result = np.ndarray(shape=nthreads, dtype=np.float32) self.x0_cl= cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.x0) self.pgrad_cl = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=self.pgrad) self.result_cl = cl.Buffer(self.ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=self.result) self.prg.init_pgrad(self.queue, (self.nthreads,), None, self.x0_cl, self.pgrad_cl) cl.enqueue_copy(self.queue, self.pgrad, self.pgrad_cl) def step(self, gamma=0.1, beta=0.9): self.prg.step(self.queue, (self.nthreads,), None, cl.cltypes.float(gamma), cl.cltypes.float(beta), self.x0_cl, self.pgrad_cl) cl.enqueue_copy(self.queue, self.x0, self.x0_cl) cl.enqueue_copy(self.queue, self.pgrad, self.pgrad_cl) #print("cl x0 ", self.x0) #print("cl pgrad ", self.pgrad) self.prg.eval(self.queue, (self.nthreads,) , None, self.x0_cl, self.result_cl) cl.enqueue_copy(self.queue, self.result, self.result_cl) #print("cl res", self.result) return sum(self.result) def main(): x, y, z = symbols("x y z") eq = 3 * x**3 - 2* x**2 - 3*x + 1 + 2*y**2 - 2 * x * y + 2 * x**2 * y + x * y * z solver = OpenCLGradSolver(eq, (x, y, z), ((-100, 100), (-100, 100))) #solver2 = GradSolver(eq, (x, y), ((-100, 100), (-100, 100))) solver.prepare(nthreads=64) #solver2.prepare() # return for i in range(3000): #f2 = solver2.step(gamma = 0.01) f = solver.step(gamma = 0.01) #print("---") if abs(f) < 1e-8: print("Converged in ", i, "steps") break else: print("Didn't converge in %d steps"%(i,)) print("x0:", solver.x0) print("pgrad:", solver.pgrad) print("result:", solver.result) if __name__ == "__main__": main()