from numba import cuda
import cupy as cp
from utils import compute_lcm

# Names of the kernels to expose from the compiled module.
kers = ('gpu_egyensulyi', )

# Read the CUDA source; the relative path is resolved against the current
# working directory at import time.
with open('epgpu.cu') as f:
    code = f.read()

# Build the module from the source text with the C++11 flag, registering the
# kernel names so they can be looked up afterwards.
ep_pontok_module = cp.RawModule(
    code=code,
    options=('--std=c++11',),
    name_expressions=kers,
)

# Handle to the first (and only) listed kernel; start_kernel launches it.
fun = ep_pontok_module.get_function(kers[0])

def start_kernel(Cx, Cy, Dx, Dy, Dz, v, w):
    """Launch the ``gpu_egyensulyi`` kernel and return its result buffer.

    A zero-filled ``int8`` array holding ``Cx.size * Dx.size`` matrices of
    shape 4x4 is allocated on the device and handed to the kernel as its
    last argument. The launch uses 1-D blocks of 256 threads, with enough
    blocks to provide at least one thread per result matrix.

    Args:
        Cx, Cy: Forwarded to the kernel unchanged; only ``Cx.size`` is read
            here. Presumably device arrays of equal length -- confirm
            against ``epgpu.cu``.
        Dx, Dy, Dz: Forwarded to the kernel unchanged; only ``Dx.size`` is
            read here. Presumably device arrays of equal length -- confirm
            against ``epgpu.cu``.
        v, w: Forwarded unchanged as the first two kernel arguments.

    Returns:
        The CuPy array of shape ``(Cx.size * Dx.size, 4, 4)`` and dtype
        ``int8`` that was passed to the kernel. When either input is empty
        the kernel is not launched and the (empty) array is returned as is.
    """
    threads_per_block = 256
    total = Cx.size * Dx.size

    print(f"Cnt: {Cx.size}x{Dx.size}={total}")
    print("Res size (byte): ", total*4*4)

    egyensulyi_mtx = cp.zeros((total, 4, 4), dtype=cp.int8)
    if total == 0:
        # A grid with zero blocks is an invalid launch configuration, so
        # there is nothing to compute and nothing to launch.
        return egyensulyi_mtx

    lcm = compute_lcm(Cx.size, Dx.size)

    # Integer ceil-division: stays exact for any size, unlike int(x / 256).
    num_blocks = (total + threads_per_block - 1) // threads_per_block

    # NOTE(review): the sizes and lcm are plain Python ints here; check that
    # the scalar parameter types declared in epgpu.cu match what CuPy passes
    # for Python scalars, or wrap them in explicit NumPy scalar types.
    kernel_args = (v, w, Cx, Cy, Dx, Dy, Dz, Cx.size, Dx.size, lcm, egyensulyi_mtx)
    fun((num_blocks,), (threads_per_block,), kernel_args)

    return egyensulyi_mtx
