안녕하세요 파이썬에서 pycuda로 juliaSet을 만들어 보고 있습니다.
import ctypes
import pycuda.autoinit
import numpy as np
import cv2
import pycuda.driver as cuda
import numpy
from pycuda.compiler import SourceModule
bmp = np.zeros((1000,1000), dtype=np.float32)
ptr = bmp.ctypes.data_as(ctypes.c_void_p)
bmp = bmp.astype(numpy.float32)
a_gpu = cuda.mem_alloc(bmp.nbytes)
cuda.memcpy_htod(a_gpu, bmp)
mod = SourceModule("""
struct cuComplex {
float r; float i;
__device__ cuComplex(float a, float b) : r(a),i(b) {}
__device__ float magnitude2(void) { return r * r + i * i; }
__device__ cuComplex operator* (const cuComplex& a){
return cuComplex(r * a.r - i * a.i, i * a.r + r * a.i);}
__device__ cuComplex operator+ (const cuComplex& a){
return cuComplex(r + a.r, i + a.i);}
};
__device__ int julia(int x, int y){
const float scale = 1.5;
float jx = scale * (float)(1000/2-x)/(1000/2);
float jy = scale * (float)(1000/2-y)/(1000/2);
cuComplex c(-0.8,0.156);
cuComplex a(jx,jy);
for (int i = 0;i<200;i++){
a = a * a + c;
if(a.magnitude2()>1000)
return 0;
}
return 1;
}
__global__ void juliaSet(float *ptr)
{
int x = blockIdx.x;
int y = blockIdx.y;
int offset = x + y * gridDim.x;
int juliaValue = julia(x,y);
ptr[offset*4+0]=255*juliaValue;
ptr[offset*4+1]=0;
ptr[offset*4+2]=0;
ptr[offset*4+3]=255;
}
""")
func = mod.get_function("juliaSet")
bmp_kernel = numpy.empty_like(bmp)
func(a_gpu,block=(1000,1000,1))
cuda.memcpy_dtoh(bmp_kernel, a_gpu)
cv2.imshow('bmp', bmp)
cv2.waitKey()
이런식으로 코딩을 했는데 오류가
pycuda._driver.LogicError: cuFuncSetBlockShape failed: invalid argument
이렇게 뜹니다. 혹시 고쳐야 할 부분이 있을까요??