diff --git a/Makefile b/Makefile
index 7702b13..a21ea78 100644
--- a/Makefile
+++ b/Makefile
@@ -1,2 +1,7 @@
+.PHONY: hello collatz
+
+collatz: collatz.cpp
+	clang++ -o collatz collatz.cpp -lOpenCL
+
 hello: main.cpp
 	g++ -o hello main.cpp -lOpenCL
diff --git a/collatz-kernel.cl b/collatz-kernel.cl
new file mode 100644
index 0000000..c49bdb8
--- /dev/null
+++ b/collatz-kernel.cl
@@ -0,0 +1,34 @@
+#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
+
+/*
+ * One work-item per starting value: work-item N handles start = N + 1,
+ * stores the start in inputNums[N] and the number of Collatz steps needed
+ * to reach 1 in triesArr[N].
+ */
+__kernel void collatz(__global unsigned int* inputNums,
+                      __global unsigned int* triesArr) {
+    /* Row-major linear index over the 2-D global range. */
+    const size_t N = get_global_id(1) * get_global_size(0) + get_global_id(0);
+    const unsigned int start = (unsigned int)N + 1;
+    unsigned int tries = 0;
+
+    /*
+     * Iterate in 64 bits: 3 * val + 1 outgrows 32 bits for some starts in
+     * the 1..2^20 range the host launches (e.g. 704511 peaks near 5.7e10),
+     * and a wrapped value would give a wrong count or never reach 1.
+     */
+    ulong val = start;
+
+    inputNums[N] = start;
+
+    while(val != 1) {
+        if(val % 2 == 0) {
+            val = val / 2;
+        } else {
+            val = 3 * val + 1;
+        }
+        tries++;
+    }
+
+    triesArr[N] = tries;
+}
diff --git a/collatz.cpp b/collatz.cpp
new file mode 100644
index 0000000..134bb8a
--- /dev/null
+++ b/collatz.cpp
@@ -0,0 +1,118 @@
+#define CL_TARGET_OPENCL_VERSION 120
+
+// NOTE(review): the header names were stripped from this copy of the patch;
+// the list below is reconstructed from the identifiers the file uses --
+// confirm against the original commit.
+#include <CL/cl.hpp>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
+// Leftover from the hello-world sample; not referenced in this file.
+const std::string hw("Hello World!");
+
+// Prints "ERROR: <name> (<err>)" to stderr and exits with EXIT_FAILURE when
+// err is not CL_SUCCESS; otherwise does nothing.
+inline void checkErr(cl_int err, const char* name) {
+    if(err != CL_SUCCESS) {
+        std::cerr << "ERROR: " << name << " (" << err << ")" << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+// Fills mat[0..size) with rand() % 50. Not called in this file.
+void initMatrix(float *mat, int size) {
+    for(int i = 0; i < size; i++) {
+        mat[i] = rand() % 50;
+    }
+}
+
+// Builds collatz-kernel.cl for the first platform's GPU devices, runs the
+// "collatz" kernel over a 1024 x 1024 range and reads both result arrays
+// back to the host. Any OpenCL failure is reported through checkErr().
+int main(void) {
+    cl_int err;
+
+    std::vector<cl::Platform> platformList;
+    cl::Platform::get(&platformList);
+    checkErr(platformList.size() != 0 ? CL_SUCCESS : -1, "cl::Platform::get");
+
+    std::cerr << "Number of platforms: " << platformList.size() << std::endl;
+
+    for(size_t i = 0; i < platformList.size(); i++) {
+        std::string platformVendor;
+        platformList[i].getInfo((cl_platform_info)CL_PLATFORM_VENDOR, &platformVendor);
+        std::cerr << "Platform #" << i << " Vendor: " << platformVendor << std::endl;
+        std::string platformName;
+        platformList[i].getInfo((cl_platform_info)CL_PLATFORM_NAME, &platformName);
+        std::cerr << "Platform #" << i << " Name: " << platformName << std::endl;
+    }
+
+    cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0 };
+    cl::Context context(CL_DEVICE_TYPE_GPU, cprops, NULL, NULL, &err);
+    checkErr(err, "Context::Context()");
+
+    // One work-item per starting value; the kernel flattens the 2-D range.
+    const size_t GLOBAL_DIM = 1024;
+    const size_t LOCAL_DIM = 16;
+    const size_t NUM_ITEMS = GLOBAL_DIM * GLOBAL_DIM;
+    // cl_uint matches the kernel's 32-bit unsigned int elements.
+    const size_t arrSize = NUM_ITEMS * sizeof(cl_uint);
+
+    // Host-side result storage, released automatically when main returns.
+    std::vector<cl_uint> inputNums_host(NUM_ITEMS);
+    std::vector<cl_uint> tries_host(NUM_ITEMS);
+
+    std::vector<cl::Device> devices;
+    devices = context.getInfo<CL_CONTEXT_DEVICES>();
+    checkErr(devices.size() > 0 ? CL_SUCCESS : -1, "devices.size() > 0");
+
+    std::ifstream file("collatz-kernel.cl");
+    checkErr(file.is_open() ? CL_SUCCESS : -1, "collatz-kernel.cl");
+
+    std::string programSourceString(std::istreambuf_iterator<char>(file), (std::istreambuf_iterator<char>()));
+    // Pass the text length only; the terminating NUL is not program source.
+    cl::Program::Sources programSource(1, std::make_pair(programSourceString.c_str(), programSourceString.length()));
+    cl::Program program(context, programSource, &err);
+    checkErr(err, "Program::Program()");
+    err = program.build(devices, "");
+    if(err != CL_SUCCESS) {
+        // Show the compiler diagnostics before checkErr() exits.
+        std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl;
+    }
+    checkErr(err, "Program::Build()");
+
+    cl::Kernel kernel(program, "collatz", &err);
+    checkErr(err, "Kernel::Kernel()");
+
+    // Device-side output buffers; the kernel only writes to them.
+    cl::Buffer inputNums_device(context, CL_MEM_WRITE_ONLY, arrSize, NULL, &err);
+    checkErr(err, "Buffer::Buffer(inputNums)");
+    cl::Buffer tries_device(context, CL_MEM_WRITE_ONLY, arrSize, NULL, &err);
+    checkErr(err, "Buffer::Buffer(tries)");
+
+    err = kernel.setArg(0, inputNums_device);
+    checkErr(err, "Kernel::setArg(0)");
+    err = kernel.setArg(1, tries_device);
+    checkErr(err, "Kernel::setArg(1)");
+
+    cl::CommandQueue queue(context, devices[0], 0, &err);
+    checkErr(err, "CommandQueue::CommandQueue()");
+    cl::Event event;
+    err = queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(GLOBAL_DIM, GLOBAL_DIM), cl::NDRange(LOCAL_DIM, LOCAL_DIM), NULL, &event);
+    checkErr(err, "CommandQueue::enqueueNDRangeKernel()");
+
+    err = event.wait();
+    checkErr(err, "Event::wait()");
+    // CL_TRUE makes each read blocking, so the host vectors are filled on return.
+    err = queue.enqueueReadBuffer(tries_device, CL_TRUE, 0, arrSize, &tries_host[0]);
+    checkErr(err, "CommandQueue::enqueueReadBuffer(tries)");
+    err = queue.enqueueReadBuffer(inputNums_device, CL_TRUE, 0, arrSize, &inputNums_host[0]);
+    checkErr(err, "CommandQueue::enqueueReadBuffer(inputNums)");
+
+    return EXIT_SUCCESS;
+}