diff --git a/.gitignore b/.gitignore index 4bf0ea2a..60f79ead 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,7 @@ src/cuda/level2/dwt2d/input.bmp.dwt.r build-aux configure *m4 + +# CLion +cmake-build-*/ +.idea/* \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 809d0566..5ce59cbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.8) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED True) +set(CMAKE_CUDA_ARCHITECTURES native) project(Altis LANGUAGES CXX CUDA) diff --git a/src/cuda/level0/devicememory/DeviceMemory.cu b/src/cuda/level0/devicememory/DeviceMemory.cu index ba0e2858..64e53973 100644 --- a/src/cuda/level0/devicememory/DeviceMemory.cu +++ b/src/cuda/level0/devicememory/DeviceMemory.cu @@ -125,39 +125,39 @@ __device__ int getRand(int seed, int mod); /// /// Bodun Hu (bodunhu@utexas.edu), 5/19/2020. /// +/// The texture. /// An int to process. /// [in,out] If non-null, the out. /// The width. //////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void readTexels(int n, float *d_out, int width); +__global__ void readTexels(cudaTextureObject_t tex, int n, float *d_out, int width); //////////////////////////////////////////////////////////////////////////////////////////////////// /// Reads texels in cache. /// /// Bodun Hu (bodunhu@utexas.edu), 5/19/2020. /// +/// The texture. /// An int to process. /// [in,out] If non-null, the out. //////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void readTexelsInCache(int n, float *d_out); +__global__ void readTexelsInCache(cudaTextureObject_t tex, int n, float *d_out); //////////////////////////////////////////////////////////////////////////////////////////////////// /// Reads texels random. /// /// Bodun Hu (bodunhu@utexas.edu), 5/19/2020. /// +/// The texture. /// An int to process. /// [in,out] If non-null, the out. /// The width. /// The height. //////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void readTexelsRandom(int n, float *d_out, int width, int height); -// Texture to use for the benchmarks -/// The tex a. -texture texA; +__global__ void readTexelsRandom(cudaTextureObject_t tex, int n, float *d_out, int width, int height); // **************************************************************************** // Function: addBenchmarkSpecOptions (From SHOC) @@ -456,10 +456,12 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) { checkCudaErrors(cudaEventCreate(&stop)); // make sure our texture behaves like we want.... - texA.normalized = false; - texA.addressMode[0] = cudaAddressModeClamp; - texA.addressMode[1] = cudaAddressModeClamp; - texA.filterMode = cudaFilterModePoint; + cudaTextureDesc texDesc { + .addressMode = { cudaAddressModeClamp, cudaAddressModeClamp }, + .filterMode = cudaFilterModePoint, + .normalizedCoords = false + }; + cudaChannelFormatDesc formatDesc = cudaCreateChannelDesc(); for (int j = 0; j < nsizes; j++) { if(!quiet) { @@ -499,15 +501,19 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) { // Allocate a cuda array cudaArray *cuArray; - checkCudaErrors(cudaMallocArray(&cuArray, &texA.channelDesc, width, height)); + checkCudaErrors(cudaMallocArray(&cuArray, &formatDesc, width, height)); // Copy in source data - //checkCudaErrors(cudaMemcpyToArray(cuArray, 0, 0, h_in, size, cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy2DToArray(cuArray, 0, 0, h_in, width, width, height, cudaMemcpyDefault)); - // Bind texture to the array - checkCudaErrors(cudaBindTextureToArray(texA, cuArray)); + cudaResourceDesc resource { + .resType = cudaResourceTypeArray, + .res = { + .array = { .array = cuArray } + } + }; + cudaTextureObject_t tex; + checkCudaErrors(cudaCreateTextureObject(&tex, &resource, &texDesc, nullptr)); for (int p = 0; p < passes; p++) { // Test 1: Repeated Linear Access @@ -516,7 +522,7 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) { checkCudaErrors(cudaEventRecord(start, 0)); // read texels from texture for (int iter = 0; iter < iterations; iter++) { - readTexels<<>>(kernelRepFactor, d_out, width); + readTexels<<>>(tex, kernelRepFactor, d_out, width); } checkCudaErrors(cudaEventRecord(stop, 0)); checkCudaErrors(cudaEventSynchronize(stop)); @@ -540,7 +546,7 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) { // Test 2 Repeated Cache Access checkCudaErrors(cudaEventRecord(start, 0)); for (int iter = 0; iter < iterations; iter++) { - readTexelsInCache<<>>(kernelRepFactor, d_out); + readTexelsInCache<<>>(tex, kernelRepFactor, d_out); } cudaEventRecord(stop, 0); cudaEventSynchronize(stop); @@ -564,7 +570,7 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) { // read texels from texture for (int iter = 0; iter < iterations; iter++) { - readTexelsRandom<<>>(kernelRepFactor, d_out, width, height); + readTexelsRandom<<>>(tex, kernelRepFactor, d_out, width, height); } checkCudaErrors(cudaEventRecord(stop, 0)); @@ -585,7 +591,7 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) { delete[] h_out; checkCudaErrors(cudaFree(d_out)); checkCudaErrors(cudaFreeArray(cuArray)); - checkCudaErrors(cudaUnbindTexture(texA)); + checkCudaErrors(cudaDestroyTextureObject(tex)); } checkCudaErrors(cudaEventDestroy(start)); checkCudaErrors(cudaEventDestroy(stop)); @@ -833,19 +839,20 @@ __global__ void writeLocalMemory(float *output, int size, int repeat) { /// /// Bodun Hu (bodunhu@utexas.edu), 5/19/2020. /// +/// The texture. /// An int to process. /// [in,out] If non-null, the out. /// The width. //////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void readTexels(int n, float *d_out, int width) { +__global__ void readTexels(cudaTextureObject_t tex, int n, float *d_out, int width) { int idx_x = (blockIdx.x * blockDim.x) + threadIdx.x; int idx_y = (blockIdx.y * blockDim.y) + threadIdx.y; int out_idx = idx_y * gridDim.x + idx_x; float sum = 0.0f; int width_bits = width - 1; for (int i = 0; i < n; i++) { - float4 v = tex2D(texA, float(idx_x), float(idx_y)); + float4 v = tex2D(tex, float(idx_x), float(idx_y)); idx_x = (idx_x + 1) & width_bits; sum += v.x; } @@ -859,17 +866,18 @@ __global__ void readTexels(int n, float *d_out, int width) { /// /// Bodun Hu (bodunhu@utexas.edu), 5/19/2020. /// +/// The texture. /// An int to process. /// [in,out] If non-null, the out. //////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void readTexelsInCache(int n, float *d_out) { +__global__ void readTexelsInCache(cudaTextureObject_t tex, int n, float *d_out) { int idx_x = (blockIdx.x * blockDim.x) + threadIdx.x; int idx_y = (blockIdx.y * blockDim.y) + threadIdx.y; int out_idx = idx_y * gridDim.x + idx_x; float sum = 0.0f; for (int i = 0; i < n; i++) { - float4 v = tex2D(texA, float(idx_x), float(idx_y)); + float4 v = tex2D(tex, float(idx_x), float(idx_y)); sum += v.x; } d_out[out_idx] = sum; @@ -882,13 +890,14 @@ __global__ void readTexelsInCache(int n, float *d_out) { /// /// Bodun Hu (bodunhu@utexas.edu), 5/19/2020. /// +/// The texture. /// An int to process. /// [in,out] If non-null, the out. /// The width. /// The height. //////////////////////////////////////////////////////////////////////////////////////////////////// -__global__ void readTexelsRandom(int n, float *d_out, int width, int height) { +__global__ void readTexelsRandom(cudaTextureObject_t tex, int n, float *d_out, int width, int height) { int idx_x = (blockIdx.x * blockDim.x) + threadIdx.x; int idx_y = (blockIdx.y * blockDim.y) + threadIdx.y; int out_idx = idx_y * gridDim.x + idx_x; @@ -896,7 +905,7 @@ __global__ void readTexelsRandom(int n, float *d_out, int width, int height) { int width_bits = width - 1; int height_bits = height - 1; for (int i = 0; i < n; i++) { - float4 v = tex2D(texA, float(idx_x), float(idx_y)); + float4 v = tex2D(tex, float(idx_x), float(idx_y)); idx_x = (idx_x * 3 + 29) & (width_bits); idx_y = (idx_y * 5 + 11) & (height_bits); sum += v.x;