diff --git a/.gitignore b/.gitignore
index 4bf0ea2a..60f79ead 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,3 +73,7 @@ src/cuda/level2/dwt2d/input.bmp.dwt.r
build-aux
configure
*m4
+
+# CLion
+cmake-build-*/
+.idea/*
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 809d0566..5ce59cbc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.8)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED True)
+set(CMAKE_CUDA_ARCHITECTURES native)
project(Altis LANGUAGES CXX CUDA)
diff --git a/src/cuda/level0/devicememory/DeviceMemory.cu b/src/cuda/level0/devicememory/DeviceMemory.cu
index ba0e2858..64e53973 100644
--- a/src/cuda/level0/devicememory/DeviceMemory.cu
+++ b/src/cuda/level0/devicememory/DeviceMemory.cu
@@ -125,39 +125,39 @@ __device__ int getRand(int seed, int mod);
///
/// Bodun Hu (bodunhu@utexas.edu), 5/19/2020.
///
+/// The texture.
/// An int to process.
/// [in,out] If non-null, the out.
/// The width.
////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void readTexels(int n, float *d_out, int width);
+__global__ void readTexels(cudaTextureObject_t tex, int n, float *d_out, int width);
////////////////////////////////////////////////////////////////////////////////////////////////////
/// Reads texels in cache.
///
/// Bodun Hu (bodunhu@utexas.edu), 5/19/2020.
///
+/// The texture.
/// An int to process.
/// [in,out] If non-null, the out.
////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void readTexelsInCache(int n, float *d_out);
+__global__ void readTexelsInCache(cudaTextureObject_t tex, int n, float *d_out);
////////////////////////////////////////////////////////////////////////////////////////////////////
/// Reads texels random.
///
/// Bodun Hu (bodunhu@utexas.edu), 5/19/2020.
///
+/// The texture.
/// An int to process.
/// [in,out] If non-null, the out.
/// The width.
/// The height.
////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void readTexelsRandom(int n, float *d_out, int width, int height);
-// Texture to use for the benchmarks
-/// The tex a.
-texture texA;
+__global__ void readTexelsRandom(cudaTextureObject_t tex, int n, float *d_out, int width, int height);
// ****************************************************************************
// Function: addBenchmarkSpecOptions (From SHOC)
@@ -456,10 +456,12 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) {
checkCudaErrors(cudaEventCreate(&stop));
// make sure our texture behaves like we want....
- texA.normalized = false;
- texA.addressMode[0] = cudaAddressModeClamp;
- texA.addressMode[1] = cudaAddressModeClamp;
- texA.filterMode = cudaFilterModePoint;
+ cudaTextureDesc texDesc {
+ .addressMode = { cudaAddressModeClamp, cudaAddressModeClamp },
+ .filterMode = cudaFilterModePoint,
+ .normalizedCoords = false
+ };
+ cudaChannelFormatDesc formatDesc = cudaCreateChannelDesc();
for (int j = 0; j < nsizes; j++) {
if(!quiet) {
@@ -499,15 +501,19 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) {
// Allocate a cuda array
cudaArray *cuArray;
- checkCudaErrors(cudaMallocArray(&cuArray, &texA.channelDesc, width, height));
+ checkCudaErrors(cudaMallocArray(&cuArray, &formatDesc, width, height));
// Copy in source data
- //checkCudaErrors(cudaMemcpyToArray(cuArray, 0, 0, h_in, size, cudaMemcpyHostToDevice));
-
checkCudaErrors(cudaMemcpy2DToArray(cuArray, 0, 0, h_in, width, width, height, cudaMemcpyDefault));
- // Bind texture to the array
- checkCudaErrors(cudaBindTextureToArray(texA, cuArray));
+ cudaResourceDesc resource {
+ .resType = cudaResourceTypeArray,
+ .res = {
+ .array = { .array = cuArray }
+ }
+ };
+ cudaTextureObject_t tex;
+ checkCudaErrors(cudaCreateTextureObject(&tex, &resource, &texDesc, nullptr));
for (int p = 0; p < passes; p++) {
// Test 1: Repeated Linear Access
@@ -516,7 +522,7 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) {
checkCudaErrors(cudaEventRecord(start, 0));
// read texels from texture
for (int iter = 0; iter < iterations; iter++) {
- readTexels<<>>(kernelRepFactor, d_out, width);
+ readTexels<<>>(tex, kernelRepFactor, d_out, width);
}
checkCudaErrors(cudaEventRecord(stop, 0));
checkCudaErrors(cudaEventSynchronize(stop));
@@ -540,7 +546,7 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) {
// Test 2 Repeated Cache Access
checkCudaErrors(cudaEventRecord(start, 0));
for (int iter = 0; iter < iterations; iter++) {
- readTexelsInCache<<>>(kernelRepFactor, d_out);
+ readTexelsInCache<<>>(tex, kernelRepFactor, d_out);
}
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
@@ -564,7 +570,7 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) {
// read texels from texture
for (int iter = 0; iter < iterations; iter++) {
- readTexelsRandom<<>>(kernelRepFactor, d_out, width, height);
+ readTexelsRandom<<>>(tex, kernelRepFactor, d_out, width, height);
}
checkCudaErrors(cudaEventRecord(stop, 0));
@@ -585,7 +591,7 @@ void TestTextureMem(ResultDatabase &resultDB, OptionParser &op, double scalet) {
delete[] h_out;
checkCudaErrors(cudaFree(d_out));
checkCudaErrors(cudaFreeArray(cuArray));
- checkCudaErrors(cudaUnbindTexture(texA));
+ checkCudaErrors(cudaDestroyTextureObject(tex));
}
checkCudaErrors(cudaEventDestroy(start));
checkCudaErrors(cudaEventDestroy(stop));
@@ -833,19 +839,20 @@ __global__ void writeLocalMemory(float *output, int size, int repeat) {
///
/// Bodun Hu (bodunhu@utexas.edu), 5/19/2020.
///
+/// The texture.
/// An int to process.
/// [in,out] If non-null, the out.
/// The width.
////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void readTexels(int n, float *d_out, int width) {
+__global__ void readTexels(cudaTextureObject_t tex, int n, float *d_out, int width) {
int idx_x = (blockIdx.x * blockDim.x) + threadIdx.x;
int idx_y = (blockIdx.y * blockDim.y) + threadIdx.y;
int out_idx = idx_y * gridDim.x + idx_x;
float sum = 0.0f;
int width_bits = width - 1;
for (int i = 0; i < n; i++) {
- float4 v = tex2D(texA, float(idx_x), float(idx_y));
+ float4 v = tex2D(tex, float(idx_x), float(idx_y));
idx_x = (idx_x + 1) & width_bits;
sum += v.x;
}
@@ -859,17 +866,18 @@ __global__ void readTexels(int n, float *d_out, int width) {
///
/// Bodun Hu (bodunhu@utexas.edu), 5/19/2020.
///
+/// The texture.
/// An int to process.
/// [in,out] If non-null, the out.
////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void readTexelsInCache(int n, float *d_out) {
+__global__ void readTexelsInCache(cudaTextureObject_t tex, int n, float *d_out) {
int idx_x = (blockIdx.x * blockDim.x) + threadIdx.x;
int idx_y = (blockIdx.y * blockDim.y) + threadIdx.y;
int out_idx = idx_y * gridDim.x + idx_x;
float sum = 0.0f;
for (int i = 0; i < n; i++) {
- float4 v = tex2D(texA, float(idx_x), float(idx_y));
+ float4 v = tex2D(tex, float(idx_x), float(idx_y));
sum += v.x;
}
d_out[out_idx] = sum;
@@ -882,13 +890,14 @@ __global__ void readTexelsInCache(int n, float *d_out) {
///
/// Bodun Hu (bodunhu@utexas.edu), 5/19/2020.
///
+/// The texture.
/// An int to process.
/// [in,out] If non-null, the out.
/// The width.
/// The height.
////////////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void readTexelsRandom(int n, float *d_out, int width, int height) {
+__global__ void readTexelsRandom(cudaTextureObject_t tex, int n, float *d_out, int width, int height) {
int idx_x = (blockIdx.x * blockDim.x) + threadIdx.x;
int idx_y = (blockIdx.y * blockDim.y) + threadIdx.y;
int out_idx = idx_y * gridDim.x + idx_x;
@@ -896,7 +905,7 @@ __global__ void readTexelsRandom(int n, float *d_out, int width, int height) {
int width_bits = width - 1;
int height_bits = height - 1;
for (int i = 0; i < n; i++) {
- float4 v = tex2D(texA, float(idx_x), float(idx_y));
+ float4 v = tex2D(tex, float(idx_x), float(idx_y));
idx_x = (idx_x * 3 + 29) & (width_bits);
idx_y = (idx_y * 5 + 11) & (height_bits);
sum += v.x;