Something More for Research

Explorer of Research #HEMBAD

CUDA Thread Model Code


NVIDIA CUDA Thread Model

Sometimes it can be a bit tricky to figure out the global (unique) thread index,
especially if you are working with multi-dimensional grids of multi-dimensional
blocks of threads. I could not find a simple cheat sheet that demonstrates
exactly what you need to do to calculate the global thread index for every
configuration you might need to use. I know that with a little effort anyone
can figure it out, but I thought I would share some of my code with you to make
your life easier. At the end of the day, sharing is caring 🙂

Here is the code, which you can compile with `nvcc -arch=sm_20` (device-side
printf requires compute capability 2.0 or higher):

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <conio.h>
#include <stdio.h>

//device functions
// Global (flat) thread index for a 1D grid of 1D blocks:
// the number of threads in all preceding blocks plus this thread's x offset.
__device__ int getGlobalIdx_1D_1D()
{
    const unsigned int threadsBeforeBlock = blockDim.x * blockIdx.x;
    return threadsBeforeBlock + threadIdx.x;
}

// Global (flat) thread index for a 1D grid of 2D blocks.
// Threads inside a block are numbered row by row (x varies fastest).
__device__ int getGlobalIdx_1D_2D()
{
    const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
    const unsigned int localIdx = threadIdx.x + blockDim.x * threadIdx.y;
    return blockIdx.x * threadsPerBlock + localIdx;
}

// Global (flat) thread index for a 1D grid of 3D blocks.
// Threads inside a block are numbered with x fastest, then y, then z.
__device__ int getGlobalIdx_1D_3D()
{
    const unsigned int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
    // Nested multiply-add form of z * (dimX * dimY) + y * dimX + x.
    const unsigned int localIdx =
        (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;
    return blockIdx.x * threadsPerBlock + localIdx;
}

// Global (flat) thread index for a 2D grid of 1D blocks.
// Blocks are numbered row by row across the grid (x varies fastest).
__device__ int getGlobalIdx_2D_1D()
{
    const unsigned int blockRank = blockIdx.x + gridDim.x * blockIdx.y;
    return blockRank * blockDim.x + threadIdx.x;
}

// Global (flat) thread index for a 2D grid of 2D blocks.
// Blocks are ranked x-fastest within the grid; threads are ranked x-fastest
// within their block.
__device__ int getGlobalIdx_2D_2D()
{
    const unsigned int blockRank = gridDim.x * blockIdx.y + blockIdx.x;
    const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
    const unsigned int localIdx = blockDim.x * threadIdx.y + threadIdx.x;
    return blockRank * threadsPerBlock + localIdx;
}

// Global (flat) thread index for a 2D grid of 3D blocks.
// Blocks are ranked x-fastest within the grid; threads are ranked with
// x fastest, then y, then z within their block.
__device__ int getGlobalIdx_2D_3D()
{
    const unsigned int blockRank = gridDim.x * blockIdx.y + blockIdx.x;
    const unsigned int sliceSize = blockDim.x * blockDim.y;   // threads per z-slice
    const unsigned int threadsPerBlock = sliceSize * blockDim.z;
    const unsigned int localIdx =
        sliceSize * threadIdx.z + blockDim.x * threadIdx.y + threadIdx.x;
    return blockRank * threadsPerBlock + localIdx;
}

// Global (flat) thread index for a 3D grid of 1D blocks.
// Blocks are ranked with x fastest, then y, then z within the grid.
__device__ int getGlobalIdx_3D_1D()
{
    // Nested multiply-add form of z * (gridX * gridY) + y * gridX + x.
    const unsigned int blockRank =
        (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
    return blockRank * blockDim.x + threadIdx.x;
}

// Global (flat) thread index for a 3D grid of 2D blocks.
// Blocks are ranked with x fastest, then y, then z within the grid;
// threads are ranked row by row within their block.
__device__ int getGlobalIdx_3D_2D()
{
    const unsigned int blocksPerLayer = gridDim.x * gridDim.y;
    const unsigned int blockRank =
        blocksPerLayer * blockIdx.z + gridDim.x * blockIdx.y + blockIdx.x;
    const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
    const unsigned int localIdx = blockDim.x * threadIdx.y + threadIdx.x;
    return blockRank * threadsPerBlock + localIdx;
}

// Global (flat) thread index for a 3D grid of 3D blocks.
// Both blocks (within the grid) and threads (within a block) are ranked
// with x fastest, then y, then z.
__device__ int getGlobalIdx_3D_3D()
{
    const unsigned int blocksPerLayer = gridDim.x * gridDim.y;
    const unsigned int blockRank =
        blocksPerLayer * blockIdx.z + gridDim.x * blockIdx.y + blockIdx.x;

    const unsigned int sliceSize = blockDim.x * blockDim.y;   // threads per z-slice
    const unsigned int threadsPerBlock = sliceSize * blockDim.z;
    const unsigned int localIdx =
        sliceSize * threadIdx.z + blockDim.x * threadIdx.y + threadIdx.x;

    return blockRank * threadsPerBlock + localIdx;
}

//kernels
// Demo kernel for a 1D grid of 1D blocks: every thread prints its local x
// index, its computed global index, and the raw built-in index/dimension values.
__global__ void kernel_1D_1D()
{
    const int globalId = getGlobalIdx_1D_1D();

    printf("Local thread ID: %i Global thread ID: %i\n", threadIdx.x, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}
// Demo kernel for a 1D grid of 2D blocks: every thread prints its local (x, y)
// indices, its computed global index, and the raw built-in index/dimension values.
__global__ void kernel_1D_2D()
{
    const int globalId = getGlobalIdx_1D_2D();

    printf("Local thread IDs: (%i,%i) Global thread ID: %i\n",
           threadIdx.x, threadIdx.y, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}

// Demo kernel for a 1D grid of 3D blocks: every thread prints its local
// (x, y, z) indices, its computed global index, and the raw built-in
// index/dimension values.
__global__ void kernel_1D_3D()
{
    const int globalId = getGlobalIdx_1D_3D();

    printf("Local thread IDs: (%i,%i,%i) Global thread ID: %i\n",
           threadIdx.x, threadIdx.y, threadIdx.z, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}

// Demo kernel for a 2D grid of 1D blocks: every thread prints its local x
// index, its computed global index, and the raw built-in index/dimension values.
__global__ void kernel_2D_1D()
{
    const int globalId = getGlobalIdx_2D_1D();

    printf("Local thread ID: %i Global thread ID: %i\n", threadIdx.x, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}

// Demo kernel for a 2D grid of 2D blocks: every thread prints its local (x, y)
// indices, its computed global index, and the raw built-in index/dimension values.
__global__ void kernel_2D_2D()
{
    const int globalId = getGlobalIdx_2D_2D();

    printf("Local thread IDs: (%i,%i) Global thread ID: %i\n",
           threadIdx.x, threadIdx.y, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}

// Demo kernel for a 2D grid of 3D blocks: every thread prints its local
// (x, y, z) indices, its computed global index, and the raw built-in
// index/dimension values.
__global__ void kernel_2D_3D()
{
    const int globalId = getGlobalIdx_2D_3D();

    printf("Local thread IDs: (%i,%i,%i) Global thread ID: %i\n",
           threadIdx.x, threadIdx.y, threadIdx.z, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}

// Demo kernel for a 3D grid of 1D blocks: every thread prints its local x
// index, its computed global index, and the raw built-in index/dimension values.
__global__ void kernel_3D_1D()
{
    const int globalId = getGlobalIdx_3D_1D();

    printf("Local thread ID: %i Global thread ID: %i\n", threadIdx.x, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}

// Demo kernel for a 3D grid of 2D blocks: every thread prints its local (x, y)
// indices, its computed global index, and the raw built-in index/dimension values.
__global__ void kernel_3D_2D()
{
    const int globalId = getGlobalIdx_3D_2D();

    printf("Local thread IDs: (%i,%i) Global thread ID: %i\n",
           threadIdx.x, threadIdx.y, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}

// Demo kernel for a 3D grid of 3D blocks: every thread prints its local
// (x, y, z) indices, its computed global index, and the raw built-in
// index/dimension values.
__global__ void kernel_3D_3D()
{
    const int globalId = getGlobalIdx_3D_3D();

    printf("Local thread IDs: (%i,%i,%i) Global thread ID: %i\n",
           threadIdx.x, threadIdx.y, threadIdx.z, globalId);
    printf("threadIdx:(%d, %d, %d) blockIdx:(%d, %d, %d) blockDim:(%d, %d, %d) " "gridDim:(%d, %d, %d)\n\n",
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z,
           blockDim.x, blockDim.y, blockDim.z,
           gridDim.x, gridDim.y, gridDim.z);
}

// Completes one demo launch: checks that the launch was accepted, waits for
// the kernel to finish, then resets the device as the original demo did.
// `kernelName` is only used in the error message.
// Returns true on success; on failure prints the CUDA error to stderr and
// returns false.
static bool finishLaunch(const char *kernelName)
{
    // Kernel launches return no status; launch errors must be queried explicitly.
    cudaError_t status = cudaGetLastError();

    // Errors raised while the kernel runs only surface at a synchronizing call.
    if (status == cudaSuccess)
        status = cudaDeviceSynchronize();

    if (status == cudaSuccess)
        status = cudaDeviceReset();

    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", kernelName, cudaGetErrorString(status));
        return false;
    }
    return true;
}

// Launches each of the nine grid/block configurations in turn, stopping at
// the first CUDA error. Returns true only if every launch completed.
static bool runAllDemos()
{
    printf("\nLaunching kernel as 1D grid of 1D blocks...\n");
    kernel_1D_1D<<<dim3(2,1,1), dim3(2,1,1)>>>();
    if (!finishLaunch("kernel_1D_1D")) return false;

    printf("\nLaunching kernel as 1D grid of 2D blocks...\n");
    kernel_1D_2D<<<dim3(2,1,1), dim3(2,2,1)>>>();
    if (!finishLaunch("kernel_1D_2D")) return false;

    printf("\nLaunching kernel as 1D grid of 3D blocks...\n");
    kernel_1D_3D<<<dim3(2,1,1), dim3(2,2,2)>>>();
    if (!finishLaunch("kernel_1D_3D")) return false;

    printf("\nLaunching kernel as 2D grid of 1D blocks...\n");
    kernel_2D_1D<<<dim3(2,2,1), dim3(2,1,1)>>>();
    if (!finishLaunch("kernel_2D_1D")) return false;

    printf("\nLaunching kernel as 2D grid of 2D blocks...\n");
    kernel_2D_2D<<<dim3(2,2,1), dim3(2,2,1)>>>();
    if (!finishLaunch("kernel_2D_2D")) return false;

    printf("\nLaunching kernel as 2D grid of 3D blocks...\n");
    kernel_2D_3D<<<dim3(2,2,1), dim3(2,2,2)>>>();
    if (!finishLaunch("kernel_2D_3D")) return false;

    printf("\nLaunching kernel as 3D grid of 1D blocks...\n");
    kernel_3D_1D<<<dim3(2,2,2), dim3(2,1,1)>>>();
    if (!finishLaunch("kernel_3D_1D")) return false;

    printf("\nLaunching kernel as 3D grid of 2D blocks...\n");
    kernel_3D_2D<<<dim3(2,2,2), dim3(2,2,1)>>>();
    if (!finishLaunch("kernel_3D_2D")) return false;

    printf("\nLaunching kernel as 3D grid of 3D blocks...\n");
    kernel_3D_3D<<<dim3(2,2,2), dim3(2,2,2)>>>();
    if (!finishLaunch("kernel_3D_3D")) return false;

    return true;
}

// Entry point: runs every demo launch, waits for a key press, and returns
// 0 on success or 1 if any CUDA call failed.
int main()
{
    const bool allSucceeded = runAllDemos();

    // Wait for a key press (also on failure) so the output stays visible;
    // getch() comes from <conio.h>.
    getch();
    return allSucceeded ? 0 : 1;
}

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

 
Extracts from a Personal Diary

dedicated to the life of a silent girl who eventually learnt to open up

Num3ri v 2.0

I miei numeri - seconda versione

ThuyDX

Just another WordPress.com site

Algunos Intereses de Abraham Zamudio Chauca

Matematica, Linux , Programacion Serial , Programacion Paralela (CPU - GPU) , Cluster de Computadores , Software Cientifico

josephdung

thoughts...

Tech_Raj

A great WordPress.com site

Travel tips

Travel tips

Experience the real life.....!!!

Shurwaat achi honi chahiye ...

Ronzii's Blog

Just your average geek's blog

Karan Jitendra Thakkar

Everything I think. Everything I do. Right here.

VentureBeat

News About Tech, Money and Innovation

Chetan Solanki

Helpful to u, if u need it.....

ScreenCrush

Explorer of Research #HEMBAD

managedCUDA

Explorer of Research #HEMBAD

siddheshsathe

A great WordPress.com site

Ari's

This is My Space so Dont Mess With IT !!

%d bloggers like this: