Skip to content

Instantly share code, notes, and snippets.

@yuikns
Forked from allanmac/assert_cuda.c
Created November 5, 2016 16:19
Show Gist options
  • Select an option

  • Save yuikns/d65862ab846a8e13aa28a8049110e40b to your computer and use it in GitHub Desktop.

Select an option

Save yuikns/d65862ab846a8e13aa28a8049110e40b to your computer and use it in GitHub Desktop.
A tiny example of CUDA + OpenGL interop with write-only surfaces and CUDA kernels. Uses GLFW+GLAD.
//
//
//
#include <stdlib.h>
//
//
//
#include "interop.h"
//
//
//
struct pxl_interop
{
// w x h
int width;
int height;
// GL buffers
GLuint fb0;
GLuint rb0;
// CUDA resources
cudaGraphicsResource_t cgr0;
};
//
//
//
struct pxl_interop*
pxl_interop_create(GLFWwindow* window)
{
struct pxl_interop* const interop = (struct pxl_interop*)malloc(sizeof(struct pxl_interop));
// init cuda graphics resource
interop->cgr0 = NULL;
// render buffer object w/a color buffer
glGenRenderbuffers(1,&interop->rb0);
// frame buffer object
glGenFramebuffers(1,&interop->fb0);
// return it
return interop;
}
void
pxl_interop_destroy(struct pxl_interop* const interop)
{
free(interop);
}
//
//
//
cudaError_t
pxl_interop_resize(struct pxl_interop* const interop, const int width, const int height)
{
// save new size
interop->width = width;
interop->height = height;
//
// RESIZE FBO'S COLOR BUFFER
//
// bind rbo
glBindRenderbuffer (GL_RENDERBUFFER,interop->rb0);
// resize color buffer
glRenderbufferStorage(GL_RENDERBUFFER,GL_RGBA8,width,height);
// bind fbo to read
glBindFramebuffer (GL_FRAMEBUFFER,interop->fb0);
// attach rb0 to the fb0
glFramebufferRenderbuffer(GL_FRAMEBUFFER,GL_COLOR_ATTACHMENT0,GL_RENDERBUFFER,interop->rb0);
// unbind rbo
glBindRenderbuffer(GL_RENDERBUFFER,0);
// bind to default fb
glBindFramebuffer(GL_FRAMEBUFFER,0);
//
// REGISTER RBO WITH CUDA
//
cudaError_t cuda_err;
// unregister
if (interop->cgr0 != NULL)
cuda_err = cudaGraphicsUnregisterResource(interop->cgr0);
// register image
cuda_err = cudaGraphicsGLRegisterImage(&interop->cgr0,
interop->rb0,
GL_RENDERBUFFER,
cudaGraphicsRegisterFlagsSurfaceLoadStore | cudaGraphicsRegisterFlagsWriteDiscard);
// diddle some flags
cuda_err = cudaGraphicsResourceSetMapFlags(interop->cgr0,cudaGraphicsMapFlagsWriteDiscard);
return cuda_err;
}
void
pxl_interop_get_size(struct pxl_interop* const interop, int* const width, int* const height)
{
*width = interop->width;
*height = interop->height;
}
//
//
//
cudaError_t
pxl_interop_map(struct pxl_interop* const interop, cudaArray_t* cuda_array, cudaStream_t stream)
{
cudaError_t cuda_err;
// map graphics resources
cuda_err = cudaGraphicsMapResources(1,&interop->cgr0,stream);
// get a CUDA Array
cuda_err = cudaGraphicsSubResourceGetMappedArray(cuda_array,interop->cgr0,0,0);
return cuda_err;
}
cudaError_t
pxl_interop_unmap(struct pxl_interop* const interop, cudaStream_t stream)
{
cudaError_t cuda_err;
cuda_err = cudaGraphicsUnmapResources(1,&interop->cgr0,stream);
return cuda_err;
}
//
//
//
void
pxl_interop_clear(struct pxl_interop* const interop)
{
/*
const GLenum draw_buffer[] = { GL_COLOR_ATTACHMENT0 };
const GLuint clear_color[] = { 255, 0, 0, 255 };
glNamedFramebufferDrawBuffers(interop->fb0,1,draw_buffer);
glClearNamedFramebufferuiv(interop->fb0,GL_COLOR,0,clear_color);
*/
const GLenum attachments[] = { GL_COLOR_ATTACHMENT0 };
glInvalidateNamedFramebufferData(interop->fb0,1,attachments);
}
//
//
//
void
pxl_interop_blit(struct pxl_interop* const interop)
{
/*
glBlitFramebuffer(0,0, interop->width,interop->height,
0,interop->height,interop->width,0,
GL_COLOR_BUFFER_BIT,
GL_NEAREST);
*/
glBlitNamedFramebuffer(interop->fb0,0,
0,0, interop->width,interop->height,
0,interop->height,interop->width,0,
GL_COLOR_BUFFER_BIT,
GL_NEAREST);
}
//
//
//
/*
* Copyright 2015 Allan MacKinnon. All rights reserved.
*
*/
#pragma once
//
//
//
#include <glad/glad.h>
#include <GLFW/glfw3.h>
//
//
//
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
//
//
//
struct pxl_interop*
pxl_interop_create();
void
pxl_interop_destroy(struct pxl_interop* const interop);
//
//
//
cudaError_t
pxl_interop_resize(struct pxl_interop* const interop, const int width, const int height);
void
pxl_interop_get_size(struct pxl_interop* const interop, int* const width, int* const height);
//
//
//
cudaError_t
pxl_interop_map(struct pxl_interop* const interop, cudaArray_t* cuda_array, cudaStream_t stream);
cudaError_t
pxl_interop_unmap(struct pxl_interop* const interop, cudaStream_t stream);
//
//
//
void
pxl_interop_clear(struct pxl_interop* const interop);
void
pxl_interop_blit(struct pxl_interop* const interop);
//
//
//
// -*- compile-command: "nvcc -m 32 -arch sm_30 -Xptxas=-v -cubin kernel.cu"; -*-
//
//
//
surface<void,cudaSurfaceType2D> surf;
//
//
//
union pxl_rgbx_24
{
uint1 b32;
struct {
unsigned r : 8;
unsigned g : 8;
unsigned b : 8;
unsigned na : 8;
};
};
//
//
//
extern "C"
__global__
void
pxl_kernel(const int width)
{
// pixel coordinates
const int idx = (blockDim.x * blockIdx.x) + threadIdx.x;
const int x = idx % width;
const int y = idx / width;
// pixel color
const int t = (unsigned int)clock() / 1250000; // 1.25 GHz
const int xt = (idx + t) % width;
const unsigned int ramp = (unsigned int)(((float)xt / (float)(width-1)) * 255.0f + 0.5f);
const unsigned int bar = ((y + t) / 32) & 3;
union pxl_rgbx_24 rgbx;
rgbx.r = (bar == 0) || (bar == 1) ? ramp : 0;
rgbx.g = (bar == 0) || (bar == 2) ? ramp : 0;
rgbx.b = (bar == 0) || (bar == 3) ? ramp : 0;
rgbx.na = 255;
// cudaBoundaryModeZero squelches out-of-bound writes
surf2Dwrite(rgbx.b32, // even simpler: (unsigned int)clock()
surf,
x*sizeof(rgbx),
y,
cudaBoundaryModeZero);
}
//
//
//
#define PXL_KERNEL_THREADS_PER_BLOCK 256 // enough for 4Kx2 monitor
extern "C"
cudaError_t
pxl_kernel_launcher(cudaArray_const_t array,
const int width,
const int height,
cudaStream_t stream)
{
cudaError_t cuda_err = cudaBindSurfaceToArray(surf,array);
if (cuda_err)
return cuda_err;
const int blocks = (width * height + PXL_KERNEL_THREADS_PER_BLOCK - 1) / PXL_KERNEL_THREADS_PER_BLOCK;
if (blocks > 0)
pxl_kernel<<<blocks,PXL_KERNEL_THREADS_PER_BLOCK,0,stream>>>(width);
return cudaSuccess;
}
//
//
//
//
//
//
#include <glad/glad.h>
#include <GLFW/glfw3.h>
//
//
//
#include <stdlib.h>
#include <stdio.h>
//
//
//
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
//
//
//
#include "interop.h"
//
// FPS COUNTER FROM HERE:
//
// http://antongerdelan.net/opengl/glcontext2.html
//
static
void
pxl_glfw_fps(GLFWwindow* window)
{
// static fps counters
static double stamp_prev = 0.0;
static int frame_count = 0;
// locals
const double stamp_curr = glfwGetTime();
const double elapsed = stamp_curr - stamp_prev;
if (elapsed > 0.5)
{
stamp_prev = stamp_curr;
const double fps = (double)frame_count / elapsed;
int width, height;
char tmp[64];
glfwGetFramebufferSize(window,&width,&height);
sprintf_s(tmp,64,"(%u x %u) - FPS: %.2f",width,height,fps);
glfwSetWindowTitle(window,tmp);
frame_count = 0;
}
frame_count++;
}
//
//
//
static
void
pxl_glfw_error_callback(int error, const char* description)
{
fputs(description,stderr);
}
static
void
pxl_glfw_key_callback(GLFWwindow* window, int key, int scancode, int action, int mods)
{
if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
}
static
void
pxl_glfw_init(GLFWwindow** window, const int width, const int height)
{
//
// INITIALIZE GLFW/GLAD
//
glfwSetErrorCallback(pxl_glfw_error_callback);
if (!glfwInit())
exit(EXIT_FAILURE);
const GLFWvidmode* mode = glfwGetVideoMode(glfwGetPrimaryMonitor());
glfwWindowHint(GLFW_DEPTH_BITS, 0);
glfwWindowHint(GLFW_STENCIL_BITS, 0);
glfwWindowHint(GLFW_SRGB_CAPABLE, GL_TRUE);
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
*window = glfwCreateWindow(width,height,"GLFW / CUDA Interop",NULL,NULL);
if (*window == NULL)
{
glfwTerminate();
exit(EXIT_FAILURE);
}
glfwMakeContextCurrent(*window);
// set up GLAD
gladLoadGLLoader((GLADloadproc)glfwGetProcAddress);
// ignore vsync for now
glfwSwapInterval(0);
// enable SRGB
// glEnable(GL_FRAMEBUFFER_SRGB);
// only copy r/g/b
// glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_FALSE);
}
//
//
//
static
void
pxl_glfw_window_size_callback(GLFWwindow* window, int width, int height)
{
// get context
struct pxl_interop* const interop = glfwGetWindowUserPointer(window);
pxl_interop_resize(interop,width,height);
}
//
//
//
cudaError_t
pxl_kernel_launcher(cudaArray_const_t array, const int width, const int height, cudaStream_t stream);
//
//
//
int
main(int argc, char* argv[])
{
//
// INIT GLFW
//
GLFWwindow* window;
pxl_glfw_init(&window,1024,1024);
//
// INIT CUDA
//
cudaError_t cuda_err;
int gl_device_id,gl_device_count;
cuda_err = cudaGLGetDevices(&gl_device_count,&gl_device_id,1,cudaGLDeviceListAll);
int cuda_device_id = (argc > 1) ? atoi(argv[1]) : gl_device_id;
cuda_err = cudaSetDevice(cuda_device_id);
//
// INFO
//
struct cudaDeviceProp props;
cuda_err = cudaGetDeviceProperties(&props,gl_device_id);
printf("GL : %-24s (%2d)\n",props.name,props.multiProcessorCount);
cuda_err = cudaGetDeviceProperties(&props,cuda_device_id);
printf("CUDA : %-24s (%2d)\n",props.name,props.multiProcessorCount);
//
// CREATE A CUDA STREAM
//
cudaStream_t stream;
cuda_err = cudaStreamCreate(&stream);
//
// CREATE AND SAVE INTEROP INSTANCE
//
struct pxl_interop* const interop = pxl_interop_create(window);
glfwSetWindowUserPointer(window,interop);
//
// SET CALLBACKS
//
glfwSetKeyCallback (window,pxl_glfw_key_callback);
glfwSetFramebufferSizeCallback(window,pxl_glfw_window_size_callback);
//
// GET ACTUAL WINDOW SIZE
//
int width, height;
// get initial width/height
glfwGetFramebufferSize(window,&width,&height);
// resize with initial window dimensions
cuda_err = pxl_interop_resize(interop,width,height);
//
// LOOP UNTIL DONE
//
while (!glfwWindowShouldClose(window))
{
//
// MONITOR FPS
//
pxl_glfw_fps(window);
//
// EXECUTE CUDA KERNEL ON RENDER BUFFER
//
int width,height;
cudaArray_t cuda_array;
pxl_interop_get_size(interop,&width,&height);
cuda_err = pxl_interop_map(interop,&cuda_array,stream);
cuda_err = pxl_kernel_launcher(cuda_array,width,height,stream);
cuda_err = pxl_interop_unmap(interop,stream);
cuda_err = cudaStreamSynchronize(stream);
//
// BLIT
//
pxl_interop_blit(interop);
//
// SWAP
//
glfwSwapBuffers(window);
//
// PUMP/POLL/WAIT
//
glfwPollEvents(); // glfwWaitEvents();
}
//
// CLEANUP
//
pxl_interop_destroy(interop);
glfwDestroyWindow(window);
glfwTerminate();
cudaDeviceReset();
// missing some clean up here
exit(EXIT_SUCCESS);
}
//
//
//
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment