-
-
Save yuikns/d65862ab846a8e13aa28a8049110e40b to your computer and use it in GitHub Desktop.
A tiny example of CUDA + OpenGL interop with write-only surfaces and CUDA kernels. Uses GLFW+GLAD.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // | |
| // | |
| #include <stdlib.h> | |
| // | |
| // | |
| // | |
| #include "interop.h" | |
| // | |
| // | |
| // | |
| struct pxl_interop | |
| { | |
| // w x h | |
| int width; | |
| int height; | |
| // GL buffers | |
| GLuint fb0; | |
| GLuint rb0; | |
| // CUDA resources | |
| cudaGraphicsResource_t cgr0; | |
| }; | |
| // | |
| // | |
| // | |
| struct pxl_interop* | |
| pxl_interop_create(GLFWwindow* window) | |
| { | |
| struct pxl_interop* const interop = (struct pxl_interop*)malloc(sizeof(struct pxl_interop)); | |
| // init cuda graphics resource | |
| interop->cgr0 = NULL; | |
| // render buffer object w/a color buffer | |
| glGenRenderbuffers(1,&interop->rb0); | |
| // frame buffer object | |
| glGenFramebuffers(1,&interop->fb0); | |
| // return it | |
| return interop; | |
| } | |
| void | |
| pxl_interop_destroy(struct pxl_interop* const interop) | |
| { | |
| free(interop); | |
| } | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_interop_resize(struct pxl_interop* const interop, const int width, const int height) | |
| { | |
| // save new size | |
| interop->width = width; | |
| interop->height = height; | |
| // | |
| // RESIZE FBO'S COLOR BUFFER | |
| // | |
| // bind rbo | |
| glBindRenderbuffer (GL_RENDERBUFFER,interop->rb0); | |
| // resize color buffer | |
| glRenderbufferStorage(GL_RENDERBUFFER,GL_RGBA8,width,height); | |
| // bind fbo to read | |
| glBindFramebuffer (GL_FRAMEBUFFER,interop->fb0); | |
| // attach rb0 to the fb0 | |
| glFramebufferRenderbuffer(GL_FRAMEBUFFER,GL_COLOR_ATTACHMENT0,GL_RENDERBUFFER,interop->rb0); | |
| // unbind rbo | |
| glBindRenderbuffer(GL_RENDERBUFFER,0); | |
| // bind to default fb | |
| glBindFramebuffer(GL_FRAMEBUFFER,0); | |
| // | |
| // REGISTER RBO WITH CUDA | |
| // | |
| cudaError_t cuda_err; | |
| // unregister | |
| if (interop->cgr0 != NULL) | |
| cuda_err = cudaGraphicsUnregisterResource(interop->cgr0); | |
| // register image | |
| cuda_err = cudaGraphicsGLRegisterImage(&interop->cgr0, | |
| interop->rb0, | |
| GL_RENDERBUFFER, | |
| cudaGraphicsRegisterFlagsSurfaceLoadStore | cudaGraphicsRegisterFlagsWriteDiscard); | |
| // diddle some flags | |
| cuda_err = cudaGraphicsResourceSetMapFlags(interop->cgr0,cudaGraphicsMapFlagsWriteDiscard); | |
| return cuda_err; | |
| } | |
| void | |
| pxl_interop_get_size(struct pxl_interop* const interop, int* const width, int* const height) | |
| { | |
| *width = interop->width; | |
| *height = interop->height; | |
| } | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_interop_map(struct pxl_interop* const interop, cudaArray_t* cuda_array, cudaStream_t stream) | |
| { | |
| cudaError_t cuda_err; | |
| // map graphics resources | |
| cuda_err = cudaGraphicsMapResources(1,&interop->cgr0,stream); | |
| // get a CUDA Array | |
| cuda_err = cudaGraphicsSubResourceGetMappedArray(cuda_array,interop->cgr0,0,0); | |
| return cuda_err; | |
| } | |
| cudaError_t | |
| pxl_interop_unmap(struct pxl_interop* const interop, cudaStream_t stream) | |
| { | |
| cudaError_t cuda_err; | |
| cuda_err = cudaGraphicsUnmapResources(1,&interop->cgr0,stream); | |
| return cuda_err; | |
| } | |
| // | |
| // | |
| // | |
| void | |
| pxl_interop_clear(struct pxl_interop* const interop) | |
| { | |
| /* | |
| const GLenum draw_buffer[] = { GL_COLOR_ATTACHMENT0 }; | |
| const GLuint clear_color[] = { 255, 0, 0, 255 }; | |
| glNamedFramebufferDrawBuffers(interop->fb0,1,draw_buffer); | |
| glClearNamedFramebufferuiv(interop->fb0,GL_COLOR,0,clear_color); | |
| */ | |
| const GLenum attachments[] = { GL_COLOR_ATTACHMENT0 }; | |
| glInvalidateNamedFramebufferData(interop->fb0,1,attachments); | |
| } | |
| // | |
| // | |
| // | |
| void | |
| pxl_interop_blit(struct pxl_interop* const interop) | |
| { | |
| /* | |
| glBlitFramebuffer(0,0, interop->width,interop->height, | |
| 0,interop->height,interop->width,0, | |
| GL_COLOR_BUFFER_BIT, | |
| GL_NEAREST); | |
| */ | |
| glBlitNamedFramebuffer(interop->fb0,0, | |
| 0,0, interop->width,interop->height, | |
| 0,interop->height,interop->width,0, | |
| GL_COLOR_BUFFER_BIT, | |
| GL_NEAREST); | |
| } | |
| // | |
| // | |
| // |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Copyright 2015 Allan MacKinnon. All rights reserved. | |
| * | |
| */ | |
| #pragma once | |
| // | |
| // | |
| // | |
| #include <glad/glad.h> | |
| #include <GLFW/glfw3.h> | |
| // | |
| // | |
| // | |
| #include <cuda_runtime.h> | |
| #include <cuda_gl_interop.h> | |
| // | |
| // | |
| // | |
| struct pxl_interop* | |
| pxl_interop_create(); | |
| void | |
| pxl_interop_destroy(struct pxl_interop* const interop); | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_interop_resize(struct pxl_interop* const interop, const int width, const int height); | |
| void | |
| pxl_interop_get_size(struct pxl_interop* const interop, int* const width, int* const height); | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_interop_map(struct pxl_interop* const interop, cudaArray_t* cuda_array, cudaStream_t stream); | |
| cudaError_t | |
| pxl_interop_unmap(struct pxl_interop* const interop, cudaStream_t stream); | |
| // | |
| // | |
| // | |
| void | |
| pxl_interop_clear(struct pxl_interop* const interop); | |
| void | |
| pxl_interop_blit(struct pxl_interop* const interop); | |
| // | |
| // | |
| // |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // -*- compile-command: "nvcc -m 32 -arch sm_30 -Xptxas=-v -cubin kernel.cu"; -*- | |
| // | |
| // | |
| // | |
| surface<void,cudaSurfaceType2D> surf; | |
| // | |
| // | |
| // | |
| union pxl_rgbx_24 | |
| { | |
| uint1 b32; | |
| struct { | |
| unsigned r : 8; | |
| unsigned g : 8; | |
| unsigned b : 8; | |
| unsigned na : 8; | |
| }; | |
| }; | |
| // | |
| // | |
| // | |
| extern "C" | |
| __global__ | |
| void | |
| pxl_kernel(const int width) | |
| { | |
| // pixel coordinates | |
| const int idx = (blockDim.x * blockIdx.x) + threadIdx.x; | |
| const int x = idx % width; | |
| const int y = idx / width; | |
| // pixel color | |
| const int t = (unsigned int)clock() / 1250000; // 1.25 GHz | |
| const int xt = (idx + t) % width; | |
| const unsigned int ramp = (unsigned int)(((float)xt / (float)(width-1)) * 255.0f + 0.5f); | |
| const unsigned int bar = ((y + t) / 32) & 3; | |
| union pxl_rgbx_24 rgbx; | |
| rgbx.r = (bar == 0) || (bar == 1) ? ramp : 0; | |
| rgbx.g = (bar == 0) || (bar == 2) ? ramp : 0; | |
| rgbx.b = (bar == 0) || (bar == 3) ? ramp : 0; | |
| rgbx.na = 255; | |
| // cudaBoundaryModeZero squelches out-of-bound writes | |
| surf2Dwrite(rgbx.b32, // even simpler: (unsigned int)clock() | |
| surf, | |
| x*sizeof(rgbx), | |
| y, | |
| cudaBoundaryModeZero); | |
| } | |
| // | |
| // | |
| // | |
| #define PXL_KERNEL_THREADS_PER_BLOCK 256 // enough for 4Kx2 monitor | |
| extern "C" | |
| cudaError_t | |
| pxl_kernel_launcher(cudaArray_const_t array, | |
| const int width, | |
| const int height, | |
| cudaStream_t stream) | |
| { | |
| cudaError_t cuda_err = cudaBindSurfaceToArray(surf,array); | |
| if (cuda_err) | |
| return cuda_err; | |
| const int blocks = (width * height + PXL_KERNEL_THREADS_PER_BLOCK - 1) / PXL_KERNEL_THREADS_PER_BLOCK; | |
| if (blocks > 0) | |
| pxl_kernel<<<blocks,PXL_KERNEL_THREADS_PER_BLOCK,0,stream>>>(width); | |
| return cudaSuccess; | |
| } | |
| // | |
| // | |
| // |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // | |
| // | |
| #include <glad/glad.h> | |
| #include <GLFW/glfw3.h> | |
| // | |
| // | |
| // | |
| #include <stdlib.h> | |
| #include <stdio.h> | |
| // | |
| // | |
| // | |
| #include <cuda_runtime.h> | |
| #include <cuda_gl_interop.h> | |
| // | |
| // | |
| // | |
| #include "interop.h" | |
| // | |
| // FPS COUNTER FROM HERE: | |
| // | |
| // http://antongerdelan.net/opengl/glcontext2.html | |
| // | |
| static | |
| void | |
| pxl_glfw_fps(GLFWwindow* window) | |
| { | |
| // static fps counters | |
| static double stamp_prev = 0.0; | |
| static int frame_count = 0; | |
| // locals | |
| const double stamp_curr = glfwGetTime(); | |
| const double elapsed = stamp_curr - stamp_prev; | |
| if (elapsed > 0.5) | |
| { | |
| stamp_prev = stamp_curr; | |
| const double fps = (double)frame_count / elapsed; | |
| int width, height; | |
| char tmp[64]; | |
| glfwGetFramebufferSize(window,&width,&height); | |
| sprintf_s(tmp,64,"(%u x %u) - FPS: %.2f",width,height,fps); | |
| glfwSetWindowTitle(window,tmp); | |
| frame_count = 0; | |
| } | |
| frame_count++; | |
| } | |
| // | |
| // | |
| // | |
| static | |
| void | |
| pxl_glfw_error_callback(int error, const char* description) | |
| { | |
| fputs(description,stderr); | |
| } | |
| static | |
| void | |
| pxl_glfw_key_callback(GLFWwindow* window, int key, int scancode, int action, int mods) | |
| { | |
| if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) | |
| glfwSetWindowShouldClose(window, GL_TRUE); | |
| } | |
| static | |
| void | |
| pxl_glfw_init(GLFWwindow** window, const int width, const int height) | |
| { | |
| // | |
| // INITIALIZE GLFW/GLAD | |
| // | |
| glfwSetErrorCallback(pxl_glfw_error_callback); | |
| if (!glfwInit()) | |
| exit(EXIT_FAILURE); | |
| const GLFWvidmode* mode = glfwGetVideoMode(glfwGetPrimaryMonitor()); | |
| glfwWindowHint(GLFW_DEPTH_BITS, 0); | |
| glfwWindowHint(GLFW_STENCIL_BITS, 0); | |
| glfwWindowHint(GLFW_SRGB_CAPABLE, GL_TRUE); | |
| glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4); | |
| glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5); | |
| glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); | |
| *window = glfwCreateWindow(width,height,"GLFW / CUDA Interop",NULL,NULL); | |
| if (*window == NULL) | |
| { | |
| glfwTerminate(); | |
| exit(EXIT_FAILURE); | |
| } | |
| glfwMakeContextCurrent(*window); | |
| // set up GLAD | |
| gladLoadGLLoader((GLADloadproc)glfwGetProcAddress); | |
| // ignore vsync for now | |
| glfwSwapInterval(0); | |
| // enable SRGB | |
| // glEnable(GL_FRAMEBUFFER_SRGB); | |
| // only copy r/g/b | |
| // glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_FALSE); | |
| } | |
| // | |
| // | |
| // | |
| static | |
| void | |
| pxl_glfw_window_size_callback(GLFWwindow* window, int width, int height) | |
| { | |
| // get context | |
| struct pxl_interop* const interop = glfwGetWindowUserPointer(window); | |
| pxl_interop_resize(interop,width,height); | |
| } | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_kernel_launcher(cudaArray_const_t array, const int width, const int height, cudaStream_t stream); | |
| // | |
| // | |
| // | |
| int | |
| main(int argc, char* argv[]) | |
| { | |
| // | |
| // INIT GLFW | |
| // | |
| GLFWwindow* window; | |
| pxl_glfw_init(&window,1024,1024); | |
| // | |
| // INIT CUDA | |
| // | |
| cudaError_t cuda_err; | |
| int gl_device_id,gl_device_count; | |
| cuda_err = cudaGLGetDevices(&gl_device_count,&gl_device_id,1,cudaGLDeviceListAll); | |
| int cuda_device_id = (argc > 1) ? atoi(argv[1]) : gl_device_id; | |
| cuda_err = cudaSetDevice(cuda_device_id); | |
| // | |
| // INFO | |
| // | |
| struct cudaDeviceProp props; | |
| cuda_err = cudaGetDeviceProperties(&props,gl_device_id); | |
| printf("GL : %-24s (%2d)\n",props.name,props.multiProcessorCount); | |
| cuda_err = cudaGetDeviceProperties(&props,cuda_device_id); | |
| printf("CUDA : %-24s (%2d)\n",props.name,props.multiProcessorCount); | |
| // | |
| // CREATE A CUDA STREAM | |
| // | |
| cudaStream_t stream; | |
| cuda_err = cudaStreamCreate(&stream); | |
| // | |
| // CREATE AND SAVE INTEROP INSTANCE | |
| // | |
| struct pxl_interop* const interop = pxl_interop_create(window); | |
| glfwSetWindowUserPointer(window,interop); | |
| // | |
| // SET CALLBACKS | |
| // | |
| glfwSetKeyCallback (window,pxl_glfw_key_callback); | |
| glfwSetFramebufferSizeCallback(window,pxl_glfw_window_size_callback); | |
| // | |
| // GET ACTUAL WINDOW SIZE | |
| // | |
| int width, height; | |
| // get initial width/height | |
| glfwGetFramebufferSize(window,&width,&height); | |
| // resize with initial window dimensions | |
| cuda_err = pxl_interop_resize(interop,width,height); | |
| // | |
| // LOOP UNTIL DONE | |
| // | |
| while (!glfwWindowShouldClose(window)) | |
| { | |
| // | |
| // MONITOR FPS | |
| // | |
| pxl_glfw_fps(window); | |
| // | |
| // EXECUTE CUDA KERNEL ON RENDER BUFFER | |
| // | |
| int width,height; | |
| cudaArray_t cuda_array; | |
| pxl_interop_get_size(interop,&width,&height); | |
| cuda_err = pxl_interop_map(interop,&cuda_array,stream); | |
| cuda_err = pxl_kernel_launcher(cuda_array,width,height,stream); | |
| cuda_err = pxl_interop_unmap(interop,stream); | |
| cuda_err = cudaStreamSynchronize(stream); | |
| // | |
| // BLIT | |
| // | |
| pxl_interop_blit(interop); | |
| // | |
| // SWAP | |
| // | |
| glfwSwapBuffers(window); | |
| // | |
| // PUMP/POLL/WAIT | |
| // | |
| glfwPollEvents(); // glfwWaitEvents(); | |
| } | |
| // | |
| // CLEANUP | |
| // | |
| pxl_interop_destroy(interop); | |
| glfwDestroyWindow(window); | |
| glfwTerminate(); | |
| cudaDeviceReset(); | |
| // missing some clean up here | |
| exit(EXIT_SUCCESS); | |
| } | |
| // | |
| // | |
| // |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment