#include #include #include #include #include #include #include #pragma comment(lib, "dxgi.lib") #pragma comment(lib, "d3d11.lib") #pragma comment(lib, "d3dcompiler.lib") #define MAKE_SHADER(x) #x // inputs float dataA[10] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f }; float dataB[10] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f }; // Compute shader const char computeShader[] = MAKE_SHADER( Buffer bufA : register(t0); Buffer bufB : register(t1); RWBuffer bufC : register(u0); [numthreads(1,1,1)] void CSMain(uint3 i : SV_DispatchThreadID) { bufC[i.x] = bufA[i.x] + bufB[i.x]; } ); // a mutex for synchronizing stream out std::mutex mut; // definition HRESULT CreateBuffer(ID3D11Device* device, UINT sz, void* pInitData, ID3D11Buffer** bufOut); HRESULT CreateBufferSrv(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11ShaderResourceView ** srvOut); HRESULT CreateBufferUav(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11UnorderedAccessView** uavOut); ID3D11Buffer* CopyBuffer(ID3D11Device* device, ID3D11DeviceContext* ctx, ID3D11Buffer* buf); HRESULT CompileShader(const char* shader, SIZE_T sz, ID3D10Blob** bytecode, ID3D10Blob** errMsg); int main() { HRESULT hr; IDXGIFactory* factory; IDXGIAdapter* adapter; ID3D11Device* dev0 = nullptr; ID3D11DeviceContext* ctx0 = nullptr; ID3D11Device* dev1 = nullptr; ID3D11DeviceContext* ctx1 = nullptr; std::vector adapters; static const D3D_FEATURE_LEVEL lvl[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0, D3D_FEATURE_LEVEL_9_3, D3D_FEATURE_LEVEL_9_2, D3D_FEATURE_LEVEL_9_1 }; D3D_FEATURE_LEVEL supported; float result0[10] = { 0.0f }; float result1[10] = { 0.0f }; if (FAILED(hr = CreateDXGIFactory(__uuidof(factory), (void**)&factory))) { std::cout << "Failed to create DXGI factory" << std::endl; return hr; } // Enumerate all adapters and store it into std::vector for (int i = 0; factory->EnumAdapters(i, &adapter) != DXGI_ERROR_NOT_FOUND; i++) adapters.push_back(adapter); // Create d3d11 on each device if (FAILED(hr = D3D11CreateDevice( adapters[0], D3D_DRIVER_TYPE_UNKNOWN, nullptr, 0, lvl, 7, D3D11_SDK_VERSION, &dev0, &supported, &ctx0))) { std::cout << "Failed to create d3d device0" << std::endl; return hr; } if (FAILED(hr = D3D11CreateDevice( adapters[1], D3D_DRIVER_TYPE_UNKNOWN, nullptr, 0, lvl, 7, D3D11_SDK_VERSION, &dev1, &supported, &ctx1))) { std::cout << "Failed to create d3d device1" << std::endl; return hr; } // perform a simple GPGPU program that add ten numbers auto task = [&](ID3D11Device* dev, ID3D11DeviceContext* ctx, float* result, size_t sz, HRESULT* hr) { ID3D11Buffer* bufA; ID3D11Buffer* bufB; ID3D11Buffer* bufC; ID3D11ShaderResourceView* bufASrv; ID3D11ShaderResourceView* bufBSrv; ID3D11UnorderedAccessView* bufCUav; ID3D11ComputeShader* cs = nullptr; ID3D10Blob* bytecode; ID3D10Blob* errMsg; IDXGIDevice* dxgiDev; IDXGIAdapter* adapter; DXGI_ADAPTER_DESC adesc; // Get device information dev->QueryInterface(__uuidof(dxgiDev), (void**)&dxgiDev); dxgiDev->GetAdapter(&adapter); adapter->GetDesc(&adesc); mut.lock(); std::wcout << "Performing async task on device: " << adesc.Description << std::endl; mut.unlock(); // Create buffer and its view CreateBuffer(dev, sizeof(dataA), dataA, &bufA); CreateBuffer(dev, sizeof(dataB), dataB, &bufB); CreateBuffer(dev, sz, nullptr, &bufC); CreateBufferSrv(dev, DXGI_FORMAT_R32_FLOAT, 4, bufA, &bufASrv); CreateBufferSrv(dev, DXGI_FORMAT_R32_FLOAT, 4, bufB, &bufBSrv); CreateBufferUav(dev, DXGI_FORMAT_R32_FLOAT, 4, bufC, &bufCUav); // Compile & create shader if (FAILED(CompileShader(computeShader, sizeof(computeShader), &bytecode, &errMsg))) { std::cout << "Failed to compile compute shader" << std::endl; if (errMsg) { std::cout << (char*)errMsg->GetBufferPointer() << std::endl; errMsg->Release(); *hr = -1; return; } } dev->CreateComputeShader( bytecode->GetBufferPointer(), bytecode->GetBufferSize(), nullptr, &cs); bytecode->Release(); ID3D11ShaderResourceView* srvs[2] = { bufASrv, bufBSrv }; // Perform action on GPU ctx->CSSetShader(cs, nullptr, 0); ctx->CSSetShaderResources(0, 2, srvs); ctx->CSSetUnorderedAccessViews(0, 1, &bufCUav, nullptr); ctx->Dispatch(10, 1, 1); ctx->CSSetShader(nullptr, nullptr, 0); ctx->CSSetShaderResources(0, 0, nullptr); ctx->CSSetUnorderedAccessViews(0, 0, nullptr, nullptr); // Copy result from GPU ID3D11Buffer* cpy = CopyBuffer(dev, ctx, bufC); D3D11_MAPPED_SUBRESOURCE copyback; ctx->Map(cpy, 0, D3D11_MAP_READ, 0, ©back); memcpy_s(result, sz, copyback.pData, sz); ctx->Unmap(cpy, 0); // we dont need these again cpy->Release(); if (bufA) { bufA->Release(); if (bufASrv) bufASrv->Release(); } if (bufB) { bufB->Release(); if (bufBSrv) bufBSrv->Release(); } if (bufC) { bufC->Release(); if (bufCUav) bufCUav->Release(); } if (cs) cs->Release(); return; }; // perform async task std::thread gpu0(task, dev0, ctx0, (float*)result0, (size_t)sizeof(result0), &hr); std::thread gpu1(task, dev1, ctx1, (float*)result1, (size_t)sizeof(result1), &hr); // wait until the job done gpu0.join(); gpu1.join(); std::cout << "Done performing tasks" << std::endl; // print outputs std::cout << "Output from GPU0: { "; for (int i = 0; i < 10; i++) std::cout << result0[i] << ((i < 9) ? ", " : " }"); std::cout << std::endl; std::cout << "Output from GPU1: { "; for (int i = 0; i < 10; i++) std::cout << result1[i] << ((i < 9) ? ", " : " }"); std::cout << std::endl; std::cout << "Releasing shits..." << std::endl; // release shits if (dev0) dev0->Release(); if (ctx0) ctx0->Release(); if(dev1) dev1->Release(); if(ctx1) ctx1->Release(); return 0; } HRESULT CreateBuffer(ID3D11Device * device, UINT sz, void * pInitData, ID3D11Buffer ** bufOut) { D3D11_BUFFER_DESC bufDesc = { 0 }; bufDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER; bufDesc.ByteWidth = sz; bufDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; if (pInitData) { D3D11_SUBRESOURCE_DATA init = { 0 }; init.pSysMem = pInitData; return device->CreateBuffer(&bufDesc, &init, bufOut); } else return device->CreateBuffer(&bufDesc, nullptr, bufOut); } HRESULT CreateBufferSrv(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11ShaderResourceView ** srvOut) { D3D11_BUFFER_DESC bufDesc; buf->GetDesc(&bufDesc); D3D11_SHADER_RESOURCE_VIEW_DESC desc; ZeroMemory(&desc, sizeof(desc)); desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; desc.Buffer.FirstElement = 0; desc.Buffer.NumElements = bufDesc.ByteWidth / szStride; desc.Format = bufFormat; return device->CreateShaderResourceView(buf, &desc, srvOut); } HRESULT CreateBufferUav(ID3D11Device * device, DXGI_FORMAT bufFormat, UINT szStride, ID3D11Buffer * buf, ID3D11UnorderedAccessView ** uavOut) { D3D11_BUFFER_DESC bufDesc; buf->GetDesc(&bufDesc); D3D11_UNORDERED_ACCESS_VIEW_DESC desc; ZeroMemory(&desc, sizeof(desc)); desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; desc.Buffer.FirstElement = 0; desc.Buffer.NumElements = bufDesc.ByteWidth / szStride; desc.Format = bufFormat; return device->CreateUnorderedAccessView(buf, &desc, uavOut); } ID3D11Buffer* CopyBuffer(ID3D11Device * device, ID3D11DeviceContext * ctx, ID3D11Buffer * buf) { D3D11_BUFFER_DESC desc; ID3D11Buffer* ret = nullptr; buf->GetDesc(&desc); desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; desc.Usage = D3D11_USAGE_STAGING; desc.BindFlags = 0; desc.MiscFlags = 0; if (SUCCEEDED(device->CreateBuffer(&desc, nullptr, &ret))) ctx->CopyResource(ret, buf); return ret; } HRESULT CompileShader(const char * shader, SIZE_T sz, ID3D10Blob** bytecode, ID3D10Blob** errMsg) { HRESULT hr; if (FAILED(hr = D3DCompile(shader, sz, "ComputeShader", nullptr, nullptr, "CSMain", "cs_5_0", 0, 0, bytecode, errMsg))) return hr; return hr; }