Last active
          July 12, 2024 07:39 
        
      - 
      
 - 
        
Save snowzurfer/1e90678d0d23d3295dda9a0cc93b2453 to your computer and use it in GitHub Desktop.  
    3D world points from ARKit depth
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import ARKit | |
| import SceneKit | |
// Grid resolution of the debug point cloud: half the depth map's
// native 256 x 192 resolution in each dimension (see ratio comments
// in `session(_:didUpdate:)`).
let horizontalPoints = 256 / 2
let verticalPoints = 192 / 2
// One SCNNode per sampled depth point, all parented under a single
// container node that gets added to the scene root.
var depthNodes = [SCNNode]()
var parentDebugNodes = SCNNode()
// Set up externally (e.g. by the hosting view controller) before `setup()` runs.
var sceneView: ARSCNView!
// Somewhere during setup
/// Starts world tracking with smoothed scene depth and builds the grid of
/// debug nodes (one small green cube per depth sample).
///
/// All cubes share a single `SCNBox` geometry, and therefore a single
/// material.
func setup() {
    let configuration = ARWorldTrackingConfiguration()
    configuration.frameSemantics = .smoothedSceneDepth
    sceneView.session.run(configuration)

    sceneView.scene.rootNode.addChildNode(parentDebugNodes)

    let cubeSide = 0.005
    let sharedGeometry = SCNBox(width: cubeSide,
                                height: cubeSide,
                                length: cubeSide,
                                chamferRadius: 0)
    sharedGeometry.firstMaterial?.diffuse.contents = UIColor.green

    (0..<(horizontalPoints * verticalPoints)).forEach { _ in
        let debugNode = SCNNode(geometry: sharedGeometry)
        parentDebugNodes.addChildNode(debugNode)
        depthNodes.append(debugNode)
    }
}
// ARSessionDelegate callback. For every frame: lock the smoothed depth
// buffer, rescale the camera intrinsics from captured-image resolution to
// depth-map resolution, then back-project a horizontalPoints x
// verticalPoints grid of depth samples into world space and move the
// corresponding debug nodes there.
func session(_ session: ARSession, didUpdate frame: ARFrame) {
    // Smoothed depth is only delivered when the configuration requested
    // `.smoothedSceneDepth` (see `setup()`).
    guard let smoothedDepth = frame.smoothedSceneDepth?.depthMap else {
        return
    }
    let capturedImage = frame.capturedImage
    // Lock the depth buffer for CPU reads; the deferred unlock runs on
    // every exit path.
    let lockFlags = CVPixelBufferLockFlags.readOnly
    CVPixelBufferLockBaseAddress(smoothedDepth, lockFlags)
    defer {
        CVPixelBufferUnlockBaseAddress(smoothedDepth, lockFlags)
    }
    // The depth map is read as a single plane of Float32 values (meters).
    // NOTE(review): the row-major indexing in `sampleDepthRaw` assumes no
    // per-row padding (bytesPerRow == width * 4) — confirm with
    // CVPixelBufferGetBytesPerRowOfPlane before relying on this.
    let baseAddress = CVPixelBufferGetBaseAddressOfPlane(smoothedDepth, 0)!
    let depthByteBuffer = baseAddress.assumingMemoryBound(to: Float32.self)
    // The `.size` accessor simply reads the CVPixelBuffer's width and height in pixels.
    //
    // They are the same ratio:
    // 1920 x 1440 = 1440 x 1920 = 0.75
    let depthMapSize = smoothedDepth.size
    // 192 x 256 = 0.75
    let capturedImageSize = capturedImage.size
    var cameraIntrinsics = frame.camera.intrinsics
    let depthResolution = simd_float2(x: Float(depthMapSize.x), y: Float(depthMapSize.y))
    let scaleRes = simd_float2(x: Float(capturedImageSize.x) / depthResolution.x,
                               y: Float(capturedImageSize.y) / depthResolution.y )
    // Make the camera intrinsics be with respect to Depth.
    // Column-major simd matrix: [0][0] = fx, [1][1] = fy,
    // [2][0] = cx, [2][1] = cy.
    cameraIntrinsics[0][0] /= scaleRes.x
    cameraIntrinsics[1][1] /= scaleRes.y
    cameraIntrinsics[2][0] /= scaleRes.x
    cameraIntrinsics[2][1] /= scaleRes.y
    // This will be the long size, because of the rotation
    let horizontalStep = Float(depthMapSize.x) / Float(self.horizontalPoints)
    let halfHorizontalStep = horizontalStep / 2
    // This will be the short size, because of the rotation
    let verticalStep = Float(depthMapSize.y) / Float(self.verticalPoints)
    let halfVerticalStep = verticalStep / 2
    for h in 0..<horizontalPoints {
        for v in 0..<verticalPoints {
            // Sample at the center of each grid cell (the half-step offset).
            let x = Float(h) * horizontalStep + halfHorizontalStep
            let y = Float(v) * verticalStep + halfVerticalStep
            let depthMapPoint = simd_float2(x, y)
            // Sample depth (the float pixel coordinate is truncated to Int).
            let metricDepth = sampleDepthRaw(depthByteBuffer, size: depthMapSize, at: .init(depthMapPoint))
            let wp = worldPoint(depthMapPixelPoint: depthMapPoint,
                                depth: metricDepth,
                                cameraIntrinsics: cameraIntrinsics,
                                // This is crucial: you need to always use the view matrix for Landscape Right.
                                viewMatrixInverted: frame.camera.viewMatrix(for: .landscapeRight).inverse)
            // Nodes are stored row-major over the (h, v) grid.
            let node = self.depthNodes[v * horizontalPoints + h]
            node.simdWorldPosition = wp
        }
    }
}
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | func sampleDepthRaw(_ pointer: UnsafeMutablePointer<Float32>, size: SIMD2<Int>, at: SIMD2<Int>) -> Float { | |
| let baseAddressIndex = at.y * size.x + at.x | |
| return Float(pointer[baseAddressIndex]) | |
| } | |
// This also works. Adapted from:
// https://developer.apple.com/forums/thread/676368
/// Back-projects a depth-map pixel into world space using the *inverted*
/// camera intrinsics and the inverted view matrix.
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsicsInverted: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    // Unproject the pixel to camera space. The camera looks down -Z,
    // hence the negated depth.
    let cameraPoint = cameraIntrinsicsInverted * simd_float3(depthMapPixelPoint, 1) * -depth
    // Flip X to go from image-space handedness to camera-space handedness.
    let mirrored = simd_float3(-cameraPoint.x, cameraPoint.y, cameraPoint.z)
    // Transform into world space, then perform the perspective divide.
    let homogeneous = viewMatrixInverted * simd_float4(mirrored, 1)
    return SIMD3(homogeneous.x, homogeneous.y, homogeneous.z) / homogeneous.w
}
// This one is adapted from:
// http://nicolas.burrus.name/index.php/Research/KinectCalibration
/// Pinhole back-projection: depth-map pixel + metric depth -> world point.
/// `cameraIntrinsics` must already be scaled to the depth-map resolution.
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsics: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    // Column-major simd matrix: focal lengths on the diagonal,
    // principal point in the last column.
    let fx = cameraIntrinsics[0][0]
    let fy = cameraIntrinsics[1][1]
    let cx = cameraIntrinsics[2][0]
    let cy = cameraIntrinsics[2][1]
    let xrw = (depthMapPixelPoint.x - cx) * depth / fx
    let yrw = (depthMapPixelPoint.y - cy) * depth / fy
    // Y is UP in camera space, vs it being DOWN in image space;
    // the camera looks down -Z.
    let cameraSpace = simd_float4(xrw, -yrw, -depth, 1)
    let world = viewMatrixInverted * cameraSpace
    return simd_float3(world.x, world.y, world.z)
}
extension CVPixelBuffer {
    /// Width and height (in pixels) of plane 0, packed into an integer vector.
    var size: SIMD2<Int> {
        SIMD2(x: CVPixelBufferGetWidthOfPlane(self, 0),
              y: CVPixelBufferGetHeightOfPlane(self, 0))
    }
}
@fabio914 thanks so much for the feedback and for trying out the code.
You're right, those parts are missing as I didn't intend this to be "ready-to-use".
I'll fix the typos though, and add your suggestions so that it's more complete.
Btw @snowzurfer, I've managed to build a version with color.
My version is still not ideal but here's the updated code if you're interested:
   // Color version of the delegate callback: in addition to depth, samples
   // luma (plane 0) and interleaved CbCr (plane 1) from the captured image
   // and tints each debug node with the reconstructed color.
   func session(_ session: ARSession, didUpdate frame: ARFrame) {
        // Smoothed depth requires `.smoothedSceneDepth` in the configuration.
        guard let smoothedDepth = frame.smoothedSceneDepth?.depthMap else {
            return
        }
        let capturedImage = frame.capturedImage
        // Lock both buffers for CPU reads; deferred unlocks run on every exit.
        let lockFlags = CVPixelBufferLockFlags.readOnly
        CVPixelBufferLockBaseAddress(smoothedDepth, lockFlags)
        defer {
            CVPixelBufferUnlockBaseAddress(smoothedDepth, lockFlags)
        }
        CVPixelBufferLockBaseAddress(capturedImage, lockFlags)
        defer {
            CVPixelBufferUnlockBaseAddress(capturedImage, lockFlags)
        }
        // Depth: single plane of Float32 meters.
        // NOTE(review): all three sample functions use plain row-major
        // indexing, i.e. they assume no per-row padding — confirm with
        // CVPixelBufferGetBytesPerRowOfPlane.
        let baseAddress = CVPixelBufferGetBaseAddressOfPlane(smoothedDepth, 0)!
        let depthByteBuffer = baseAddress.assumingMemoryBound(to: Float32.self)
        // Captured image plane 0 is read as 8-bit luma, plane 1 as 16-bit
        // interleaved CbCr pairs.
        let lumaBaseAddress = CVPixelBufferGetBaseAddressOfPlane(capturedImage, 0)!
        let lumaByteBuffer = lumaBaseAddress.assumingMemoryBound(to: UInt8.self)
        let chromaBaseAddress = CVPixelBufferGetBaseAddressOfPlane(capturedImage, 1)!
        let chromaByteBuffer = chromaBaseAddress.assumingMemoryBound(to: UInt16.self)
        // The `.size` accessor simply read the CVPixelBuffer's width and height in pixels.
        //
        // They are the same ratio:
        // 1920 x 1440 = 1440 x 1920 = 0.75
        let depthMapSize = smoothedDepth.size(ofPlane: 0)
        // 192 x 256 = 0.75
        let capturedImageSize = capturedImage.size(ofPlane: 0)
        let lumaSize = capturedImageSize
        let chromaSize = capturedImage.size(ofPlane: 1)
        var cameraIntrinsics = frame.camera.intrinsics
        let depthResolution = simd_float2(x: Float(depthMapSize.x), y: Float(depthMapSize.y))
        let scaleRes = simd_float2(x: Float(capturedImageSize.x) / depthResolution.x,
                                   y: Float(capturedImageSize.y) / depthResolution.y )
        // Make the camera intrinsics be with respect to Depth.
        // Column-major simd matrix: [0][0] = fx, [1][1] = fy,
        // [2][0] = cx, [2][1] = cy.
        cameraIntrinsics[0][0] /= scaleRes.x
        cameraIntrinsics[1][1] /= scaleRes.y
        cameraIntrinsics[2][0] /= scaleRes.x
        cameraIntrinsics[2][1] /= scaleRes.y
        // This will be the long size, because of the rotation
        let horizontalStep = Float(depthMapSize.x) / Float(self.horizontalPoints)
        let halfHorizontalStep = horizontalStep / 2
        // This will be the short size, because of the rotation
        let verticalStep = Float(depthMapSize.y) / Float(self.verticalPoints)
        let halfVerticalStep = verticalStep / 2
        // Per-axis scale factors from depth-map coordinates to each image plane.
        let depthWidthToLumaWidth = Float(lumaSize.x)/Float(depthMapSize.x)
        let depthHeightToLumaHeight = Float(lumaSize.y)/Float(depthMapSize.y)
        let depthWidthToChromaWidth = Float(chromaSize.x)/Float(depthMapSize.x)
        let depthHeightToChromaHeight = Float(chromaSize.y)/Float(depthMapSize.y)
         for h in 0..<horizontalPoints {
            for v in 0..<verticalPoints {
                // Sample at the center of each grid cell.
                let x = Float(h) * horizontalStep + halfHorizontalStep
                let y = Float(v) * verticalStep + halfVerticalStep
                let depthMapPoint = simd_float2(x, y)
                // Sample depth
                let metricDepth = sampleDepthRaw(depthByteBuffer, size: depthMapSize, at: .init(depthMapPoint))
                let wp = worldPoint(depthMapPixelPoint: depthMapPoint,
                                    depth: metricDepth,
                                    cameraIntrinsics: cameraIntrinsics,
                                    // This is crucial: you need to always use the view matrix for Landscape Right.
                                    viewMatrixInverted: frame.camera.viewMatrix(for: .landscapeRight).inverse)
                // Sample Image
                let lumaPoint = simd_float2(x * depthWidthToLumaWidth, y * depthHeightToLumaHeight)
                let luma = sampleLuma(lumaByteBuffer, size: lumaSize, at: .init(lumaPoint))
                let chromaPoint = simd_float2(x * depthWidthToChromaWidth, y * depthHeightToChromaHeight)
                let chroma = sampleChroma(chromaByteBuffer, size: chromaSize, at: .init(chromaPoint))
                // Split the 16-bit pair into its two bytes.
                // NOTE(review): this assumes Cr lands in the high byte and
                // Cb in the low byte of the UInt16 read — i.e. a biplanar
                // CbCr layout read little-endian. Verify against the
                // captured image's pixel format.
                let cr = UInt8(chroma >> 8)
                let cb = UInt8((chroma << 8) >> 8)
                // Nodes are stored row-major over the (h, v) grid.
                let node = self.depthNodes[v * horizontalPoints + h]
                node.simdWorldPosition = wp
                node.geometry?.materials.first?.diffuse.contents = UIColor(y: luma, cb: cb, cr: cr)
            }
        }
    }where the setup() function is also a bit different (so that different nodes can have different materials):
// Color version of setup(): each node gets its OWN SCNBox (and therefore
// its own material), so per-node diffuse colors set in the session
// callback don't bleed across nodes as they would with shared geometry.
func setup() {
    scene.rootNode.addChildNode(parentDebugNodes)
    // Side length, in scene units, of each debug cube.
    let sizeGeomPredictions = 0.005
    for _ in 0 ..< (horizontalPoints * verticalPoints) {
        let geom = SCNBox(width: sizeGeomPredictions, height: sizeGeomPredictions, length: sizeGeomPredictions, chamferRadius: 0)
        // Start green; recolored per-frame from the captured image.
        geom.firstMaterial?.diffuse.contents = UIColor.green
        let node = SCNNode(geometry: geom)
        parentDebugNodes.addChildNode(node)
        depthNodes.append(node)
    }
}And these are the other auxiliary functions I wrote:
/// Reads one luma (Y) byte from a tightly-packed 8-bit plane of the given
/// pixel `size`.
///
/// - Note: assumes no per-row padding (bytesPerRow == size.x).
func sampleLuma(_ pointer: UnsafeMutablePointer<UInt8>, size: SIMD2<Int>, at: SIMD2<Int>) -> UInt8 {
    pointer[at.y * size.x + at.x]
}
/// Reads one interleaved chroma pair (as a single UInt16) from a
/// tightly-packed 16-bit-per-sample plane.
/// - Note: `size` is measured in CbCr pairs, not bytes; assumes no
///   per-row padding.
func sampleChroma(_ pointer: UnsafeMutablePointer<UInt16>, size: SIMD2<Int>, at: SIMD2<Int>) -> UInt16 {
    // Row-major index into a size.x-by-size.y grid of 16-bit samples.
    let baseAddressIndex = at.y * size.x + at.x
    return UInt16(pointer[baseAddressIndex])
}and this extension on UIColor to convert from YCbCr to RGB:
extension UIColor {
    // Luma weights (Kr, Kg, Kb). These values are the BT.601 coefficients.
    private static let encoding: (r: CGFloat, g: CGFloat, b: CGFloat) = (0.299, 0.587, 0.114)
    /// Builds an RGB color from 8-bit YCbCr components.
    /// NOTE(review): treats all three components as full-range (0–255);
    /// for video-range buffers the offsets/scales would differ — verify
    /// against the pixel buffer's format.
    convenience init(y: UInt8, cb: UInt8, cr: UInt8, alpha: CGFloat = 1.0) {
        // Normalize to [0, 1]; chroma is re-centered around zero.
        let Y  = (Double(y)  / 255.0)
        let Cb = (Double(cb) / 255.0) - 0.5
        let Cr = (Double(cr) / 255.0) - 0.5
        let k = UIColor.encoding
        // Conversion-matrix terms derived from the luma coefficients.
        let kr = (Cr * ((1.0 - k.r) / 0.5))
        let kgb = (Cb * ((k.b * (1.0 - k.b)) / (0.5 * k.g)))
        let kgr = (Cr * ((k.r * (1.0 - k.r)) / (0.5 * k.g)))
        let kb = (Cb * ((1.0 - k.b) / 0.5))
        let r = Y + kr
        let g = Y - kgb - kgr
        let b = Y + kb
        // Components are not clamped to [0, 1] here; out-of-range values
        // are passed straight to UIColor.
        self.init(red: r, green: g, blue: b, alpha: alpha)
    }
}and a different extension on CVPixelBuffer:
extension CVPixelBuffer {
    /// Width and height (in pixels) of the given plane, packed into an
    /// integer vector. Defaults to plane 0.
    func size(ofPlane plane: Int = 0) -> SIMD2<Int> {
        let width = CVPixelBufferGetWidthOfPlane(self, plane)
        let height = CVPixelBufferGetHeightOfPlane(self, plane)
        return  .init(x: width, y: height)
    }
}EDIT
I've uploaded my project to this repository.
It looks great, and thanks for posting the rest of your code!
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment
  
            
Hi 👋 I noticed a few issues when I was playing with your code.
This line should be replaced with:
We're also missing an extension on CVPixelBuffer.
For anyone else trying this code, make sure to run it with this ARKit configuration: