constexpr bool perform_expensive_numerical_analysis_to_prevent_intersection_behind_ray_origin = true; // could noticeable slowdown algorithm (like 2x) // // Sven Woop, Carsten Benthin, and Ingo Wald, Watertight Ray/Triangle Intersection, // Journal of Computer Graphics Techniques (JCGT), vol. 2, no. 1, 65-82, 2013 // http://jcgt.org/published/0002/01/05/ // float intersect_triangle_watertight(const Ray& ray, const Vector3& p0, const Vector3& p1, const Vector3& p2, Vector3* barycentrics) { const int kz = ray.direction.abs().max_dimension(); const int kx = (kz == 2 ? 0 : kz + 1); const int ky = (kz == 0 ? 2 : kz - 1); Vector3 direction = ray.direction.permutation(kx, ky, kz); float sx = -direction.x / direction.z; float sy = -direction.y / direction.z; float sz = 1.f / direction.z; const Vector3 p0t = (p0 - ray.origin).permutation(kx, ky, kz); const Vector3 p1t = (p1 - ray.origin).permutation(kx, ky, kz); const Vector3 p2t = (p2 - ray.origin).permutation(kx, ky, kz); float x0 = p0t.x + sx * p0t.z; float y0 = p0t.y + sy * p0t.z; float x1 = p1t.x + sx * p1t.z; float y1 = p1t.y + sy * p1t.z; float x2 = p2t.x + sx * p2t.z; float y2 = p2t.y + sy * p2t.z; float e0 = x1*y2 - y1*x2; float e1 = x2*y0 - y2*x0; float e2 = x0*y1 - y0*x1; if (e0 == 0.f || e1 == 0.f || e2 == 0.f) { e0 = float(double(x1)*double(y2) - double(y1)*double(x2)); e1 = float(double(x2)*double(y0) - double(y2)*double(x0)); e2 = float(double(x0)*double(y1) - double(y0)*double(x1)); } if ((e0 < 0 || e1 < 0 || e2 < 0) && (e0 > 0 || e1 > 0 || e2 > 0)) return Infinity; float det = e0 + e1 + e2; if (det == 0.f) return Infinity; float z0 = sz * p0t.z; float z1 = sz * p1t.z; float z2 = sz * p2t.z; float t_scaled = e0*z0 + e1*z1 + e2*z2; // This following code is more efficient version of this snippet: // if (det < 0 && t_scaled > 0 || det > 0 && t_scaled < 0) return Infinity; // MSVC compiler generates 2 x64 instructions: xor, jl. // Benchmark consistenly shows 1 cycle gain :) // TODO: replace memcpy with std::bit_cast if no surprises in generated code { uint32_t det_bits; memcpy(&det_bits, &det, 4); uint32_t t_scaled_bits; memcpy(&t_scaled_bits, &t_scaled, 4); if ((det_bits ^ t_scaled_bits) >> 31) return Infinity; } float inv_det = 1.f / det; float t = inv_det * t_scaled; ASSERT(t >= 0); if constexpr(perform_expensive_numerical_analysis_to_prevent_intersection_behind_ray_origin) { float max_xt = Vector3(p0t.x, p1t.x, p2t.x).abs().max_component(); float max_yt = Vector3(p0t.y, p1t.y, p2t.y).abs().max_component(); float max_zt = Vector3(p0t.z, p1t.z, p2t.z).abs().max_component(); float max_x = Vector3(x0, x1, x2).abs().max_component(); float max_y = Vector3(y0, y1, y2).abs().max_component(); float max_z = Vector3(z0, z1, z2).abs().max_component(); float max_e = Vector3(e0, e1, e2).abs().max_component(); float delta_x = gamma(4) * (max_xt + max_zt); float delta_y = gamma(4) * (max_yt + max_zt); float delta_z = gamma(3) * max_z; float delta_xy = gamma(1)*max_x*max_y + delta_x*max_y + delta_y*max_x; float delta_e = 2*delta_xy + gamma(1)*max_e; float delta_ez = gamma(1)*max_e*max_z + delta_z*max_e + delta_e*max_z; float delta_t = (3*delta_ez + gamma(2)*std::abs(t_scaled)) * std::abs(inv_det); if (delta_t >= t) return Infinity; } barycentrics->x = e0 * inv_det; barycentrics->y = e1 * inv_det; barycentrics->z = e2 * inv_det; return t; }