kennyalive · November 6, 2021 17:55
diff --git a/intersect_triangle_watertight.cpp b/intersect_triangle_watertight.cpp
 constexpr bool perform_expensive_numerical_analysis_to_prevent_intersection_behind_ray_origin = true; // could noticeable slowdown algorithm (like 2x)

 //
 // Sven Woop, Carsten Benthin, and Ingo Wald, Watertight Ray/Triangle Intersection, 
 // Journal of Computer Graphics Techniques (JCGT), vol. 2, no. 1, 65-82, 2013
 // http://jcgt.org/published/0002/01/05/
 //
 float intersect_triangle_watertight(const Ray& ray, const Vector3& p0, const Vector3& p1, const Vector3& p2, Vector3* barycentrics)
 {
    const int kz = ray.direction.abs().max_dimension();
    const int kx = (kz == 2 ? 0 : kz + 1);
    const int ky = (kz == 0 ? 2 : kz - 1);

    Vector3 direction = ray.direction.permutation(kx, ky, kz);
    float sx = -direction.x / direction.z;
    float sy = -direction.y / direction.z;
    float sz = 1.f / direction.z;

    const Vector3 p0t = (p0 - ray.origin).permutation(kx, ky, kz);
    const Vector3 p1t = (p1 - ray.origin).permutation(kx, ky, kz);
    const Vector3 p2t = (p2 - ray.origin).permutation(kx, ky, kz);

    float x0 = p0t.x + sx * p0t.z;
    float y0 = p0t.y + sy * p0t.z;
    float x1 = p1t.x + sx * p1t.z;
    float y1 = p1t.y + sy * p1t.z;
    float x2 = p2t.x + sx * p2t.z;
    float y2 = p2t.y + sy * p2t.z;

    float e0 = x1*y2 - y1*x2;
    float e1 = x2*y0 - y2*x0;
    float e2 = x0*y1 - y0*x1;

    if (e0 == 0.f || e1 == 0.f || e2 == 0.f) {
        e0 = float(double(x1)*double(y2) - double(y1)*double(x2));
        e1 = float(double(x2)*double(y0) - double(y2)*double(x0));
        e2 = float(double(x0)*double(y1) - double(y0)*double(x1));
    }

    if ((e0 < 0 || e1 < 0 || e2 < 0) && (e0 > 0 || e1 > 0 || e2 > 0))
        return Infinity;

    float det = e0 + e1 + e2;
    if (det == 0.f)
        return Infinity;

    float z0 = sz * p0t.z;
    float z1 = sz * p1t.z;
    float z2 = sz * p2t.z;

    float t_scaled = e0*z0 + e1*z1 + e2*z2;

    // This following code is more efficient version of this snippet:
    //    if (det < 0 && t_scaled > 0 || det > 0 && t_scaled < 0) return Infinity;
    // MSVC compiler generates 2 x64 instructions: xor, jl.
    // Benchmark consistenly shows 1 cycle gain :)
    // TODO: replace memcpy with std::bit_cast if no surprises in generated code
    {
        uint32_t det_bits;
        memcpy(&det_bits, &det, 4);
        uint32_t t_scaled_bits;
        memcpy(&t_scaled_bits, &t_scaled, 4);
        if ((det_bits ^ t_scaled_bits) >> 31)
            return Infinity;
    }

    float inv_det = 1.f / det;
    float t = inv_det * t_scaled;
    ASSERT(t >= 0);

    if constexpr(perform_expensive_numerical_analysis_to_prevent_intersection_behind_ray_origin)
    {
        float max_xt = Vector3(p0t.x, p1t.x, p2t.x).abs().max_component();
        float max_yt = Vector3(p0t.y, p1t.y, p2t.y).abs().max_component();
        float max_zt = Vector3(p0t.z, p1t.z, p2t.z).abs().max_component();
        float max_x = Vector3(x0, x1, x2).abs().max_component();
        float max_y = Vector3(y0, y1, y2).abs().max_component();
        float max_z = Vector3(z0, z1, z2).abs().max_component();
        float max_e = Vector3(e0, e1, e2).abs().max_component();

        float delta_x = gamma(4) * (max_xt + max_zt);
        float delta_y = gamma(4) * (max_yt + max_zt);
        float delta_z = gamma(3) * max_z;

        float delta_xy = gamma(1)*max_x*max_y + delta_x*max_y + delta_y*max_x;
        float delta_e = 2*delta_xy + gamma(1)*max_e;
        float delta_ez = gamma(1)*max_e*max_z + delta_z*max_e + delta_e*max_z;

        float delta_t = (3*delta_ez + gamma(2)*std::abs(t_scaled)) * std::abs(inv_det);
        if (delta_t >= t)
            return Infinity;
    }

    barycentrics->x = e0 * inv_det;
    barycentrics->y = e1 * inv_det;
    barycentrics->z = e2 * inv_det;
    return t;
 }
	constexpr bool perform_expensive_numerical_analysis_to_prevent_intersection_behind_ray_origin = true; // could noticeable slowdown algorithm (like 2x)

	//
	// Sven Woop, Carsten Benthin, and Ingo Wald, Watertight Ray/Triangle Intersection,
	// Journal of Computer Graphics Techniques (JCGT), vol. 2, no. 1, 65-82, 2013
	// http://jcgt.org/published/0002/01/05/
	//
	float intersect_triangle_watertight(const Ray& ray, const Vector3& p0, const Vector3& p1, const Vector3& p2, Vector3* barycentrics)
	{
	const int kz = ray.direction.abs().max_dimension();
	const int kx = (kz == 2 ? 0 : kz + 1);
	const int ky = (kz == 0 ? 2 : kz - 1);

	Vector3 direction = ray.direction.permutation(kx, ky, kz);
	float sx = -direction.x / direction.z;
	float sy = -direction.y / direction.z;
	float sz = 1.f / direction.z;

	const Vector3 p0t = (p0 - ray.origin).permutation(kx, ky, kz);
	const Vector3 p1t = (p1 - ray.origin).permutation(kx, ky, kz);
	const Vector3 p2t = (p2 - ray.origin).permutation(kx, ky, kz);

	float x0 = p0t.x + sx * p0t.z;
	float y0 = p0t.y + sy * p0t.z;
	float x1 = p1t.x + sx * p1t.z;
	float y1 = p1t.y + sy * p1t.z;
	float x2 = p2t.x + sx * p2t.z;
	float y2 = p2t.y + sy * p2t.z;

	float e0 = x1y2 - y1x2;
	float e1 = x2y0 - y2x0;
	float e2 = x0y1 - y0x1;

	if (e0 == 0.f \|\| e1 == 0.f \|\| e2 == 0.f) {
	e0 = float(double(x1)double(y2) - double(y1)double(x2));
	e1 = float(double(x2)double(y0) - double(y2)double(x0));
	e2 = float(double(x0)double(y1) - double(y0)double(x1));
	}

	if ((e0 < 0 \|\| e1 < 0 \|\| e2 < 0) && (e0 > 0 \|\| e1 > 0 \|\| e2 > 0))
	return Infinity;

	float det = e0 + e1 + e2;
	if (det == 0.f)
	return Infinity;

	float z0 = sz * p0t.z;
	float z1 = sz * p1t.z;
	float z2 = sz * p2t.z;

	float t_scaled = e0z0 + e1z1 + e2*z2;

	// This following code is more efficient version of this snippet:
	// if (det < 0 && t_scaled > 0 \|\| det > 0 && t_scaled < 0) return Infinity;
	// MSVC compiler generates 2 x64 instructions: xor, jl.
	// Benchmark consistenly shows 1 cycle gain :)
	// TODO: replace memcpy with std::bit_cast if no surprises in generated code
	{
	uint32_t det_bits;
	memcpy(&det_bits, &det, 4);
	uint32_t t_scaled_bits;
	memcpy(&t_scaled_bits, &t_scaled, 4);
	if ((det_bits ^ t_scaled_bits) >> 31)
	return Infinity;
	}

	float inv_det = 1.f / det;
	float t = inv_det * t_scaled;
	ASSERT(t >= 0);

	if constexpr(perform_expensive_numerical_analysis_to_prevent_intersection_behind_ray_origin)
	{
	float max_xt = Vector3(p0t.x, p1t.x, p2t.x).abs().max_component();
	float max_yt = Vector3(p0t.y, p1t.y, p2t.y).abs().max_component();
	float max_zt = Vector3(p0t.z, p1t.z, p2t.z).abs().max_component();
	float max_x = Vector3(x0, x1, x2).abs().max_component();
	float max_y = Vector3(y0, y1, y2).abs().max_component();
	float max_z = Vector3(z0, z1, z2).abs().max_component();
	float max_e = Vector3(e0, e1, e2).abs().max_component();

	float delta_x = gamma(4) * (max_xt + max_zt);
	float delta_y = gamma(4) * (max_yt + max_zt);
	float delta_z = gamma(3) * max_z;

	float delta_xy = gamma(1)max_xmax_y + delta_xmax_y + delta_ymax_x;
	float delta_e = 2delta_xy + gamma(1)max_e;
	float delta_ez = gamma(1)max_emax_z + delta_zmax_e + delta_emax_z;

	float delta_t = (3delta_ez + gamma(2)std::abs(t_scaled)) * std::abs(inv_det);
	if (delta_t >= t)
	return Infinity;
	}

	barycentrics->x = e0 * inv_det;
	barycentrics->y = e1 * inv_det;
	barycentrics->z = e2 * inv_det;
	return t;
	}
No results found