|
/** |
|
* Enhanced Floyd-Warshall Implementation with C++20/23 Features |
|
*/ |
|
|
|
#include <iostream> |
|
#include <iomanip> |
|
#include <chrono> |
|
#include <vector> |
|
#include <array> |
|
#include <memory> |
|
#include <algorithm> |
|
#include <random> |
|
#include <limits> |
|
#include <cmath> |
|
#include <string_view> |
|
#include <optional> |
|
#ifndef __has_include |
|
#define __has_include(x) 0 |
|
#endif |
|
#if __has_include(<expected>) |
|
#include <expected> // C++23 |
|
#define HAS_STD_EXPECTED 1 |
|
#else |
|
#define HAS_STD_EXPECTED 0 |
|
#endif |
|
#include <span> // C++20 |
|
#include <concepts> // C++20 |
|
#include <ranges> // C++20 |
|
#if __has_include(<print>) |
|
#include <print> // C++23 |
|
#define HAS_STD_PRINT 1 |
|
#else |
|
#define HAS_STD_PRINT 0 |
|
#endif |
|
#include <atomic> |
|
#include <thread> |
|
#include <format> // C++20 |
|
#ifdef _MSC_VER |
|
#include <malloc.h> |
|
#endif |
|
// Common standard headers |
|
#include <map> |
|
#include <mutex> |
|
#if __has_include(<execution>) |
|
#include <execution> |
|
#define HAS_STD_EXECUTION 1 |
|
#else |
|
#define HAS_STD_EXECUTION 0 |
|
#endif |
|
#include <functional> |
|
#include <numeric> |
|
#include <cstring> |
|
#include <cstdlib> |
|
#include <new> |
|
|
|
// Feature detection macros |
|
#if __cplusplus >= 202302L |
|
#define HAS_CPP23 1 |
|
#if __has_include(<mdspan>) |
|
#include <mdspan> |
|
#define HAS_STD_MDSPAN 1 |
|
#else |
|
#define HAS_STD_MDSPAN 0 |
|
#endif |
|
#else |
|
#define HAS_CPP23 0 |
|
#define HAS_STD_MDSPAN 0 |
|
#endif |
|
|
|
#if __cplusplus >= 202002L |
|
#define HAS_CPP20 1 |
|
#else |
|
#define HAS_CPP20 0 |
|
#endif |
|
|
|
// Platform detection (same as before) |
|
#if defined(_OPENMP) && __has_include(<omp.h>) |
|
#include <omp.h> |
|
#define HAS_OPENMP 1 |
|
#else |
|
#define HAS_OPENMP 0 |
|
#endif |
|
|
|
// SIMD detection (same as before) |
|
#if defined(__AVX2__) |
|
#include <immintrin.h> |
|
#define HAS_AVX2 1 |
|
#else |
|
#define HAS_AVX2 0 |
|
#endif |
|
|
|
#if defined(__SSE2__) || defined(_M_X64) |
|
#include <emmintrin.h> |
|
#define HAS_SSE2 1 |
|
#else |
|
#define HAS_SSE2 0 |
|
#endif |
|
|
|
#if defined(__ARM_NEON) |
|
#include <arm_neon.h> |
|
#define HAS_NEON 1 |
|
#else |
|
#define HAS_NEON 0 |
|
#endif |
|
|
|
using namespace std; |
|
using namespace std::chrono; |
|
|
|
// Prefetch portability macro |
|
#if defined(__GNUC__) || defined(__clang__) |
|
#define PREFETCH(addr) __builtin_prefetch((addr), 1, 3) |
|
#else |
|
#define PREFETCH(addr) ((void)0) |
|
#endif |
|
|
|
// ============================================================================ |
|
// Security and Safety Features |
|
// ============================================================================ |
|
|
|
/** |
|
* Secure integer type that prevents overflow |
|
* Uses saturating arithmetic to prevent undefined behavior |
|
*/ |
|
template<typename T> |
|
requires std::integral<T> |
|
class SafeInt { |
|
private: |
|
T value; |
|
|
|
public: |
|
constexpr SafeInt(T val = 0) : value(val) {} |
|
|
|
// Saturating addition - prevents overflow |
|
constexpr SafeInt operator+(const SafeInt& other) const { |
|
if (value == numeric_limits<T>::max() || |
|
other.value == numeric_limits<T>::max()) { |
|
return SafeInt(numeric_limits<T>::max()); |
|
} |
|
|
|
T result; |
|
// Check for overflow using built-in functions (C++20) |
|
if (__builtin_add_overflow(value, other.value, &result)) { |
|
return SafeInt(numeric_limits<T>::max()); |
|
} |
|
return SafeInt(result); |
|
} |
|
|
|
constexpr bool operator<(const SafeInt& other) const { |
|
return value < other.value; |
|
} |
|
|
|
constexpr T get() const { return value; } |
|
constexpr operator T() const { return value; } |
|
}; |
|
|
|
/** |
|
* Secure memory allocator with bounds checking and zeroing |
|
*/ |
|
template<typename T> |
|
class SecureAllocator { |
|
public: |
|
using value_type = T; |
|
|
|
T* allocate(size_t n) { |
|
// Check for overflow in size calculation |
|
if (n > numeric_limits<size_t>::max() / sizeof(T)) { |
|
throw bad_alloc(); |
|
} |
|
|
|
// Allocate with alignment |
|
void* ptr = nullptr; |
|
#ifdef _MSC_VER |
|
ptr = _aligned_malloc(n * sizeof(T), 32); |
|
#else |
|
if (posix_memalign(&ptr, 32, n * sizeof(T)) != 0) { |
|
throw bad_alloc(); |
|
} |
|
#endif |
|
|
|
// Zero memory to prevent information leakage |
|
memset(ptr, 0, n * sizeof(T)); |
|
|
|
return static_cast<T*>(ptr); |
|
} |
|
|
|
void deallocate(T* ptr, size_t n) { |
|
// Secure erase before deallocation |
|
if (ptr) { |
|
// Attempt to use memset_s if available (C11 optional) |
|
#if defined(__STDC_LIB_EXT1__) |
|
memset_s(ptr, n * sizeof(T), 0, n * sizeof(T)); |
|
#else |
|
// Fallback: volatile byte-wise zeroing to prevent optimization-out |
|
volatile unsigned char* p = reinterpret_cast<volatile unsigned char*>(ptr); |
|
for (size_t i = 0; i < n * sizeof(T); ++i) { |
|
p[i] = 0; |
|
} |
|
#endif |
|
|
|
#ifdef _MSC_VER |
|
_aligned_free(ptr); |
|
#else |
|
free(ptr); |
|
#endif |
|
} |
|
} |
|
}; |
|
|
|
/** |
|
* Bounds-checked matrix access wrapper using C++20 span |
|
*/ |
|
template<typename T> |
|
class SafeMatrix { |
|
private: |
|
vector<T, SecureAllocator<T>> data; |
|
size_t rows, cols; |
|
|
|
public: |
|
SafeMatrix(size_t r, size_t c, T default_val = T{}) |
|
: data(r * c, default_val), rows(r), cols(c) { |
|
|
|
// Validate dimensions |
|
if (r == 0 || c == 0 || r > 10000 || c > 10000) { |
|
throw invalid_argument("Invalid matrix dimensions"); |
|
} |
|
} |
|
|
|
// C++20 span-based access with bounds checking |
|
[[nodiscard]] span<T> operator[](size_t row) { |
|
if (row >= rows) [[unlikely]] { |
|
throw out_of_range("Row index out of bounds"); |
|
} |
|
return span<T>(data.data() + row * cols, cols); |
|
} |
|
|
|
[[nodiscard]] span<const T> operator[](size_t row) const { |
|
if (row >= rows) [[unlikely]] { |
|
throw out_of_range("Row index out of bounds"); |
|
} |
|
return span<const T>(data.data() + row * cols, cols); |
|
} |
|
|
|
// Safe element access with optional return |
|
[[nodiscard]] optional<T> at(size_t i, size_t j) const { |
|
if (i >= rows || j >= cols) { |
|
return nullopt; |
|
} |
|
return data[i * cols + j]; |
|
} |
|
|
|
// C++23 mdspan interface (if available) |
|
#if HAS_STD_MDSPAN |
|
[[nodiscard]] auto as_mdspan() { |
|
return std::mdspan(data.data(), rows, cols); |
|
} |
|
#endif |
|
|
|
T* raw_data() { return data.data(); } |
|
const T* raw_data() const { return data.data(); } |
|
size_t size() const { return rows * cols; } |
|
}; |
|
|
|
// ============================================================================ |
|
// C++20 Concepts for Template Constraints |
|
// ============================================================================ |
|
|
|
template<typename T> |
|
concept GraphWeight = requires(T a, T b) { |
|
{ a + b } -> std::convertible_to<T>; |
|
{ a < b } -> std::convertible_to<bool>; |
|
{ T{} }; // Default constructible |
|
}; |
|
|
|
template<typename T> |
|
concept GraphAlgorithm = requires(T t, int n) { |
|
{ t.execute() } -> std::same_as<void>; |
|
{ t.get_result() } -> std::convertible_to<bool>; |
|
}; |
|
|
|
// ============================================================================ |
|
// Advanced Algorithm Selection with Runtime Profiling |
|
// ============================================================================ |
|
|
|
class AlgorithmProfiler { |
|
private: |
|
struct ProfileData { |
|
string algorithm_name; |
|
int graph_size; |
|
double density; |
|
double execution_time; |
|
size_t cache_misses; // Would need perf counters for real measurement |
|
}; |
|
|
|
vector<ProfileData> history; |
|
mutable std::mutex history_mutex; |
|
|
|
// Machine learning-inspired simple predictor |
|
struct Predictor { |
|
// Simple linear model: time = a * size^2 + b * size + c * density + d |
|
array<double, 4> coefficients{0.001, 0.01, 0.1, 0.0}; |
|
|
|
double predict(int size, double density) const { |
|
return coefficients[0] * size * size + |
|
coefficients[1] * size + |
|
coefficients[2] * density + |
|
coefficients[3]; |
|
} |
|
|
|
void update(const ProfileData& data) { |
|
// Simple gradient descent update (simplified) |
|
double predicted = predict(data.graph_size, data.density); |
|
double error = data.execution_time - predicted; |
|
double learning_rate = 0.001; |
|
|
|
// Update coefficients |
|
coefficients[0] += learning_rate * error * data.graph_size * data.graph_size; |
|
coefficients[1] += learning_rate * error * data.graph_size; |
|
coefficients[2] += learning_rate * error * data.density; |
|
coefficients[3] += learning_rate * error; |
|
} |
|
}; |
|
|
|
map<string, Predictor> predictors; |
|
|
|
public: |
|
void record(const string& algo, int size, double density, double time) { |
|
lock_guard<mutex> lock(history_mutex); |
|
|
|
ProfileData data{algo, size, density, time, 0}; |
|
history.push_back(data); |
|
|
|
// Update predictor |
|
predictors[algo].update(data); |
|
|
|
// Keep history bounded |
|
if (history.size() > 1000) { |
|
history.erase(history.begin()); |
|
} |
|
} |
|
|
|
// Predict best algorithm based on history |
|
string predict_best(int size, double density) const { |
|
lock_guard<mutex> lock(history_mutex); |
|
|
|
if (predictors.empty()) { |
|
return "auto"; // No history yet |
|
} |
|
|
|
string best_algo; |
|
double best_time = numeric_limits<double>::max(); |
|
|
|
for (const auto& [algo, predictor] : predictors) { |
|
double predicted_time = predictor.predict(size, density); |
|
if (predicted_time < best_time) { |
|
best_time = predicted_time; |
|
best_algo = algo; |
|
} |
|
} |
|
|
|
return best_algo; |
|
} |
|
|
|
// Save/load profiling data for persistent learning |
|
void save_profile(const string& filename) const { |
|
// Implementation would save to file |
|
} |
|
|
|
void load_profile(const string& filename) { |
|
// Implementation would load from file |
|
} |
|
}; |
|
|
|
// Global profiler instance |
|
static AlgorithmProfiler g_profiler; |
|
|
|
// ============================================================================ |
|
// Enhanced Floyd-Warshall with C++20/23 Features |
|
// ============================================================================ |
|
|
|
template<typename WeightType = SafeInt<int>> |
|
requires GraphWeight<WeightType> |
|
class ModernFloydWarshall { |
|
private: |
|
const size_t n; |
|
SafeMatrix<WeightType> W; // Weight matrix with bounds checking |
|
SafeMatrix<WeightType> D; // Distance matrix |
|
SafeMatrix<int> P; // Predecessor matrix |
|
|
|
static constexpr WeightType INF = numeric_limits<int>::max() / 2; |
|
|
|
// Performance metrics using C++20 chrono |
|
struct Metrics { |
|
duration<double, milli> init_time{0}; |
|
duration<double, milli> compute_time{0}; |
|
string algorithm_used; |
|
bool overflow_detected = false; |
|
size_t bounds_checks_failed = 0; |
|
} metrics; |
|
|
|
// Side-channel resistant comparison (for sensitive applications) |
|
[[nodiscard]] static bool constant_time_less(WeightType a, WeightType b) { |
|
// Avoid branching for sensitive comparisons |
|
return (a - b) >> (sizeof(WeightType) * 8 - 1); |
|
} |
|
|
|
public: |
|
explicit ModernFloydWarshall(size_t size) |
|
: n(size), W(size, size, INF), D(size, size, INF), P(size, size, -1) { |
|
|
|
// Validate size for security |
|
if (size == 0 || size > 10000) { |
|
throw invalid_argument("Graph size must be between 1 and 10000"); |
|
} |
|
} |
|
|
|
/** |
|
* C++20 Ranges-based Floyd-Warshall |
|
* Uses ranges and views for elegant expression |
|
*/ |
|
void floyd_ranges() { |
|
auto start = steady_clock::now(); |
|
|
|
// Initialize using ranges |
|
ranges::copy(W.raw_data(), W.raw_data() + W.size(), D.raw_data()); |
|
|
|
metrics.init_time = steady_clock::now() - start; |
|
|
|
// Main algorithm using ranges |
|
for (size_t k : views::iota(size_t{0}, n)) { |
|
for (size_t i : views::iota(size_t{0}, n)) { |
|
if (D[i][k] >= INF) [[unlikely]] continue; |
|
|
|
for (size_t j : views::iota(size_t{0}, n)) { |
|
WeightType new_dist = D[i][k] + D[k][j]; |
|
|
|
// C++20 [[likely]] attribute for branch prediction |
|
if (new_dist < D[i][j]) [[likely]] { |
|
D[i][j] = new_dist; |
|
P[i][j] = k; |
|
} |
|
} |
|
} |
|
} |
|
|
|
metrics.compute_time = steady_clock::now() - start - metrics.init_time; |
|
metrics.algorithm_used = "Ranges-based"; |
|
} |
|
|
|
/** |
|
* C++20 Parallel STL version |
|
* Uses execution policies for automatic parallelization |
|
*/ |
|
void floyd_parallel_stl() { |
|
#if HAS_CPP20 && HAS_STD_EXECUTION && !defined(__APPLE__) |
|
auto start = steady_clock::now(); |
|
|
|
// Parallel copy |
|
copy(execution::par_unseq, W.raw_data(), W.raw_data() + W.size(), D.raw_data()); |
|
|
|
metrics.init_time = steady_clock::now() - start; |
|
|
|
for (size_t k = 0; k < n; ++k) { |
|
// Create range of indices |
|
vector<size_t> indices(n); |
|
iota(indices.begin(), indices.end(), 0); |
|
|
|
// Parallel for_each |
|
for_each(execution::par_unseq, indices.begin(), indices.end(), |
|
[this, k](size_t i) { |
|
if (D[i][k] >= INF) return; |
|
|
|
for (size_t j = 0; j < n; ++j) { |
|
WeightType new_dist = D[i][k] + D[k][j]; |
|
if (new_dist < D[i][j]) { |
|
D[i][j] = new_dist; |
|
P[i][j] = k; |
|
} |
|
} |
|
}); |
|
} |
|
|
|
metrics.compute_time = steady_clock::now() - start - metrics.init_time; |
|
metrics.algorithm_used = "Parallel STL"; |
|
#else |
|
floyd_ranges(); // Fallback |
|
#endif |
|
} |
|
|
|
/** |
|
* C++23 Expected-based safe execution |
|
* Returns expected with error information |
|
*/ |
|
#if HAS_STD_EXPECTED |
|
[[nodiscard]] expected<bool, string> execute_safe() { |
|
try { |
|
// Check for integer overflow possibility |
|
size_t max_ops = n * n * n; |
|
if (max_ops > numeric_limits<size_t>::max() / sizeof(WeightType)) { |
|
return unexpected("Graph too large - would cause overflow"); |
|
} |
|
|
|
// Execute with automatic algorithm selection |
|
execute_auto(); |
|
|
|
// Verify result |
|
if (!verify_solution()) { |
|
return unexpected("Solution verification failed"); |
|
} |
|
|
|
return true; |
|
|
|
} catch (const exception& e) { |
|
return unexpected(format("Execution failed: {}", e.what())); |
|
} |
|
} |
|
#endif |
|
|
|
/** |
|
* Automatic algorithm selection with profiling feedback |
|
*/ |
|
void execute_auto() { |
|
// Get prediction from profiler |
|
string predicted_best = g_profiler.predict_best(n, calculate_density()); |
|
|
|
// Decision tree with C++20 likely/unlikely attributes |
|
if (predicted_best == "ranges" || n < 50) [[likely]] { |
|
floyd_ranges(); |
|
} |
|
else if (predicted_best == "parallel_stl" && n > 200) [[unlikely]] { |
|
floyd_parallel_stl(); |
|
} |
|
else if (predicted_best == "simd" && (HAS_AVX2 || HAS_SSE2)) [[likely]] { |
|
floyd_simd_modern(); |
|
} |
|
else { |
|
floyd_cache_optimized(); |
|
} |
|
|
|
// Record performance for learning |
|
double time = metrics.init_time.count() + metrics.compute_time.count(); |
|
g_profiler.record(metrics.algorithm_used, n, calculate_density(), time); |
|
} |
|
|
|
/** |
|
* Modern SIMD implementation with C++20 features |
|
*/ |
|
void floyd_simd_modern() { |
|
auto start = steady_clock::now(); |
|
|
|
copy(W.raw_data(), W.raw_data() + W.size(), D.raw_data()); |
|
|
|
metrics.init_time = steady_clock::now() - start; |
|
|
|
#if HAS_AVX2 |
|
for (size_t k = 0; k < n; ++k) { |
|
for (size_t i = 0; i < n; ++i) { |
|
if (D[i][k] >= INF) [[unlikely]] continue; |
|
|
|
__m256i dist_ik = _mm256_set1_epi32(D[i][k].get()); |
|
|
|
size_t j = 0; |
|
// Process 8 elements at a time |
|
for (; j + 8 <= n; j += 8) { |
|
__m256i d_ij = _mm256_loadu_si256((__m256i*)&D[i][j]); |
|
__m256i d_kj = _mm256_loadu_si256((__m256i*)&D[k][j]); |
|
|
|
__m256i new_dist = _mm256_add_epi32(dist_ik, d_kj); |
|
__m256i mask = _mm256_cmpgt_epi32(d_ij, new_dist); |
|
|
|
d_ij = _mm256_blendv_epi8(d_ij, new_dist, mask); |
|
_mm256_storeu_si256((__m256i*)&D[i][j], d_ij); |
|
} |
|
|
|
// Handle remaining elements |
|
for (; j < n; ++j) { |
|
WeightType new_dist = D[i][k] + D[k][j]; |
|
if (new_dist < D[i][j]) { |
|
D[i][j] = new_dist; |
|
P[i][j] = k; |
|
} |
|
} |
|
} |
|
} |
|
#else |
|
floyd_ranges(); // Fallback |
|
#endif |
|
|
|
metrics.compute_time = steady_clock::now() - start - metrics.init_time; |
|
metrics.algorithm_used = "SIMD Modern"; |
|
} |
|
|
|
/** |
|
* Cache-optimized with prefetching |
|
*/ |
|
void floyd_cache_optimized() { |
|
auto start = steady_clock::now(); |
|
|
|
copy(W.raw_data(), W.raw_data() + W.size(), D.raw_data()); |
|
|
|
metrics.init_time = steady_clock::now() - start; |
|
|
|
constexpr size_t block_size = 64; // L1 cache optimal |
|
|
|
for (size_t kb = 0; kb < n; kb += block_size) { |
|
size_t k_end = min(kb + block_size, n); |
|
|
|
for (size_t k = kb; k < k_end; ++k) { |
|
for (size_t ib = 0; ib < n; ib += block_size) { |
|
size_t i_end = min(ib + block_size, n); |
|
|
|
for (size_t jb = 0; jb < n; jb += block_size) { |
|
size_t j_end = min(jb + block_size, n); |
|
|
|
// Prefetch next block |
|
if (jb + block_size < n) { |
|
PREFETCH(&D[ib][jb + block_size]); |
|
} |
|
|
|
for (size_t i = ib; i < i_end; ++i) { |
|
if (D[i][k] >= INF) continue; |
|
|
|
for (size_t j = jb; j < j_end; ++j) { |
|
WeightType new_dist = D[i][k] + D[k][j]; |
|
if (new_dist < D[i][j]) { |
|
D[i][j] = new_dist; |
|
P[i][j] = k; |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
metrics.compute_time = steady_clock::now() - start - metrics.init_time; |
|
metrics.algorithm_used = "Cache Optimized"; |
|
} |
|
|
|
/** |
|
* Generate secure random graph |
|
*/ |
|
void generate_graph(double density, optional<unsigned> seed = nullopt) { |
|
// Use cryptographically secure random if no seed provided |
|
random_device rd; |
|
mt19937_64 gen(seed.value_or(rd())); |
|
|
|
uniform_real_distribution<> density_dist(0.0, 1.0); |
|
uniform_int_distribution<> weight_dist(1, 100); |
|
|
|
// Input validation |
|
if (density < 0.0 || density > 1.0) { |
|
throw invalid_argument("Density must be between 0 and 1"); |
|
} |
|
|
|
for (size_t i = 0; i < n; ++i) { |
|
for (size_t j = 0; j < n; ++j) { |
|
if (i == j) { |
|
W[i][j] = WeightType(0); |
|
} else if (density_dist(gen) < density) { |
|
W[i][j] = WeightType(weight_dist(gen)); |
|
} else { |
|
W[i][j] = INF; |
|
} |
|
} |
|
} |
|
} |
|
|
|
/** |
|
* Verify solution correctness |
|
*/ |
|
[[nodiscard]] bool verify_solution() const { |
|
// Check triangle inequality |
|
for (size_t i = 0; i < n; ++i) { |
|
for (size_t j = 0; j < n; ++j) { |
|
for (size_t k = 0; k < n; ++k) { |
|
if (D[i][k] < INF && D[k][j] < INF) { |
|
if (D[i][j] > D[i][k] + D[k][j]) { |
|
return false; |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
// Check for negative cycles |
|
for (size_t i = 0; i < n; ++i) { |
|
if (D[i][i] < WeightType(0)) { |
|
return false; // Negative cycle detected |
|
} |
|
} |
|
|
|
return true; |
|
} |
|
|
|
/** |
|
* Get path with C++23 expected for error handling |
|
*/ |
|
#if HAS_STD_EXPECTED |
|
[[nodiscard]] expected<vector<size_t>, string> |
|
get_path(size_t from, size_t to) const { |
|
if (from >= n || to >= n) { |
|
return unexpected("Invalid node indices"); |
|
} |
|
|
|
if (D[from][to] >= INF) { |
|
return unexpected("No path exists"); |
|
} |
|
|
|
vector<size_t> path; |
|
std::function<void(size_t, size_t)> reconstruct = |
|
[&](size_t i, size_t j) { |
|
int k = P[i][j]; |
|
if (k != -1) { |
|
reconstruct(i, k); |
|
path.push_back(k); |
|
reconstruct(k, j); |
|
} |
|
}; |
|
|
|
path.push_back(from); |
|
reconstruct(from, to); |
|
path.push_back(to); |
|
|
|
return path; |
|
} |
|
#endif |
|
|
|
/** |
|
* Print metrics with C++23 print |
|
*/ |
|
void print_metrics() const { |
|
#if HAS_STD_PRINT |
|
std::print("Algorithm: {}\n", metrics.algorithm_used); |
|
std::print("Init time: {:.3f} ms\n", metrics.init_time.count()); |
|
std::print("Compute time: {:.3f} ms\n", metrics.compute_time.count()); |
|
std::print("Total time: {:.3f} ms\n", |
|
metrics.init_time.count() + metrics.compute_time.count()); |
|
if (metrics.overflow_detected) { |
|
std::print("Warning: Integer overflow detected and handled\n"); |
|
} |
|
#else |
|
// C++20 format |
|
cout << format("Algorithm: {}\n", metrics.algorithm_used); |
|
cout << format("Init time: {:.3f} ms\n", metrics.init_time.count()); |
|
cout << format("Compute time: {:.3f} ms\n", metrics.compute_time.count()); |
|
cout << format("Total time: {:.3f} ms\n", |
|
metrics.init_time.count() + metrics.compute_time.count()); |
|
#endif |
|
} |
|
|
|
private: |
|
double calculate_density() const { |
|
size_t edges = 0; |
|
for (size_t i = 0; i < n; ++i) { |
|
for (size_t j = 0; j < n; ++j) { |
|
if (i != j && W[i][j] < INF) { |
|
edges++; |
|
} |
|
} |
|
} |
|
return static_cast<double>(edges) / (n * (n - 1)); |
|
} |
|
}; |
|
|
|
// ============================================================================ |
|
// Benchmark Suite with Modern Features |
|
// ============================================================================ |
|
|
|
class ModernBenchmark { |
|
public: |
|
struct Config { |
|
vector<int> sizes = {10, 50, 100, 200, 500}; |
|
vector<double> densities = {0.2, 0.5, 0.8, 1.0}; |
|
int iterations = 3; |
|
bool verify = true; |
|
bool save_profile = false; |
|
string profile_file = "floyd_profile.dat"; |
|
}; |
|
|
|
void run(const Config& config) { |
|
#if HAS_STD_PRINT |
|
std::print("{}\n", string(60, '=')); |
|
std::print(" MODERN FLOYD-WARSHALL BENCHMARK (C++23)\n"); |
|
std::print("{}\n\n", string(60, '=')); |
|
#else |
|
cout << "\n" << string(60, '=') << "\n"; |
|
cout << " MODERN FLOYD-WARSHALL BENCHMARK (C++20)\n"; |
|
cout << string(60, '=') << "\n\n"; |
|
#endif |
|
|
|
// Print system info |
|
print_system_info(); |
|
|
|
// Run benchmarks |
|
for (double density : config.densities) { |
|
#if HAS_STD_PRINT |
|
std::print("\nDensity = {:.1f}:\n", density); |
|
std::print("{}\n", string(40, '-')); |
|
#else |
|
cout << format("\nDensity = {:.1f}:\n", density); |
|
cout << string(40, '-') << "\n"; |
|
#endif |
|
|
|
for (int size : config.sizes) { |
|
run_single_benchmark(size, density, config); |
|
|
|
// Skip large sizes for complete graphs |
|
if (density == 1.0 && size >= 200) break; |
|
} |
|
} |
|
|
|
// Save profile if requested |
|
if (config.save_profile) { |
|
g_profiler.save_profile(config.profile_file); |
|
#if HAS_STD_PRINT |
|
std::print("\nProfile saved to {}\n", config.profile_file); |
|
#else |
|
cout << format("\nProfile saved to {}\n", config.profile_file); |
|
#endif |
|
} |
|
} |
|
|
|
// Convenience overload to use default-constructed config without relying on braced default param |
|
void run() { run(Config{}); } |
|
|
|
private: |
|
void print_system_info() { |
|
#if HAS_STD_PRINT |
|
std::print("System Configuration:\n"); |
|
std::print(" C++ Standard: {}\n", __cplusplus); |
|
std::print(" Hardware threads: {}\n", thread::hardware_concurrency()); |
|
std::print(" OpenMP: {}\n", HAS_OPENMP ? "Yes" : "No"); |
|
std::print(" AVX2: {}\n", HAS_AVX2 ? "Yes" : "No"); |
|
std::print(" SSE2: {}\n", HAS_SSE2 ? "Yes" : "No"); |
|
std::print(" NEON: {}\n", HAS_NEON ? "Yes" : "No"); |
|
#else |
|
cout << "System Configuration:\n"; |
|
cout << format(" C++ Standard: {}\n", __cplusplus); |
|
cout << format(" Hardware threads: {}\n", thread::hardware_concurrency()); |
|
cout << format(" OpenMP: {}\n", HAS_OPENMP ? "Yes" : "No"); |
|
cout << format(" AVX2: {}\n", HAS_AVX2 ? "Yes" : "No"); |
|
cout << format(" SSE2: {}\n", HAS_SSE2 ? "Yes" : "No"); |
|
cout << format(" NEON: {}\n", HAS_NEON ? "Yes" : "No"); |
|
#endif |
|
} |
|
|
|
void run_single_benchmark(int size, double density, const Config& config) { |
|
double total_time = 0; |
|
bool valid = true; |
|
|
|
for (int iter = 0; iter < config.iterations; ++iter) { |
|
ModernFloydWarshall<SafeInt<int>> fw(size); |
|
fw.generate_graph(density, iter); // Different seed each iteration |
|
|
|
auto start = steady_clock::now(); |
|
fw.execute_auto(); |
|
auto end = steady_clock::now(); |
|
|
|
double time = duration<double, milli>(end - start).count(); |
|
total_time += time; |
|
|
|
if (config.verify && iter == 0) { |
|
valid = fw.verify_solution(); |
|
} |
|
} |
|
|
|
double avg_time = total_time / config.iterations; |
|
|
|
#if HAS_CPP23 |
|
std::print("Size {:4d}: {:7.2f} ms {}\n", |
|
size, avg_time, valid ? "✓" : "✗"); |
|
#else |
|
cout << format("Size {:4d}: {:7.2f} ms {}\n", |
|
size, avg_time, valid ? "✓" : "✗"); |
|
#endif |
|
} |
|
}; |
|
|
|
// ============================================================================ |
|
// Main Function |
|
// ============================================================================ |
|
|
|
int main(int argc, char* argv[]) { |
|
try { |
|
ModernBenchmark::Config config; |
|
|
|
// Parse command line arguments |
|
for (int i = 1; i < argc; ++i) { |
|
string_view arg = argv[i]; |
|
|
|
if (arg == "--help" || arg == "-h") { |
|
#if HAS_STD_PRINT |
|
std::print("Usage: {} [options]\n", argv[0]); |
|
std::print("Options:\n"); |
|
std::print(" --sizes N1,N2,... Graph sizes to test\n"); |
|
std::print(" --densities D1,D2... Densities to test\n"); |
|
std::print(" --iterations N Iterations per test\n"); |
|
std::print(" --no-verify Skip verification\n"); |
|
std::print(" --save-profile Save profiling data\n"); |
|
std::print(" --load-profile FILE Load profiling data\n"); |
|
#else |
|
cout << format("Usage: {} [options]\n", argv[0]); |
|
cout << "Options:\n"; |
|
cout << " --sizes N1,N2,... Graph sizes to test\n"; |
|
cout << " --densities D1,D2... Densities to test\n"; |
|
cout << " --iterations N Iterations per test\n"; |
|
cout << " --no-verify Skip verification\n"; |
|
cout << " --save-profile Save profiling data\n"; |
|
cout << " --load-profile FILE Load profiling data\n"; |
|
#endif |
|
return 0; |
|
} |
|
else if (arg == "--iterations" && i + 1 < argc) { |
|
config.iterations = stoi(argv[++i]); |
|
} |
|
else if (arg == "--no-verify") { |
|
config.verify = false; |
|
} |
|
else if (arg == "--save-profile") { |
|
config.save_profile = true; |
|
} |
|
else if (arg == "--load-profile" && i + 1 < argc) { |
|
g_profiler.load_profile(argv[++i]); |
|
} |
|
} |
|
|
|
// Example: Test specific C++23 features |
|
#if HAS_STD_PRINT |
|
std::print("\nTesting C++23 Features:\n"); |
|
std::print("{:-^40}\n", ""); |
|
|
|
ModernFloydWarshall<SafeInt<int>> fw(10); |
|
fw.generate_graph(0.5); |
|
|
|
// Test expected-based execution |
|
auto result = fw.execute_safe(); |
|
if (result) { |
|
std::print("Execution successful: {}\n", *result); |
|
|
|
// Test path finding with expected |
|
auto path = fw.get_path(0, 5); |
|
if (path) { |
|
std::print("Path found: "); |
|
for (size_t node : *path) { |
|
std::print("{} ", node); |
|
} |
|
std::print("\n"); |
|
} else { |
|
std::print("Path error: {}\n", path.error()); |
|
} |
|
} else { |
|
std::print("Execution failed: {}\n", result.error()); |
|
} |
|
#endif |
|
|
|
// Run main benchmark |
|
ModernBenchmark benchmark; |
|
benchmark.run(config); |
|
|
|
return 0; |
|
|
|
} catch (const exception& e) { |
|
#if HAS_STD_PRINT |
|
std::print(stderr, "Error: {}\n", e.what()); |
|
#else |
|
cerr << format("Error: {}\n", e.what()); |
|
#endif |
|
return 1; |
|
} |
|
} |