#ifdef _WIN32
#include <malloc.h>     // NOTE(review): presumably where MSVC declares _mm_malloc/_mm_free - confirm
#endif
#include <xmmintrin.h>  // __m128, _mm_set_ps, _mm_mul_ps, _mm_malloc, _mm_free

#include <cstddef>      // std::size_t, std::ptrdiff_t, NULL
#include <new>          // placement new, std::bad_alloc
#include <stdexcept>    // std::length_error
#include <vector>

/**
 * Allocator for aligned data.
 *
 * Modified from the Mallocator from Stephan T. Lavavej.
 *
 * Storage is obtained from _mm_malloc() and released with _mm_free().
 *
 * @tparam T          Element type handed out by the allocator.
 * @tparam Alignment  Alignment in bytes passed to _mm_malloc() for every
 *                    allocation. NOTE(review): _mm_malloc is generally
 *                    documented to need a power-of-two alignment; this class
 *                    does not check that.
 */
template <typename T, std::size_t Alignment>
class aligned_allocator
{
public:
    // The following will be the same for virtually all allocators.
    typedef T*             pointer;
    typedef const T*       const_pointer;
    typedef T&             reference;
    typedef const T&       const_reference;
    typedef T              value_type;
    typedef std::size_t    size_type;
    typedef std::ptrdiff_t difference_type;

    /// Returns the address of @p r.
    T* address(T& r) const
    {
        return &r;
    }

    /// Returns the address of @p s.
    const T* address(const T& s) const
    {
        return &s;
    }

    /// Largest element count n for which n * sizeof(T) fits in std::size_t.
    std::size_t max_size() const
    {
        // The following has been carefully written to be independent of
        // the definition of size_t and to avoid signed/unsigned warnings.
        return (static_cast<std::size_t>(0) - static_cast<std::size_t>(1)) / sizeof(T);
    }

    // The following must be the same for all allocators.
    /// Names the allocator for another element type U with the same Alignment.
    template <typename U>
    struct rebind
    {
        typedef aligned_allocator<U, Alignment> other;
    };

    /// Negation of operator==.
    bool operator!=(const aligned_allocator& other) const
    {
        return !(*this == other);
    }

    /// Copy-constructs a T from @p t in the storage at @p p (placement new).
    void construct(T* const p, const T& t) const
    {
        void* const pv = static_cast<void*>(p);
        new (pv) T(t);
    }

    /// Runs the destructor of the object at @p p; the storage is not released.
    void destroy(T* const p) const
    {
        p->~T();
    }

    // Returns true if and only if storage allocated from *this
    // can be deallocated from other, and vice versa.
    // Always returns true for stateless allocators.
    bool operator==(const aligned_allocator& /* other */) const
    {
        return true;
    }

    // Default constructor, copy constructor, rebinding constructor, and destructor.
    // Empty for stateless allocators.
    aligned_allocator() { }

    aligned_allocator(const aligned_allocator&) { }

    template <typename U>
    aligned_allocator(const aligned_allocator<U, Alignment>&) { }

    ~aligned_allocator() { }

    // The following will be different for each allocator.

    /**
     * Allocates uninitialized storage for @p n objects of type T, requesting
     * Alignment-byte alignment from _mm_malloc().
     *
     * @param n  Number of elements to allocate room for.
     * @return   Pointer to the storage, or NULL when @p n is 0.
     * @throws std::length_error  if @p n exceeds max_size().
     * @throws std::bad_alloc     if _mm_malloc() returns NULL.
     */
    T* allocate(const std::size_t n) const
    {
        // The return value of allocate(0) is unspecified.
        // Mallocator returns NULL in order to avoid depending
        // on malloc(0)'s implementation-defined behavior
        // (the implementation can define malloc(0) to return NULL,
        // in which case the bad_alloc check below would fire).
        // All allocators can return NULL in this case.
        if (n == 0)
        {
            return NULL;
        }

        // All allocators should contain an integer overflow check.
        // The Standardization Committee recommends that std::length_error
        // be thrown in the case of integer overflow.
        if (n > max_size())
        {
            throw std::length_error("aligned_allocator::allocate() - Integer overflow.");
        }

        // Where Mallocator wraps malloc(), this allocator wraps _mm_malloc().
        // n <= max_size() guarantees that n * sizeof(T) cannot wrap around.
        void* const pv = _mm_malloc(n * sizeof(T), Alignment);

        // Allocators should throw std::bad_alloc in the case of memory allocation failure.
        if (pv == NULL)
        {
            throw std::bad_alloc();
        }

        return static_cast<T*>(pv);
    }

    /**
     * Releases storage previously returned by allocate() via _mm_free().
     * The element count is not needed and is ignored.
     */
    void deallocate(T* const p, const std::size_t /* n */) const
    {
        _mm_free(p);
    }

    // The following will be the same for all allocators that ignore hints.
    template <typename U>
    T* allocate(const std::size_t n, const U* /* const hint */) const
    {
        return allocate(n);
    }

    // Allocators are not required to be assignable, so
    // all allocators should have a private unimplemented
    // assignment operator. Note that this will trigger the
    // off-by-default (enabled under /Wall) warning C4626
    // "assignment operator could not be generated because a
    // base class assignment operator is inaccessible" within
    // the STL headers, but that warning is useless.
private:
    aligned_allocator& operator=(const aligned_allocator&);
};

/**
 * Small demonstration: fills two std::vector<__m128> instances that use
 * aligned_allocator with 1000 packed-float values each, then multiplies one
 * pair of elements with _mm_mul_ps.
 */
int main()
{
    typedef std::vector<__m128, aligned_allocator<__m128, sizeof(__m128)> > aligned_vector;

    aligned_vector lhs;
    aligned_vector rhs;

    float a = 1.0f;
    float b = 2.0f;
    float c = 3.0f;
    float d = 4.0f;

    float e = 5.0f;
    float f = 6.0f;
    float g = 7.0f;
    float h = 8.0f;

    for (std::size_t i = 0; i < 1000; ++i)
    {
        lhs.push_back(_mm_set_ps(a, b, c, d));
        rhs.push_back(_mm_set_ps(e, f, g, h));

        a += 1.0f;
        b += 1.0f;
        c += 1.0f;
        d += 1.0f;

        e += 1.0f;
        f += 1.0f;
        g += 1.0f;
        h += 1.0f;
    }

    __m128 mul = _mm_mul_ps(lhs[10], rhs[10]);
    (void)mul;  // The product is computed only as a demonstration and is not used.

    return 0;
}