int main () { struct Cascade *cascade; int barrier_view[wsz]; int barrier_stream __attribute__((stream)); #omp task cache (W:owm_pool[:SZ]) { cascade = _malloc (sizeof (struct Cascade)); foo (barrier_stream, cascade, ...); #omp task cache (R:owm_pool[:SZ]) \ input (barrier_stream >> barrier_view[scaleIndexMax] { ........ } // we need a taskwait here... taskwait doesn't work with COTSon // and this task wait couldn't be replaced by stream... #pragma omp taskwait } } void foo (int barrier_stream __attribute__((stream)), matrix, cascade,...) { matrix[:] = ...; for (i = 0; i < scaleIndexMax; i++) { #pragma task omp output (barrier_stream) cache (R:matrix[:SZ]) { //task } } //other computations... }