parallel_implementation.h
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2017 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 #ifndef LIBMESH_PARALLEL_IMPLEMENTATION_H
20 #define LIBMESH_PARALLEL_IMPLEMENTATION_H
21 
22 // Local includes
23 #include "libmesh/parallel.h"
24 #include "libmesh/libmesh_logging.h"
25 
26 // C++ includes
27 #include <iterator> // iterator_traits
28 
29 namespace libMesh {
30 namespace Parallel {
31 
32 // First declare StandardType specializations so we can use them in anonymous
33 // helper functions later
34 
35 #ifdef LIBMESH_HAVE_MPI
36 
37 #define LIBMESH_STANDARD_TYPE(cxxtype,mpitype) \
38  template<> \
39  class StandardType<cxxtype> : public DataType \
40  { \
41  public: \
42  explicit \
43  StandardType(const cxxtype * = libmesh_nullptr) : DataType(mpitype) {} \
44  }
45 
46 #define LIBMESH_PARALLEL_INTEGER_OPS(cxxtype) \
47  template<> \
48  class OpFunction<cxxtype> \
49  { \
50  public: \
51  static MPI_Op max() { return MPI_MAX; } \
52  static MPI_Op min() { return MPI_MIN; } \
53  static MPI_Op sum() { return MPI_SUM; } \
54  static MPI_Op product() { return MPI_PROD; } \
55  static MPI_Op logical_and() { return MPI_LAND; } \
56  static MPI_Op bitwise_and() { return MPI_BAND; } \
57  static MPI_Op logical_or() { return MPI_LOR; } \
58  static MPI_Op bitwise_or() { return MPI_BOR; } \
59  static MPI_Op logical_xor() { return MPI_LXOR; } \
60  static MPI_Op bitwise_xor() { return MPI_BXOR; } \
61  static MPI_Op max_location() { return MPI_MAXLOC; } \
62  static MPI_Op min_location() { return MPI_MINLOC; } \
63  }
64 
65 #define LIBMESH_PARALLEL_FLOAT_OPS(cxxtype) \
66  template<> \
67  class OpFunction<cxxtype> \
68  { \
69  public: \
70  static MPI_Op max() { return MPI_MAX; } \
71  static MPI_Op min() { return MPI_MIN; } \
72  static MPI_Op sum() { return MPI_SUM; } \
73  static MPI_Op product() { return MPI_PROD; } \
74  static MPI_Op max_location() { return MPI_MAXLOC; } \
75  static MPI_Op min_location() { return MPI_MINLOC; } \
76  }
77 
78 #else
79 
80 #define LIBMESH_STANDARD_TYPE(cxxtype,mpitype) \
81  template<> \
82  class StandardType<cxxtype> : public DataType \
83  { \
84  public: \
85  explicit \
86  StandardType(const cxxtype * = libmesh_nullptr) : DataType() {} \
87  }
88 
89 #define LIBMESH_PARALLEL_INTEGER_OPS(cxxtype) \
90  template<> \
91  class OpFunction<cxxtype> \
92  { \
93  }
94 
95 #define LIBMESH_PARALLEL_FLOAT_OPS(cxxtype) \
96  template<> \
97  class OpFunction<cxxtype> \
98  { \
99  }
100 
101 #endif
102 
103 #define LIBMESH_INT_TYPE(cxxtype,mpitype) \
104  LIBMESH_STANDARD_TYPE(cxxtype,mpitype); \
105  LIBMESH_PARALLEL_INTEGER_OPS(cxxtype); \
106  \
107  template<> \
108  struct Attributes<cxxtype> \
109  { \
110  static const bool has_min_max = true; \
111  static void set_lowest(cxxtype & x) { x = std::numeric_limits<cxxtype>::min(); } \
112  static void set_highest(cxxtype & x) { x = std::numeric_limits<cxxtype>::max(); } \
113  }
114 
115 #define LIBMESH_FLOAT_TYPE(cxxtype,mpitype) \
116  LIBMESH_STANDARD_TYPE(cxxtype,mpitype); \
117  LIBMESH_PARALLEL_FLOAT_OPS(cxxtype); \
118  \
119  template<> \
120  struct Attributes<cxxtype> \
121  { \
122  static const bool has_min_max = true; \
123  static void set_lowest(cxxtype & x) { x = -std::numeric_limits<cxxtype>::infinity(); } \
124  static void set_highest(cxxtype & x) { x = std::numeric_limits<cxxtype>::infinity(); } \
125  }
126 
127 #define LIBMESH_CONTAINER_TYPE(cxxtype) \
128  template<typename T> \
129  struct Attributes<cxxtype<T>> \
130  { \
131  static const bool has_min_max = Attributes<T>::has_min_max; \
132  static void set_lowest(cxxtype<T> & x) { \
133  for (typename cxxtype<T>::iterator i = x.begin(); i != x.end(); ++i) \
134  Attributes<T>::set_lowest(*i); } \
135  static void set_highest(cxxtype<T> & x) { \
136  for (typename cxxtype<T>::iterator i = x.begin(); i != x.end(); ++i) \
137  Attributes<T>::set_highest(*i); } \
138  }
139 
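// A minimal usage sketch (illustrative only; `comm` is assumed to be a
// libMesh::Parallel::Communicator and MPI is assumed enabled).  Each
// LIBMESH_INT_TYPE / LIBMESH_FLOAT_TYPE invocation below produces a
// StandardType<T> specialization (convertible to the matching MPI_Datatype),
// an OpFunction<T> specialization (mapping to MPI_Op values), and an
// Attributes<T> specialization, so code like this works:
//
//   int local_count = 42, global_count = 0;
//   MPI_Allreduce(&local_count, &global_count, 1,
//                 libMesh::Parallel::StandardType<int>(),    // MPI_INT
//                 libMesh::Parallel::OpFunction<int>::sum(), // MPI_SUM
//                 comm.get());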
140 
141 LIBMESH_INT_TYPE(char,MPI_CHAR);
142 #if MPI_VERSION > 1
143 LIBMESH_INT_TYPE(signed char,MPI_SIGNED_CHAR);
144 #endif
145 LIBMESH_INT_TYPE(unsigned char,MPI_UNSIGNED_CHAR);
146 LIBMESH_INT_TYPE(short int,MPI_SHORT);
147 LIBMESH_INT_TYPE(unsigned short int,MPI_UNSIGNED_SHORT);
148 LIBMESH_INT_TYPE(int,MPI_INT);
149 LIBMESH_INT_TYPE(unsigned int,MPI_UNSIGNED);
150 LIBMESH_INT_TYPE(long,MPI_LONG);
151 LIBMESH_INT_TYPE(long long,MPI_LONG_LONG_INT);
152 LIBMESH_INT_TYPE(unsigned long,MPI_UNSIGNED_LONG);
153 #if MPI_VERSION > 1 || !defined(LIBMESH_HAVE_MPI)
154 LIBMESH_INT_TYPE(unsigned long long,MPI_UNSIGNED_LONG_LONG);
155 #else
156 // MPI 1.0 did not have an unsigned long long type, so we have to use
157 // MPI_UNSIGNED_LONG in this case. If "unsigned long" and "unsigned
158 // long long" are different sizes on your system, we detect this and
159 // throw an error in dbg mode rather than communicating values
160 // incorrectly.
161 template<>
162 class StandardType<unsigned long long> : public DataType
163 {
164 public:
165  explicit
166  StandardType(const unsigned long long * = libmesh_nullptr) :
167  DataType(MPI_UNSIGNED_LONG)
168  {
169  libmesh_assert_equal_to(sizeof(unsigned long long),
170  sizeof(unsigned long));
171  }
172 };
173 
174 LIBMESH_PARALLEL_INTEGER_OPS(unsigned long long);
175 
176 template<>
177 struct Attributes<unsigned long long>
178 {
179  static const bool has_min_max = true;
180  static void set_lowest(unsigned long long & x) { x = std::numeric_limits<unsigned long long>::min(); }
181  static void set_highest(unsigned long long & x) { x = std::numeric_limits<unsigned long long>::max(); }
182 };
183 #endif
184 
185 LIBMESH_FLOAT_TYPE(float,MPI_FLOAT);
186 LIBMESH_FLOAT_TYPE(double,MPI_DOUBLE);
187 LIBMESH_FLOAT_TYPE(long double,MPI_LONG_DOUBLE);
188 LIBMESH_CONTAINER_TYPE(std::set);
189 LIBMESH_CONTAINER_TYPE(std::vector);
190 
191 template<typename T1, typename T2>
192 class StandardType<std::pair<T1, T2>> : public DataType
193 {
194 public:
195  explicit
196  StandardType(const std::pair<T1, T2> * example = libmesh_nullptr) {
197  // We need an example for MPI_Address to use
198  static const std::pair<T1, T2> p;
199  if (!example)
200  example = &p;
201 
202  // _static_type never gets freed, but it only gets committed once
203  // per T, so it's not a *huge* memory leak...
204  static data_type _static_type;
205  static bool _is_initialized = false;
206  if (!_is_initialized)
207  {
208 #ifdef LIBMESH_HAVE_MPI
209 
210  // Get the sub-data-types, and make sure they live long enough
211  // to construct the derived type
212  StandardType<T1> d1(&example->first);
213  StandardType<T2> d2(&example->second);
214 
215 #if MPI_VERSION == 1
216 
217  // Use MPI_LB and MPI_UB here to workaround potential bugs from
218  // nested MPI_LB and MPI_UB in the specifications of d1 and/or d2:
219  // https://github.com/libMesh/libmesh/issues/631
220  MPI_Datatype types[] = { MPI_LB, (data_type)d1, (data_type)d2, MPI_UB };
221  int blocklengths[] = {1,1,1,1};
222  MPI_Aint displs[4];
223 
224  libmesh_call_mpi
225  (MPI_Address (const_cast<std::pair<T1,T2> *>(example),
226  &displs[0]));
227  libmesh_call_mpi
228  (MPI_Address (const_cast<T1*>(&example->first),
229  &displs[1]));
230  libmesh_call_mpi
231  (MPI_Address (const_cast<T2*>(&example->second),
232  &displs[2]));
233  libmesh_call_mpi
234  (MPI_Address (const_cast<std::pair<T1,T2> *>(example+1),
235  &displs[3]));
236 
237  displs[1] -= displs[0];
238  displs[2] -= displs[0];
239  displs[3] -= displs[0];
240  displs[0] = 0;
241 
242  libmesh_call_mpi
243  (MPI_Type_struct (4, blocklengths, displs, types,
244  &_static_type));
245 #else
246  MPI_Datatype types[] = { (data_type)d1, (data_type)d2 };
247  int blocklengths[] = {1,1};
248  MPI_Aint displs[2], start;
249 
250  libmesh_call_mpi
251  (MPI_Get_address (const_cast<std::pair<T1,T2> *>(example),
252  &start));
253  libmesh_call_mpi
254  (MPI_Get_address (const_cast<T1*>(&example->first),
255  &displs[0]));
256  libmesh_call_mpi
257  (MPI_Get_address (const_cast<T2*>(&example->second),
258  &displs[1]));
259  displs[0] -= start;
260  displs[1] -= start;
261 
262  // create a prototype structure
263  MPI_Datatype tmptype;
264  libmesh_call_mpi
265  (MPI_Type_create_struct (2, blocklengths, displs, types,
266  &tmptype));
267  libmesh_call_mpi
268  (MPI_Type_commit (&tmptype));
269 
270  // resize the structure type to account for padding, if any
271  libmesh_call_mpi
272  (MPI_Type_create_resized (tmptype, 0,
273  sizeof(std::pair<T1,T2>),
274  &_static_type));
275 #endif
276 
277  libmesh_call_mpi
278  (MPI_Type_commit (&_static_type));
279 #endif // LIBMESH_HAVE_MPI
280 
281  _is_initialized = true;
282  }
283 
284  _datatype = _static_type;
285  }
286 
287  // Make sure not to free our singleton
288  ~StandardType() {}
289 };
290 
291 template<typename T>
292 class StandardType<std::complex<T>> : public DataType
293 {
294 public:
295  explicit
296  StandardType(const std::complex<T> * /*example*/ = libmesh_nullptr) :
297  DataType(StandardType<T>(libmesh_nullptr), 2) {}
298 
299  ~StandardType() { this->free(); }
300 };
301 
302 } // namespace Parallel
303 
304 } // namespace libMesh
305 
306 
307 // Anonymous namespace for helper functions
308 namespace {
309 
310 // Internal helper function to create vector<something_usable> from
311 // vector<bool> for compatibility with MPI bitwise operations
312 template <typename T>
313 inline void pack_vector_bool(const std::vector<bool> & vec_in,
314  std::vector<T> & vec_out)
315 {
316  unsigned int data_bits = 8*sizeof(T);
317  std::size_t in_size = vec_in.size();
318  std::size_t out_size = in_size/data_bits + ((in_size%data_bits)?1:0);
319  vec_out.clear();
320  vec_out.resize(out_size);
321  for (std::size_t i=0; i != in_size; ++i)
322  {
323  std::size_t index = i/data_bits;
324  std::size_t offset = i%data_bits;
325  vec_out[index] += (vec_in[i]?1:0) << offset;
326  }
327 }
328 
329 // Internal helper function to create vector<bool> from
330 // vector<something usable> for compatibility with MPI byte
331 // operations
332 template <typename T>
333 inline void unpack_vector_bool(const std::vector<T> & vec_in,
334  std::vector<bool> & vec_out)
335 {
336  unsigned int data_bits = 8*sizeof(T);
337  // We need the output vector to already be properly sized
338  std::size_t out_size = vec_out.size();
339  libmesh_assert_equal_to
340  (out_size/data_bits + (out_size%data_bits?1:0), vec_in.size());
341 
342  for (std::size_t i=0; i != out_size; ++i)
343  {
344  std::size_t index = i/data_bits;
345  std::size_t offset = i%data_bits;
346  vec_out[i] = vec_in[index] << (data_bits-1-offset) >> (data_bits-1);
347  }
348 }
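// A minimal sketch of the round trip these two helpers provide (names below
// are placeholders, not part of this header).  vector<bool> cannot be handed
// to MPI directly, so it is packed into integer words, reduced bitwise, and
// then unpacked; the output vector must already be sized:
//
//   std::vector<bool> flags = {true, false, true};
//   std::vector<unsigned int> packed;
//   pack_vector_bool(flags, packed);    // bits of flags stored in packed
//   // ... e.g. MPI_Allreduce on packed with MPI_BAND or MPI_BOR ...
//   unpack_vector_bool(packed, flags);  // flags keeps its original size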
349 
350 
351 #ifdef LIBMESH_HAVE_MPI
352 // We use a helper function here to avoid ambiguity when calling
353 // send_receive of (vector<vector<T>>,vector<vector<T>>)
354 template <typename T1, typename T2>
355 inline void send_receive_vec_of_vec(const unsigned int dest_processor_id,
356  const std::vector<std::vector<T1>> & send,
357  const unsigned int source_processor_id,
358  std::vector<std::vector<T2>> & recv,
359  const libMesh::Parallel::MessageTag & send_tag,
360  const libMesh::Parallel::MessageTag & recv_tag,
361  const libMesh::Parallel::Communicator & comm)
362 {
363  LOG_SCOPE("send_receive()", "Parallel");
364 
365  if (dest_processor_id == comm.rank() &&
366  source_processor_id == comm.rank())
367  {
368  recv = send;
369  return;
370  }
371 
372  // temporary buffers - these will be sized in bytes
373  // and manipulated with MPI_Pack and friends
374  std::vector<char> sendbuf, recvbuf;
375 
376  // figure out how many bytes we need to pack all the data
377  int packedsize=0, sendsize=0;
378 
379  // The outer buffer size
380  libmesh_call_mpi
381  (MPI_Pack_size (1,
382  libMesh::Parallel::StandardType<unsigned int>(),
383  comm.get(),
384  &packedsize));
385 
386  sendsize += packedsize;
387 
388  for (std::size_t i=0; i<send.size(); i++)
389  {
390  // The size of the ith inner buffer
391  libmesh_call_mpi
392  (MPI_Pack_size (1,
393  libMesh::Parallel::StandardType<unsigned int>(),
394  comm.get(),
395  &packedsize));
396 
397  sendsize += packedsize;
398 
399  // The data for each inner buffer
400  libmesh_call_mpi
401  (MPI_Pack_size (libMesh::cast_int<int>(send[i].size()),
402  libMesh::Parallel::StandardType<T1>
403  (send[i].empty() ? libmesh_nullptr : &send[i][0]),
404  comm.get(),
405  &packedsize));
406 
407  sendsize += packedsize;
408  }
409 
410  libmesh_assert (sendsize /* should at least be 1! */);
411  sendbuf.resize (sendsize);
412 
413  // Pack the send buffer
414  int pos=0;
415 
416  // ... the size of the outer buffer
417  sendsize = libMesh::cast_int<int>(send.size());
418 
419  libmesh_call_mpi
420  (MPI_Pack (&sendsize, 1,
421  libMesh::Parallel::StandardType<unsigned int>(),
422  &sendbuf[0], libMesh::cast_int<int>(sendbuf.size()),
423  &pos, comm.get()));
424 
425  for (std::size_t i=0; i<send.size(); i++)
426  {
427  // ... the size of the ith inner buffer
428  sendsize = libMesh::cast_int<int>(send[i].size());
429 
430  libmesh_call_mpi
431  (MPI_Pack (&sendsize, 1, libMesh::Parallel::StandardType<unsigned int>(),
432  &sendbuf[0], libMesh::cast_int<int>(sendbuf.size()), &pos,
433  comm.get()));
434 
435  // ... the contents of the ith inner buffer
436  if (!send[i].empty())
437  libmesh_call_mpi
438  (MPI_Pack (const_cast<T1*>(&send[i][0]),
439  libMesh::cast_int<int>(send[i].size()),
440  libMesh::Parallel::StandardType<T1>(&send[i][0]),
441  &sendbuf[0],
442  libMesh::cast_int<int>(sendbuf.size()), &pos,
443  comm.get()));
444  }
445 
446  libmesh_assert_equal_to (static_cast<unsigned int>(pos), sendbuf.size());
447 
448  libMesh::Parallel::Request request;
449 
450  comm.send (dest_processor_id, sendbuf, MPI_PACKED, request, send_tag);
451 
452  comm.receive (source_processor_id, recvbuf, MPI_PACKED, recv_tag);
453 
454  // Unpack the received buffer
455  libmesh_assert (!recvbuf.empty());
456  pos=0;
457  libmesh_call_mpi
458  (MPI_Unpack (&recvbuf[0], libMesh::cast_int<int>(recvbuf.size()), &pos,
459  &sendsize, 1, libMesh::Parallel::StandardType<unsigned int>(),
460  comm.get()));
461 
462  // ... size the outer buffer
463  recv.resize (sendsize);
464 
465  for (std::size_t i=0; i<recv.size(); i++)
466  {
467  libmesh_call_mpi
468  (MPI_Unpack (&recvbuf[0],
469  libMesh::cast_int<int>(recvbuf.size()), &pos,
470  &sendsize, 1,
471  libMesh::Parallel::StandardType<unsigned int>(),
472  comm.get()));
473 
474  // ... size the inner buffer
475  recv[i].resize (sendsize);
476 
477  // ... unpack the inner buffer if it is not empty
478  if (!recv[i].empty())
479  libmesh_call_mpi
480  (MPI_Unpack (&recvbuf[0],
481  libMesh::cast_int<int>(recvbuf.size()), &pos,
482  &recv[i][0],
483  libMesh::cast_int<int>(recv[i].size()),
484  libMesh::Parallel::StandardType<T2>(&recv[i][0]),
485  comm.get()));
486  }
487 
488  request.wait();
489 }
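// As a usage sketch: this helper backs Communicator::send_receive calls on
// nested vectors, e.g. (dest_rank and source_rank are placeholder ranks):
//
//   std::vector<std::vector<double>> my_rows = {{1., 2.}, {3.}};
//   std::vector<std::vector<double>> their_rows;
//   comm.send_receive(dest_rank, my_rows, source_rank, their_rows);
//
// The nested vectors are flattened with MPI_Pack into one byte buffer,
// exchanged, and rebuilt with MPI_Unpack on the receiving side.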
490 
491 #endif // LIBMESH_HAVE_MPI
492 
493 } // Anonymous namespace
494 
495 
496 
497 namespace libMesh
498 {
499 
500 namespace Parallel
501 {
502 
503 /*
504  * A reference to the default libMesh communicator. This is now
505  * deprecated - instead of libMesh::Parallel::Communicator_World use
506  * libMesh::CommWorld
507  */
508 #ifdef LIBMESH_DISABLE_COMMWORLD
509 extern FakeCommunicator & Communicator_World;
510 #else
511 extern Communicator & Communicator_World;
512 #endif
513 
514 
518 template <typename Context, typename Iter>
519 inline std::size_t packed_range_size (const Context * context,
520  Iter range_begin,
521  const Iter range_end)
522 {
523  typedef typename std::iterator_traits<Iter>::value_type T;
524 
525  std::size_t buffer_size = 0;
526  for (Iter range_count = range_begin;
527  range_count != range_end;
528  ++range_count)
529  {
530  buffer_size += Parallel::Packing<T>::packable_size(*range_count, context);
531  }
532  return buffer_size;
533 }
534 
535 
539 template <typename Context, typename buffertype, typename Iter>
540 inline Iter pack_range (const Context * context,
541  Iter range_begin,
542  const Iter range_end,
543  std::vector<buffertype> & buffer,
544  // When we serialize into buffers, we need to use large buffers to optimize MPI
545  // bandwidth, but not so large as to risk allocation failures.
546  // approx_buffer_size is measured in number of buffer type entries; the number
547  // of bytes may be 4 or 8 times larger depending on configuration.
548  std::size_t approx_buffer_size)
549 {
550  typedef typename std::iterator_traits<Iter>::value_type T;
551 
552  // Count the total size of and preallocate buffer for efficiency.
553  // Prepare to stop early if the buffer would be too large.
554  std::size_t buffer_size = 0;
555  Iter range_stop = range_begin;
556  for (; range_stop != range_end && buffer_size < approx_buffer_size;
557  ++range_stop)
558  {
559  std::size_t next_buffer_size =
560  Parallel::Packing<T>::packable_size(*range_stop, context);
561  buffer_size += next_buffer_size;
562  }
563  buffer.reserve(buffer.size() + buffer_size);
564 
565  // Pack the objects into the buffer
566  for (; range_begin != range_stop; ++range_begin)
567  {
568 #ifndef NDEBUG
569  std::size_t old_size = buffer.size();
570 #endif
571 
572  Parallel::Packing<T>::pack
573  (*range_begin, back_inserter(buffer), context);
574 
575 #ifndef NDEBUG
576  unsigned int my_packable_size =
577  Parallel::Packing<T>::packable_size(*range_begin, context);
578  unsigned int my_packed_size =
579  Parallel::Packing<T>::packed_size (buffer.begin() + old_size);
580  libmesh_assert_equal_to (my_packable_size, my_packed_size);
581  libmesh_assert_equal_to (buffer.size(), old_size + my_packable_size);
582 #endif
583  }
584 
585  return range_stop;
586 }
587 
588 
589 
593 template <typename Context, typename buffertype,
594  typename OutputIter, typename T>
595 inline void unpack_range (const std::vector<buffertype> & buffer,
596  Context * context,
597  OutputIter out_iter,
598  const T * /* output_type */)
599 {
600  // Loop through the buffer and unpack each object, returning the
601  // object pointer via the output iterator
602  typename std::vector<buffertype>::const_iterator
603  next_object_start = buffer.begin();
604 
605  while (next_object_start < buffer.end())
606  {
607  *out_iter++ = Parallel::Packing<T>::unpack(next_object_start, context);
608  next_object_start +=
609  Parallel::Packing<T>::packed_size(next_object_start);
610  }
611 
612  // We should have used up the exact amount of data in the buffer
613  libmesh_assert (next_object_start == buffer.end());
614 }
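// For illustration, a sketch of the pack/unpack round trip.  It assumes a
// type T with a Parallel::Packing<T> specialization whose buffer_type is
// largest_id_type, plus placeholder names (objects, context, other_context,
// unpacked_objects) that are not part of this header:
//
//   std::vector<largest_id_type> buffer;
//   Parallel::pack_range(context, objects.begin(), objects.end(), buffer,
//                        std::numeric_limits<std::size_t>::max());
//   // pack_range returns the iterator where it stopped, so oversized ranges
//   // can be sent in several batches.
//   // ... communicate buffer ...
//   Parallel::unpack_range(buffer, other_context,
//                          std::back_inserter(unpacked_objects),
//                          (T *)libmesh_nullptr);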
615 
616 
617 inline Communicator::Communicator () :
618 #ifdef LIBMESH_HAVE_MPI
619  _communicator(MPI_COMM_SELF),
620 #endif
621  _rank(0),
622  _size(1),
623  _send_mode(DEFAULT),
624  used_tag_values(),
625  _I_duped_it(false) {}
626 
627 inline Communicator::Communicator (const communicator & comm) :
628 #ifdef LIBMESH_HAVE_MPI
629  _communicator(MPI_COMM_SELF),
630 #endif
631  _rank(0),
632  _size(1),
633  _send_mode(DEFAULT),
634  used_tag_values(),
635  _I_duped_it(false)
636 {
637  this->assign(comm);
638 }
639 
640 inline Communicator::~Communicator ()
641 {
642  this->clear();
643 }
644 
645 #ifdef LIBMESH_HAVE_MPI
646 inline void Communicator::split(int color, int key, Communicator & target) const
647 {
648  target.clear();
649  MPI_Comm newcomm;
650  libmesh_call_mpi
651  (MPI_Comm_split(this->get(), color, key, &newcomm));
652 
653  target.assign(newcomm);
654  target._I_duped_it = true;
655  target.send_mode(this->send_mode());
656 }
657 #else
658 inline void Communicator::split(int, int, Communicator & target) const
659 {
660  target.assign(this->get());
661 }
662 #endif
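// A usage sketch for split(): every rank passing the same `color` lands in
// the same sub-communicator (ranks and the `sub` name are placeholders):
//
//   Communicator sub;
//   comm.split(comm.rank() % 2, comm.rank(), sub);  // even/odd sub-groups
//   // `sub` owns its duplicated MPI communicator and frees it in clear().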
663 
664 inline void Communicator::duplicate(const Communicator & comm)
665 {
666  this->duplicate(comm._communicator);
667  this->send_mode(comm.send_mode());
668 }
669 
670 #ifdef LIBMESH_HAVE_MPI
671 inline void Communicator::duplicate(const communicator & comm)
672 {
673  if (_communicator != MPI_COMM_NULL)
674  {
675  libmesh_call_mpi
676  (MPI_Comm_dup(comm, &_communicator));
677 
678  _I_duped_it = true;
679  }
680  this->assign(_communicator);
681 }
682 #else
683 inline void Communicator::duplicate(const communicator &) { }
684 #endif
685 
686 inline void Communicator::clear() {
687 #ifdef LIBMESH_HAVE_MPI
688  if (_I_duped_it)
689  {
690  libmesh_assert (_communicator != MPI_COMM_NULL);
691  libmesh_call_mpi
692  (MPI_Comm_free(&_communicator));
693 
694  _communicator = MPI_COMM_NULL;
695  }
696  _I_duped_it = false;
697 #endif
698 }
699 
700 inline Communicator & Communicator::operator= (const communicator & comm)
701 {
702  this->clear();
703  this->assign(comm);
704  return *this;
705 }
706 
707 // Disallowed copy constructor
708 inline Communicator::Communicator (const Communicator &) :
709 #ifdef LIBMESH_HAVE_MPI
710  _communicator(MPI_COMM_NULL),
711 #endif
712  _rank(0),
713  _size(1),
714  _send_mode(DEFAULT),
715  used_tag_values(),
716  _I_duped_it(false)
717 {
718  libmesh_not_implemented();
719 }
720 
721 inline void Communicator::assign(const communicator & comm)
722 {
723  _communicator = comm;
724 #ifdef LIBMESH_HAVE_MPI
725  if (_communicator != MPI_COMM_NULL)
726  {
727  int i;
728  libmesh_call_mpi
729  (MPI_Comm_size(_communicator, &i));
730 
731  libmesh_assert_greater_equal (i, 0);
732  _size = static_cast<unsigned int>(i);
733 
734  libmesh_call_mpi
735  (MPI_Comm_rank(_communicator, &i));
736 
737  libmesh_assert_greater_equal (i, 0);
738  _rank = static_cast<unsigned int>(i);
739  }
740  else
741  {
742  _rank = 0;
743  _size = 1;
744  }
745 #endif
746  _send_mode = DEFAULT;
747 }
748 
749 
750 
751 inline Status::Status () :
752  _status(),
753  _datatype()
754 {}
755 
756 inline Status::Status (const data_type & type) :
757  _status(),
758  _datatype(type)
759 {}
760 
761 inline Status::Status (const status & stat) :
762  _status(stat),
763  _datatype()
764 {}
765 
766 inline Status::Status (const status & stat,
767  const data_type & type) :
768  _status(stat),
769  _datatype(type)
770 {}
771 
772 inline Status::Status (const Status & stat) :
773  _status(stat._status),
774  _datatype(stat._datatype)
775 {}
776 
777 inline Status::Status (const Status & stat,
778  const data_type & type) :
779  _status(stat._status),
780  _datatype(type)
781 {}
782 
783 inline int Status::source () const
784 {
785 #ifdef LIBMESH_HAVE_MPI
786  return _status.MPI_SOURCE;
787 #else
788  return 0;
789 #endif
790 }
791 
792 inline int Status::tag () const
793 {
794 #ifdef LIBMESH_HAVE_MPI
795  return _status.MPI_TAG;
796 #else
797  libmesh_not_implemented();
798  return 0;
799 #endif
800 }
801 
802 #ifdef LIBMESH_HAVE_MPI
803 inline unsigned int Status::size (const data_type & type) const
804 {
805  int msg_size;
806  libmesh_call_mpi
807  (MPI_Get_count (const_cast<MPI_Status*>(&_status), type,
808  &msg_size));
809 
810  libmesh_assert_greater_equal (msg_size, 0);
811  return msg_size;
812 }
813 #else
814 inline unsigned int Status::size (const data_type &) const
815 {
816  libmesh_not_implemented();
817  return 0;
818 }
819 #endif
820 
821 inline unsigned int Status::size () const
822 { return this->size (this->datatype()); }
823 
824 
825 
826 inline Request::Request () :
827 #ifdef LIBMESH_HAVE_MPI
828  _request(MPI_REQUEST_NULL),
829 #else
830  _request(),
831 #endif
832  post_wait_work(libmesh_nullptr)
833 {}
834 
835 inline Request::Request (const request & r) :
836  _request(r),
837  post_wait_work(libmesh_nullptr)
838 {}
839 
840 inline Request::Request (const Request & other) :
841  _request(other._request),
842  post_wait_work(other.post_wait_work)
843 {
844  if (other._prior_request.get())
845  _prior_request = UniquePtr<Request>
846  (new Request(*other._prior_request.get()));
847 
848  // operator= should behave like a shared pointer
849  if (post_wait_work)
850  post_wait_work->second++;
851 }
852 
853 inline void Request::cleanup()
854 {
855  if (post_wait_work)
856  {
857  // Decrement the use count
858  post_wait_work->second--;
859 
860  if (!post_wait_work->second)
861  {
862 #ifdef DEBUG
863  // If we're done using this request, then we'd better have
864  // done the work we waited for
865  for (std::vector<PostWaitWork *>::iterator i =
866  post_wait_work->first.begin();
867  i != post_wait_work->first.end(); ++i)
868  libmesh_assert(!(*i));
869 #endif
870  delete post_wait_work;
871  post_wait_work = libmesh_nullptr;
872  }
873  }
874 }
875 
876 inline Request & Request::operator = (const Request & other)
877 {
878  this->cleanup();
879  _request = other._request;
880  post_wait_work = other.post_wait_work;
881 
882  if (other._prior_request.get())
883  _prior_request = UniquePtr<Request>
884  (new Request(*other._prior_request.get()));
885 
886  // operator= should behave like a shared pointer
887  if (post_wait_work)
888  post_wait_work->second++;
889 
890  return *this;
891 }
892 
893 inline Request & Request::operator = (const request & r)
894 {
895  this->cleanup();
896  _request = r;
897  post_wait_work = libmesh_nullptr;
898  return *this;
899 }
900 
901 inline Request::~Request () {
902  this->cleanup();
903 }
904 
905 inline Status Request::wait ()
906 {
907  LOG_SCOPE("wait()", "Parallel::Request");
908 
909  if (_prior_request.get())
910  _prior_request->wait();
911 
912  Status stat;
913 #ifdef LIBMESH_HAVE_MPI
914  libmesh_call_mpi
915  (MPI_Wait (&_request, stat.get()));
916 #endif
917  if (post_wait_work)
918  for (std::vector<PostWaitWork *>::iterator i =
919  post_wait_work->first.begin();
920  i != post_wait_work->first.end(); ++i)
921  {
922  // The user should never try to give us NULL work or try
923  // to wait() twice.
924  libmesh_assert (*i);
925  (*i)->run();
926  delete (*i);
927  *i = libmesh_nullptr;
928  }
929 
930  return stat;
931 }
932 
933 inline bool Request::test ()
934 {
935 #ifdef LIBMESH_HAVE_MPI
936  int val=0;
937 
938  // MPI_STATUS_IGNORE is from MPI-2; using it with some versions of
939  // MPICH may cause a crash:
940  // https://bugzilla.mcs.anl.gov/globus/show_bug.cgi?id=1798
941 #if MPI_VERSION > 1
942  libmesh_call_mpi
943  (MPI_Test (&_request, &val, MPI_STATUS_IGNORE));
944 #else
945  MPI_Status stat;
946  libmesh_call_mpi
947  (MPI_Test (&_request, &val, &stat));
948 #endif
949 
950  if (val)
951  {
952  libmesh_assert (_request == MPI_REQUEST_NULL);
953  libmesh_assert_equal_to (val, 1);
954  }
955 
956  return val;
957 #else
958  return true;
959 #endif
960 }
961 
962 #ifdef LIBMESH_HAVE_MPI
963 inline bool Request::test (status & stat)
964 {
965  int val=0;
966 
967  libmesh_call_mpi
968  (MPI_Test (&_request, &val, &stat));
969 
970  return val;
971 }
972 #else
973 inline bool Request::test (status &)
974 {
975  return true;
976 }
977 #endif
978 
979 inline void Request::add_prior_request(const Request & req)
980 {
981  // We're making a chain of prior requests, not a tree
982  libmesh_assert(!req._prior_request.get());
983 
984  Request * new_prior_req = new Request(req);
985 
986  // new_prior_req takes ownership of our existing _prior_request
987  new_prior_req->_prior_request.reset(this->_prior_request.release());
988 
989  // Our _prior_request now manages the new resource we just set up
990  this->_prior_request.reset(new_prior_req);
991 }
992 
993 inline void Request::add_post_wait_work(PostWaitWork * work)
994 {
995  if (!post_wait_work)
996  post_wait_work = new
997  std::pair<std::vector <PostWaitWork * >, unsigned int>
998  (std::vector <PostWaitWork * >(), 1);
999  post_wait_work->first.push_back(work);
1000 }
1001 
1002 
1003 
1007 #ifdef LIBMESH_HAVE_MPI
1008 inline void Communicator::barrier () const
1009 {
1010  if (this->size() > 1)
1011  {
1012  LOG_SCOPE("barrier()", "Parallel");
1013  libmesh_call_mpi(MPI_Barrier (this->get()));
1014  }
1015 }
1016 #else
1017 inline void Communicator::barrier () const {}
1018 #endif
1019 
1020 
1021 // Legacy free functions, e.g. Parallel::send(); these require
1022 // Communicator_World
1023 #ifndef LIBMESH_DISABLE_COMMWORLD
1024 inline void barrier (const Communicator & comm = Communicator_World)
1025 {
1026  comm.barrier();
1027 }
1028 
1029 template <typename T>
1030 inline bool verify(const T & r,
1031  const Communicator & comm = Communicator_World)
1032 { return comm.verify(r); }
1033 
1034 template <typename T>
1035 inline void min(T & r,
1036  const Communicator & comm = Communicator_World)
1037 { comm.min(r); }
1038 
1039 template <typename T, typename U>
1040 inline void minloc(T & r,
1041  U & min_id,
1042  const Communicator & comm = Communicator_World)
1043 { comm.minloc(r, min_id); }
1044 
1045 template <typename T>
1046 inline void max(T & r,
1047  const Communicator & comm = Communicator_World)
1048 { comm.max(r); }
1049 
1050 template <typename T, typename U>
1051 inline void maxloc(T & r,
1052  U & max_id,
1053  const Communicator & comm = Communicator_World)
1054 { comm.maxloc(r, max_id); }
1055 
1056 template <typename T>
1057 inline void sum(T & r,
1058  const Communicator & comm = Communicator_World)
1059 { comm.sum(r); }
1060 
1061 template <typename T>
1062 inline void set_union(T & data, const unsigned int root_id,
1063  const Communicator & comm = Communicator_World)
1064 { comm.set_union(data, root_id); }
1065 
1066 template <typename T>
1067 inline void set_union(T & data,
1068  const Communicator & comm = Communicator_World)
1069 { comm.set_union(data); }
1070 
1071 inline status probe (const unsigned int src_processor_id,
1072  const MessageTag & tag=any_tag,
1073  const Communicator & comm = Communicator_World)
1074 { return comm.probe(src_processor_id, tag); }
1075 
1076 template <typename T>
1077 inline void send (const unsigned int dest_processor_id,
1078  const T & data,
1079  const MessageTag & tag=no_tag,
1080  const Communicator & comm = Communicator_World)
1081 { comm.send(dest_processor_id, data, tag); }
1082 
1083 template <typename T>
1084 inline void send (const unsigned int dest_processor_id,
1085  const T & data,
1086  Request & req,
1087  const MessageTag & tag=no_tag,
1088  const Communicator & comm = Communicator_World)
1089 { comm.send(dest_processor_id, data, req, tag); }
1090 
1091 template <typename T>
1092 inline void send (const unsigned int dest_processor_id,
1093  const T & data,
1094  const DataType & type,
1095  const MessageTag & tag=no_tag,
1096  const Communicator & comm = Communicator_World)
1097 { comm.send(dest_processor_id, data, type, tag); }
1098 
1099 template <typename T>
1100 inline void send (const unsigned int dest_processor_id,
1101  const T & data,
1102  const DataType & type,
1103  Request & req,
1104  const MessageTag & tag=no_tag,
1105  const Communicator & comm = Communicator_World)
1106 { comm.send(dest_processor_id, data, type, req, tag); }
1107 
1108 
1109 template <typename Context, typename Iter>
1110 inline void send_packed_range (const unsigned int dest_processor_id,
1111  const Context * context,
1112  Iter range_begin,
1113  const Iter range_end,
1114  const MessageTag & tag=no_tag,
1115  const Communicator & comm = Communicator_World)
1116 { comm.send_packed_range(dest_processor_id, context, range_begin, range_end, tag); }
1117 
1118 
1119 template <typename Context, typename Iter>
1120 inline void send_packed_range (const unsigned int dest_processor_id,
1121  const Context * context,
1122  Iter range_begin,
1123  const Iter range_end,
1124  Request & req,
1125  const MessageTag & tag=no_tag,
1126  const Communicator & comm = Communicator_World)
1127 { comm.send_packed_range(dest_processor_id, context, range_begin, range_end, req, tag); }
1128 
1129 
1130 template <typename T>
1131 inline void nonblocking_send (const unsigned int dest_processor_id,
1132  T & buf,
1133  const DataType & type,
1134  Request & r,
1135  const MessageTag & tag=no_tag,
1136  const Communicator & comm = Communicator_World)
1137 { comm.send (dest_processor_id, buf, type, r, tag); }
1138 
1139 template <typename T>
1140 inline void nonblocking_send (const unsigned int dest_processor_id,
1141  T & buf,
1142  Request & r,
1143  const MessageTag & tag=no_tag,
1144  const Communicator & comm = Communicator_World)
1145 { comm.send (dest_processor_id, buf, r, tag); }
1146 
1147 template <typename T>
1148 inline Status receive (const unsigned int src_processor_id,
1149  T & buf,
1150  const MessageTag & tag=any_tag,
1151  const Communicator & comm = Communicator_World)
1152 { return comm.receive (src_processor_id, buf, tag); }
1153 
1154 template <typename T>
1155 inline void receive (const unsigned int src_processor_id,
1156  T & buf,
1157  Request & req,
1158  const MessageTag & tag=any_tag,
1159  const Communicator & comm = Communicator_World)
1160 { comm.receive (src_processor_id, buf, req, tag); }
1161 
1162 template <typename T>
1163 inline Status receive (const unsigned int src_processor_id,
1164  T & buf,
1165  const DataType & type,
1166  const MessageTag & tag=any_tag,
1167  const Communicator & comm = Communicator_World)
1168 { return comm.receive (src_processor_id, buf, type, tag); }
1169 
1170 template <typename T>
1171 inline void receive (const unsigned int src_processor_id,
1172  T & buf,
1173  const DataType & type,
1174  Request & req,
1175  const MessageTag & tag=any_tag,
1176  const Communicator & comm = Communicator_World)
1177 { comm.receive (src_processor_id, buf, type, req, tag); }
1178 
1179 template <typename Context, typename OutputIter, typename T>
1180 inline void receive_packed_range (const unsigned int src_processor_id,
1181  Context * context,
1182  OutputIter out_iter,
1183  const T * output_type,
1184  const MessageTag & tag=any_tag,
1185  const Communicator & comm = Communicator_World)
1186 {
1187  comm.receive_packed_range (src_processor_id, context, out_iter,
1188  output_type, tag);
1189 }
1190 
1191 // template <typename Context, typename OutputIter>
1192 // inline void receive_packed_range (const unsigned int src_processor_id,
1193 // Context * context,
1194 // OutputIter out_iter,
1195 // Request & req,
1196 // const MessageTag & tag=any_tag,
1197 // const Communicator & comm = Communicator_World)
1198 // { comm.receive_packed_range (src_processor_id, context, out_iter, req, tag); }
1199 
1200 template <typename T>
1201 inline void nonblocking_receive (const unsigned int src_processor_id,
1202  T & buf,
1203  const DataType & type,
1204  Request & r,
1205  const MessageTag & tag=any_tag,
1206  const Communicator & comm = Communicator_World)
1207 { comm.receive (src_processor_id, buf, type, r, tag); }
1208 
1209 template <typename T>
1210 inline void nonblocking_receive (const unsigned int src_processor_id,
1211  T & buf,
1212  Request & r,
1213  const MessageTag & tag=any_tag,
1214  const Communicator & comm = Communicator_World)
1215 { comm.receive (src_processor_id, buf, r, tag); }
1216 
1217 template <typename T1, typename T2>
1218 inline void send_receive(const unsigned int dest_processor_id,
1219  T1 & send,
1220  const unsigned int source_processor_id,
1221  T2 & recv,
1222  const MessageTag & send_tag = no_tag,
1223  const MessageTag & recv_tag = any_tag,
1224  const Communicator & comm = Communicator_World)
1225 { comm.send_receive(dest_processor_id, send, source_processor_id, recv,
1226  send_tag, recv_tag); }
1227 
1228 template <typename Context1, typename RangeIter,
1229  typename Context2, typename OutputIter, typename T>
1230 inline void send_receive_packed_range(const unsigned int dest_processor_id,
1231  const Context1 * context1,
1232  RangeIter send_begin,
1233  const RangeIter send_end,
1234  const unsigned int source_processor_id,
1235  Context2 * context2,
1236  OutputIter out_iter,
1237  const T * output_type,
1238  const MessageTag & send_tag = no_tag,
1239  const MessageTag & recv_tag = any_tag,
1240  const Communicator & comm = Communicator_World)
1241 {
1242  comm.send_receive_packed_range(dest_processor_id, context1,
1243  send_begin, send_end,
1244  source_processor_id, context2,
1245  out_iter, output_type,
1246  send_tag, recv_tag);
1247 }
1248 
1249 template <typename T1, typename T2>
1250 inline void send_receive(const unsigned int dest_processor_id,
1251  T1 & send,
1252  const DataType & type1,
1253  const unsigned int source_processor_id,
1254  T2 & recv,
1255  const DataType & type2,
1256  const MessageTag & send_tag = no_tag,
1257  const MessageTag & recv_tag = any_tag,
1258  const Communicator & comm = Communicator_World)
1259 { comm.send_receive(dest_processor_id, send, type1, source_processor_id,
1260  recv, type2, send_tag, recv_tag); }
1261 
1262 template <typename T>
1263 inline void gather(const unsigned int root_id,
1264  T send,
1265  std::vector<T> & recv,
1266  const Communicator & comm = Communicator_World)
1267 { comm.gather(root_id, send, recv); }
1268 
1269 template <typename T>
1270 inline void gather(const unsigned int root_id,
1271  std::vector<T> & r,
1272  const Communicator & comm = Communicator_World)
1273 { comm.gather(root_id, r); }
1274 
1275 template <typename T>
1276 inline void allgather(T send,
1277  std::vector<T> & recv,
1278  const Communicator & comm = Communicator_World)
1279 { comm.allgather(send, recv); }
1280 
1281 template <typename T>
1282 inline void allgather(std::vector<T> & r,
1283  const bool identical_buffer_sizes = false,
1284  const Communicator & comm = Communicator_World)
1285 { comm.allgather(r, identical_buffer_sizes); }
1286 
1287 template <typename Context, typename Iter, typename OutputIter>
1288 inline void gather_packed_range (const unsigned int root_id,
1289  Context * context,
1290  Iter range_begin,
1291  const Iter range_end,
1292  OutputIter out_iter,
1293  const Communicator & comm = Communicator_World)
1294 { comm.gather_packed_range(root_id, context, range_begin, range_end, out_iter); }
1295 
1296 template <typename Context, typename Iter, typename OutputIter>
1297 inline void allgather_packed_range (Context * context,
1298  Iter range_begin,
1299  const Iter range_end,
1300  OutputIter out_iter,
1301  const Communicator & comm = Communicator_World)
1302 { comm.allgather_packed_range(context, range_begin, range_end, out_iter); }
1303 
1304 template <typename T>
1305 inline void alltoall(std::vector<T> & r,
1306  const Communicator & comm = Communicator_World)
1307 { comm.alltoall(r); }
1308 
1309 template <typename T>
1310 inline void broadcast(T & data,
1311  const unsigned int root_id=0,
1312  const Communicator & comm = Communicator_World)
1313 { comm.broadcast(data, root_id); }
1314 
1315 template <typename Context, typename OutputContext, typename Iter, typename OutputIter>
1316 inline void broadcast_packed_range (const Context * context1,
1317  Iter range_begin,
1318  const Iter range_end,
1319  OutputContext * context2,
1320  OutputIter out_iter,
1321  const unsigned int root_id = 0,
1322  const Communicator & comm = Communicator_World)
1323 { comm.broadcast_packed_range(context1, range_begin, range_end, context2, out_iter, root_id); }
1324 
1325 #endif // #ifndef LIBMESH_DISABLE_COMMWORLD
1326 
1327 //-----------------------------------------------------------------------
1328 // Parallel members
1329 
1330 inline
1331 MessageTag::~MessageTag()
1332 {
1333  if (_comm)
1334  _comm->dereference_unique_tag(_tagvalue);
1335 }
1336 
1337 
1338 inline
1339 MessageTag::MessageTag(const MessageTag & other)
1340  : _tagvalue(other._tagvalue), _comm(other._comm)
1341 {
1342  if (_comm)
1343  _comm->reference_unique_tag(_tagvalue);
1344 }
1345 
1346 
1347 inline
1348 MessageTag Communicator::get_unique_tag(int tagvalue) const
1349 {
1350  if (used_tag_values.count(tagvalue))
1351  {
1352  // Get the largest value in the used values, and pick one
1353  // larger
1354  tagvalue = used_tag_values.rbegin()->first+1;
1355  libmesh_assert(!used_tag_values.count(tagvalue));
1356  }
1357  used_tag_values[tagvalue] = 1;
1358 
1359  // #ifndef NDEBUG
1360  // // Make sure everyone called get_unique_tag and make sure
1361  // // everyone got the same value
1362  // int maxval = tagvalue;
1363  // this->max(maxval);
1364  // libmesh_assert_equal_to (tagvalue, maxval);
1365  // #endif
1366 
1367  return MessageTag(tagvalue, this);
1368 }
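// A usage sketch (placeholder ranks and buffers): a unique tag keeps
// concurrent messages on the same communicator from being confused:
//
//   MessageTag tag = comm.get_unique_tag(1234);
//   comm.send   (dest_rank,   send_buffer, tag);
//   comm.receive(source_rank, recv_buffer, tag);
//   // The tag value is released again once all MessageTag copies go away.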
1369 
1370 
1371 inline
1372 void Communicator::reference_unique_tag(int tagvalue) const
1373 {
1374  // This had better be an already-acquired tag.
1375  libmesh_assert(used_tag_values.count(tagvalue));
1376 
1377  used_tag_values[tagvalue]++;
1378 }
1379 
1380 
1381 inline
1382 void Communicator::dereference_unique_tag(int tagvalue) const
1383 {
1384  // This had better be an already-acquired tag.
1385  libmesh_assert(used_tag_values.count(tagvalue));
1386 
1387  used_tag_values[tagvalue]--;
1388  // If we don't have any more outstanding references, we
1389  // don't even need to keep this tag in our "used" set.
1390  if (!used_tag_values[tagvalue])
1391  used_tag_values.erase(tagvalue);
1392 }
1393 
1394 
1395 #ifdef LIBMESH_HAVE_MPI
1396 template<>
1397 inline data_type dataplusint_type<short int>() { return MPI_SHORT_INT; }
1398 
1399 template<>
1400 inline data_type dataplusint_type<int>() { return MPI_2INT; }
1401 
1402 template<>
1403 inline data_type dataplusint_type<long>() { return MPI_LONG_INT; }
1404 
1405 template<>
1406 inline data_type dataplusint_type<float>() { return MPI_FLOAT_INT; }
1407 
1408 template<>
1409 inline data_type dataplusint_type<double>() { return MPI_DOUBLE_INT; }
1410 
1411 template<>
1412 inline data_type dataplusint_type<long double>() { return MPI_LONG_DOUBLE_INT; }
1413 
1414 template <typename T>
1415 inline bool Communicator::verify(const T & r) const
1416 {
1417  if (this->size() > 1 && Attributes<T>::has_min_max == true)
1418  {
1419  T tempmin = r, tempmax = r;
1420  this->min(tempmin);
1421  this->max(tempmax);
1422  bool verified = (r == tempmin) &&
1423  (r == tempmax);
1424  this->min(verified);
1425  return verified;
1426  }
1427 
1428 #ifdef LIBMESH_HAVE_CXX11
1429  static_assert(Attributes<T>::has_min_max,
1430  "Tried to verify an unverifiable type");
1431 #endif
1432 
1433  return true;
1434 }
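// For illustration: verify() is typically used inside assertions to check
// that all processors agree on a value, e.g. a container size:
//
//   libmesh_assert(comm.verify(my_vector.size()));
//
// semiverify() below does the same but tolerates ranks that pass a null
// pointer (ranks which have no local value to check).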
1435 
1436 
1437 
1438 template <>
1439 inline bool Communicator::verify(const bool & r) const
1440 {
1441  const unsigned char rnew = r;
1442  return this->verify(rnew);
1443 }
1444 
1445 
1446 
1447 template <typename T>
1448 inline bool Communicator::semiverify(const T * r) const
1449 {
1450  if (this->size() > 1 && Attributes<T>::has_min_max == true)
1451  {
1452  T tempmin, tempmax;
1453  if (r)
1454  tempmin = tempmax = *r;
1455  else
1456  {
1457  Attributes<T>::set_highest(tempmin);
1458  Attributes<T>::set_lowest(tempmax);
1459  }
1460  this->min(tempmin);
1461  this->max(tempmax);
1462  bool invalid = r && ((*r != tempmin) ||
1463  (*r != tempmax));
1464  this->max(invalid);
1465  return !invalid;
1466  }
1467 
1468 #ifdef LIBMESH_HAVE_CXX11
1469  static_assert(Attributes<T>::has_min_max,
1470  "Tried to semiverify an unverifiable type");
1471 #endif
1472 
1473  return true;
1474 }
1475 
1476 
1477 
1478 template <>
1479 inline bool Communicator::semiverify(const bool * r) const
1480 {
1481  if (r)
1482  {
1483  const unsigned char rnew = *r;
1484  return this->semiverify(&rnew);
1485  }
1486 
1487  const unsigned char * rptr = libmesh_nullptr;
1488  return this->semiverify(rptr);
1489 }
1490 
1491 
1492 
1493 template <typename T>
1494 inline bool Communicator::semiverify(const std::vector<T> * r) const
1495 {
1496  if (this->size() > 1 && Attributes<T>::has_min_max == true)
1497  {
1498  std::size_t rsize = r ? r->size() : 0;
1499  std::size_t * psize = r ? &rsize : libmesh_nullptr;
1500 
1501  if (!this->semiverify(psize))
1502  return false;
1503 
1504  this->max(rsize);
1505 
1506  std::vector<T> tempmin, tempmax;
1507  if (r)
1508  {
1509  tempmin = tempmax = *r;
1510  }
1511  else
1512  {
1513  tempmin.resize(rsize);
1514  tempmax.resize(rsize);
1515  Attributes<std::vector<T>>::set_highest(tempmin);
1516  Attributes<std::vector<T>>::set_lowest(tempmax);
1517  }
1518  this->min(tempmin);
1519  this->max(tempmax);
1520  bool invalid = r && ((*r != tempmin) ||
1521  (*r != tempmax));
1522  this->max(invalid);
1523  return !invalid;
1524  }
1525 
1526 #ifdef LIBMESH_HAVE_CXX11
1527  static_assert(Attributes<T>::has_min_max,
1528  "Tried to semiverify a vector of an unverifiable type");
1529 #endif
1530 
1531  return true;
1532 }
1533 
1534 
1535 
1536 inline bool Communicator::verify(const std::string & r) const
1537 {
1538  if (this->size() > 1)
1539  {
1540  // Cannot use <char> since MPI_MIN is not
1541  // strictly defined for chars!
1542  std::vector<short int> temp; temp.reserve(r.size());
1543  for (std::size_t i=0; i != r.size(); ++i)
1544  temp.push_back(r[i]);
1545  return this->verify(temp);
1546  }
1547  return true;
1548 }
1549 
1550 
1551 
1552 inline bool Communicator::semiverify(const std::string * r) const
1553 {
1554  if (this->size() > 1)
1555  {
1556  std::size_t rsize = r ? r->size() : 0;
1557  std::size_t * psize = r ? &rsize : libmesh_nullptr;
1558 
1559  if (!this->semiverify(psize))
1560  return false;
1561 
1562  this->max(rsize);
1563 
1564  // Cannot use <char> since MPI_MIN is not
1565  // strictly defined for chars!
1566  std::vector<short int> temp (rsize);
1567  if (r)
1568  {
1569  // assign in place (not push_back) so temp keeps exactly rsize entries
1570  for (std::size_t i=0; i != rsize; ++i)
1571  temp[i] = (*r)[i];
1572  }
1573 
1574  std::vector<short int> * ptemp = r ? &temp: libmesh_nullptr;
1575 
1576  return this->semiverify(ptemp);
1577  }
1578  return true;
1579 }
1580 
1581 
1582 
1583 template <typename T>
1584 inline void Communicator::min(T & r) const
1585 {
1586  if (this->size() > 1)
1587  {
1588  LOG_SCOPE("min(scalar)", "Parallel");
1589 
1590  T temp = r;
1591  libmesh_call_mpi
1592  (MPI_Allreduce (&temp, &r, 1, StandardType<T>(&temp),
1593  OpFunction<T>::min(),
1594  this->get()));
1595  }
1596 }
1597 
1598 
1599 inline void Communicator::min(bool & r) const
1600 {
1601  if (this->size() > 1)
1602  {
1603  LOG_SCOPE("min(bool)", "Parallel");
1604 
1605  unsigned int tempsend = r;
1606  unsigned int temp;
1607 
1608  libmesh_call_mpi
1609  (MPI_Allreduce (&tempsend, &temp, 1,
1610  StandardType<unsigned int>(),
1611  MPI_MIN,
1612  this->get()));
1613  r = temp;
1614  }
1615 }
1616 
1617 
1618 template <typename T>
1619 inline void Communicator::min(std::vector<T> & r) const
1620 {
1621  if (this->size() > 1 && !r.empty())
1622  {
1623  LOG_SCOPE("min(vector)", "Parallel");
1624 
1625  libmesh_assert(this->verify(r.size()));
1626 
1627  std::vector<T> temp(r);
1628  libmesh_call_mpi
1629  (MPI_Allreduce (&temp[0], &r[0], cast_int<int>(r.size()),
1630  StandardType<T>(&temp[0]),
1631  OpFunction<T>::min(),
1632  this->get()));
1633  }
1634 }
1635 
1636 
1637 inline void Communicator::min(std::vector<bool> & r) const
1638 {
1639  if (this->size() > 1 && !r.empty())
1640  {
1641  LOG_SCOPE("min(vector<bool>)", "Parallel");
1642 
1643  libmesh_assert(this->verify(r.size()));
1644 
1645  std::vector<unsigned int> ruint;
1646  pack_vector_bool(r, ruint);
1647  std::vector<unsigned int> temp(ruint.size());
1648  libmesh_call_mpi
1649  (MPI_Allreduce (&ruint[0], &temp[0],
1650  cast_int<int>(ruint.size()),
1651  StandardType<unsigned int>(), MPI_BAND,
1652  this->get()));
1653  unpack_vector_bool(temp, r);
1654  }
1655 }
1656 
1657 
1658 template <typename T>
1659 inline void Communicator::minloc(T & r,
1660  unsigned int & min_id) const
1661 {
1662  if (this->size() > 1)
1663  {
1664  LOG_SCOPE("minloc(scalar)", "Parallel");
1665 
1666  DataPlusInt<T> data_in;
1667  data_in.val = r;
1668  data_in.rank = this->rank();
1669  DataPlusInt<T> data_out;
1670  libmesh_call_mpi
1671  (MPI_Allreduce (&data_in, &data_out, 1, dataplusint_type<T>(),
1672  OpFunction<T>::min_location(), this->get()));
1673  r = data_out.val;
1674  min_id = data_out.rank;
1675  }
1676  else
1677  min_id = this->rank();
1678 }
1679 
1680 
1681 inline void Communicator::minloc(bool & r,
1682  unsigned int & min_id) const
1683 {
1684  if (this->size() > 1)
1685  {
1686  LOG_SCOPE("minloc(bool)", "Parallel");
1687 
1688  DataPlusInt<int> data_in;
1689  data_in.val = r;
1690  data_in.rank = this->rank();
1691  DataPlusInt<int> data_out;
1692  libmesh_call_mpi
1693  (MPI_Allreduce (&data_in, &data_out, 1,
1694  dataplusint_type<int>(),
1695  OpFunction<int>::min_location(), this->get()));
1696  r = data_out.val;
1697  min_id = data_out.rank;
1698  }
1699  else
1700  min_id = this->rank();
1701 }
1702 
1703 
1704 template <typename T>
1705 inline void Communicator::minloc(std::vector<T> & r,
1706  std::vector<unsigned int> & min_id) const
1707 {
1708  if (this->size() > 1 && !r.empty())
1709  {
1710  LOG_SCOPE("minloc(vector)", "Parallel");
1711 
1712  libmesh_assert(this->verify(r.size()));
1713 
1714  std::vector<DataPlusInt<T>> data_in(r.size());
1715  for (std::size_t i=0; i != r.size(); ++i)
1716  {
1717  data_in[i].val = r[i];
1718  data_in[i].rank = this->rank();
1719  }
1720  std::vector<DataPlusInt<T>> data_out(r.size());
1721  libmesh_call_mpi
1722  (MPI_Allreduce (&data_in[0], &data_out[0],
1723  cast_int<int>(r.size()),
1724  dataplusint_type<T>(),
1725  OpFunction<T>::min_location(), this->get()));
1726  for (std::size_t i=0; i != r.size(); ++i)
1727  {
1728  r[i] = data_out[i].val;
1729  min_id[i] = data_out[i].rank;
1730  }
1731  }
1732  else if (!r.empty())
1733  {
1734  for (std::size_t i=0; i != r.size(); ++i)
1735  min_id[i] = this->rank();
1736  }
1737 }
1738 
1739 
1740 inline void Communicator::minloc(std::vector<bool> & r,
1741  std::vector<unsigned int> & min_id) const
1742 {
1743  if (this->size() > 1 && !r.empty())
1744  {
1745  LOG_SCOPE("minloc(vector<bool>)", "Parallel");
1746 
1747  libmesh_assert(this->verify(r.size()));
1748 
1749  std::vector<DataPlusInt<int>> data_in(r.size());
1750  for (std::size_t i=0; i != r.size(); ++i)
1751  {
1752  data_in[i].val = r[i];
1753  data_in[i].rank = this->rank();
1754  }
1755  std::vector<DataPlusInt<int>> data_out(r.size());
1756  libmesh_call_mpi
1757  (MPI_Allreduce (&data_in[0], &data_out[0],
1758  cast_int<int>(r.size()),
1759  dataplusint_type<int>(),
1760  OpFunction<int>::min_location(), this->get()));
1761  for (std::size_t i=0; i != r.size(); ++i)
1762  {
1763  r[i] = data_out[i].val;
1764  min_id[i] = data_out[i].rank;
1765  }
1766  }
1767  else if (!r.empty())
1768  {
1769  for (std::size_t i=0; i != r.size(); ++i)
1770  min_id[i] = this->rank();
1771  }
1772 }
1773 
1774 
1775 template <typename T>
1776 inline void Communicator::max(T & r) const
1777 {
1778  if (this->size() > 1)
1779  {
1780  LOG_SCOPE("max(scalar)", "Parallel");
1781 
1782  T temp;
1783  libmesh_call_mpi
1784  (MPI_Allreduce (&r, &temp, 1, StandardType<T>(&r),
1785  OpFunction<T>::max(),
1786  this->get()));
1787  r = temp;
1788  }
1789 }
1790 
1791 
1792 inline void Communicator::max(bool & r) const
1793 {
1794  if (this->size() > 1)
1795  {
1796  LOG_SCOPE("max(bool)", "Parallel");
1797 
1798  unsigned int tempsend = r;
1799  unsigned int temp;
1800  libmesh_call_mpi
1801  (MPI_Allreduce (&tempsend, &temp, 1,
1802  StandardType<unsigned int>(),
1803  MPI_MAX,
1804  this->get()));
1805  r = temp;
1806  }
1807 }
1808 
1809 
1810 template <typename T>
1811 inline void Communicator::max(std::vector<T> & r) const
1812 {
1813  if (this->size() > 1 && !r.empty())
1814  {
1815  LOG_SCOPE("max(vector)", "Parallel");
1816 
1817  libmesh_assert(this->verify(r.size()));
1818 
1819  std::vector<T> temp(r);
1820  libmesh_call_mpi
1821  (MPI_Allreduce (&temp[0], &r[0], cast_int<int>(r.size()),
1822  StandardType<T>(&temp[0]),
1823  OpFunction<T>::max(),
1824  this->get()));
1825  }
1826 }
1827 
1828 
1829 inline void Communicator::max(std::vector<bool> & r) const
1830 {
1831  if (this->size() > 1 && !r.empty())
1832  {
1833  LOG_SCOPE("max(vector<bool>)", "Parallel");
1834 
1835  libmesh_assert(this->verify(r.size()));
1836 
1837  std::vector<unsigned int> ruint;
1838  pack_vector_bool(r, ruint);
1839  std::vector<unsigned int> temp(ruint.size());
1840  libmesh_call_mpi
1841  (MPI_Allreduce (&ruint[0], &temp[0],
1842  cast_int<int>(ruint.size()),
1843  StandardType<unsigned int>(), MPI_BOR,
1844  this->get()));
1845  unpack_vector_bool(temp, r);
1846  }
1847 }
1848 
1849 
1850 template <typename T>
1851 inline void Communicator::maxloc(T & r,
1852  unsigned int & max_id) const
1853 {
1854  if (this->size() > 1)
1855  {
1856  LOG_SCOPE("maxloc(scalar)", "Parallel");
1857 
1858  DataPlusInt<T> data_in;
1859  data_in.val = r;
1860  data_in.rank = this->rank();
1861  DataPlusInt<T> data_out;
1862  libmesh_call_mpi
1863  (MPI_Allreduce (&data_in, &data_out, 1,
1864  dataplusint_type<T>(),
1865  OpFunction<T>::max_location(),
1866  this->get()));
1867  r = data_out.val;
1868  max_id = data_out.rank;
1869  }
1870  else
1871  max_id = this->rank();
1872 }
1873 
1874 
1875 inline void Communicator::maxloc(bool & r,
1876  unsigned int & max_id) const
1877 {
1878  if (this->size() > 1)
1879  {
1880  LOG_SCOPE("maxloc(bool)", "Parallel");
1881 
1882  DataPlusInt<int> data_in;
1883  data_in.val = r;
1884  data_in.rank = this->rank();
1885  DataPlusInt<int> data_out;
1886  libmesh_call_mpi
1887  (MPI_Allreduce (&data_in, &data_out, 1,
1888  dataplusint_type<int>(),
1889  OpFunction<int>::max_location(),
1890  this->get()));
1891  r = data_out.val;
1892  max_id = data_out.rank;
1893  }
1894  else
1895  max_id = this->rank();
1896 }
1897 
1898 
1899 template <typename T>
1900 inline void Communicator::maxloc(std::vector<T> & r,
1901  std::vector<unsigned int> & max_id) const
1902 {
1903  if (this->size() > 1 && !r.empty())
1904  {
1905  LOG_SCOPE("maxloc(vector)", "Parallel");
1906 
1907  libmesh_assert(this->verify(r.size()));
1908 
1909  std::vector<DataPlusInt<T>> data_in(r.size());
1910  for (std::size_t i=0; i != r.size(); ++i)
1911  {
1912  data_in[i].val = r[i];
1913  data_in[i].rank = this->rank();
1914  }
1915  std::vector<DataPlusInt<T>> data_out(r.size());
1916  libmesh_call_mpi
1917  (MPI_Allreduce (&data_in[0], &data_out[0],
1918  cast_int<int>(r.size()),
1919  dataplusint_type<T>(),
1920  OpFunction<T>::max_location(),
1921  this->get()));
1922  for (std::size_t i=0; i != r.size(); ++i)
1923  {
1924  r[i] = data_out[i].val;
1925  max_id[i] = data_out[i].rank;
1926  }
1927  }
1928  else if (!r.empty())
1929  {
1930  for (std::size_t i=0; i != r.size(); ++i)
1931  max_id[i] = this->rank();
1932  }
1933 }
1934 
1935 
1936 inline void Communicator::maxloc(std::vector<bool> & r,
1937  std::vector<unsigned int> & max_id) const
1938 {
1939  if (this->size() > 1 && !r.empty())
1940  {
1941  LOG_SCOPE("maxloc(vector<bool>)", "Parallel");
1942 
1943  libmesh_assert(this->verify(r.size()));
1944 
1945  std::vector<DataPlusInt<int>> data_in(r.size());
1946  for (std::size_t i=0; i != r.size(); ++i)
1947  {
1948  data_in[i].val = r[i];
1949  data_in[i].rank = this->rank();
1950  }
1951  std::vector<DataPlusInt<int>> data_out(r.size());
1952  libmesh_call_mpi
1953  (MPI_Allreduce (&data_in[0], &data_out[0],
1954  cast_int<int>(r.size()),
1955  dataplusint_type<int>(),
1956  OpFunction<int>::max_location(),
1957  this->get()));
1958  for (std::size_t i=0; i != r.size(); ++i)
1959  {
1960  r[i] = data_out[i].val;
1961  max_id[i] = data_out[i].rank;
1962  }
1963  }
1964  else if (!r.empty())
1965  {
1966  for (std::size_t i=0; i != r.size(); ++i)
1967  max_id[i] = this->rank();
1968  }
1969 }
1970 
1971 
1972 template <typename T>
1973 inline void Communicator::sum(T & r) const
1974 {
1975  if (this->size() > 1)
1976  {
1977  LOG_SCOPE("sum()", "Parallel");
1978 
1979  T temp = r;
1980  libmesh_call_mpi
1981  (MPI_Allreduce (&temp, &r, 1, StandardType<T>(&temp),
1982  OpFunction<T>::sum(),
1983  this->get()));
1984  }
1985 }
1986 
1987 
1988 template <typename T>
1989 inline void Communicator::sum(std::vector<T> & r) const
1990 {
1991  if (this->size() > 1 && !r.empty())
1992  {
1993  LOG_SCOPE("sum()", "Parallel");
1994 
1995  libmesh_assert(this->verify(r.size()));
1996 
1997  std::vector<T> temp(r);
1998  libmesh_call_mpi
1999  (MPI_Allreduce (&temp[0], &r[0], cast_int<int>(r.size()),
2000  StandardType<T>(&temp[0]),
2001  OpFunction<T>::sum(),
2002  this->get()));
2003  }
2004 }
2005 
2006 
2007 // We still do function overloading for complex sums - in a perfect
2008 // world we'd have a StandardSumOp to go along with StandardType...
2009 template <typename T>
2010 inline void Communicator::sum(std::complex<T> & r) const
2011 {
2012  if (this->size() > 1)
2013  {
2014  LOG_SCOPE("sum()", "Parallel");
2015 
2016  std::complex<T> temp(r);
2017  libmesh_call_mpi
2018  (MPI_Allreduce (&temp, &r, 2, StandardType<T>(),
2019  OpFunction<T>::sum(),
2020  this->get()));
2021  }
2022 }
2023 
2024 
2025 template <typename T>
2026 inline void Communicator::sum(std::vector<std::complex<T>> & r) const
2027 {
2028  if (this->size() > 1 && !r.empty())
2029  {
2030  LOG_SCOPE("sum()", "Parallel");
2031 
2032  libmesh_assert(this->verify(r.size()));
2033 
2034  std::vector<std::complex<T>> temp(r);
2035  libmesh_call_mpi
2036  (MPI_Allreduce (&temp[0], &r[0], cast_int<int>(r.size() * 2),
2037  StandardType<T>(libmesh_nullptr),
2038  OpFunction<T>::sum(), this->get()));
2039  }
2040 }
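// For illustration: the overloads above give scalar, vector, and complex
// sums one calling convention, with std::complex<T> reduced as 2*N values
// of T (placeholders, assuming `comm` is a Communicator):
//
//   std::complex<double> z(1., 2.);
//   comm.sum(z);              // sums real and imaginary parts separately
//   std::vector<double> v(10, 1.);
//   comm.sum(v);              // element-wise sum across processors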
2041 
2042 
2043 template <typename T>
2044 inline void Communicator::set_union(std::set<T> & data,
2045  const unsigned int root_id) const
2046 {
2047  std::vector<T> vecdata(data.begin(), data.end());
2048  this->gather(root_id, vecdata);
2049  if (this->rank() == root_id)
2050  data.insert(vecdata.begin(), vecdata.end());
2051 }
2052 
2053 
2054 
2055 template <typename T>
2056 inline void Communicator::set_union(std::set<T> & data) const
2057 {
2058  std::vector<T> vecdata(data.begin(), data.end());
2059  this->allgather(vecdata, false);
2060  data.insert(vecdata.begin(), vecdata.end());
2061 }
2062 
2063 
2064 
2065 template <typename T1, typename T2>
2066 inline void Communicator::set_union(std::map<T1,T2> & data,
2067  const unsigned int root_id) const
2068 {
2069  std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
2070  this->gather(root_id, vecdata);
2071  if (this->rank() == root_id)
2072  data.insert(vecdata.begin(), vecdata.end());
2073 }
2074 
2075 
2076 
2077 template <typename T1, typename T2>
2078 inline void Communicator::set_union(std::map<T1,T2> & data) const
2079 {
2080  std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
2081  this->allgather(vecdata, false);
2082  data.insert(vecdata.begin(), vecdata.end());
2083 }
2084 
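// Usage sketch (editorial, not from the libMesh sources): the rooted set_union()
// overloads above collect the union onto root_id only, while the root-less
// overloads leave every rank holding the full union. example_set_union() is
// hypothetical.
#if 0
inline void example_set_union (const Communicator & comm)
{
  std::set<unsigned int> ids;
  ids.insert(comm.rank());          // each rank contributes its own id

  comm.set_union(ids);              // every rank now holds {0, 1, ..., comm.size()-1}
}
#endif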
2085 
2086 
2087 inline status Communicator::probe (const unsigned int src_processor_id,
2088  const MessageTag & tag) const
2089 {
2090  LOG_SCOPE("probe()", "Parallel");
2091 
2092  status stat;
2093 
2094  libmesh_call_mpi
2095  (MPI_Probe (src_processor_id, tag.value(), this->get(), &stat));
2096 
2097  return stat;
2098 }
2099 
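// Usage sketch (editorial, not from the libMesh sources): probe() lets a
// receiver inspect a pending message before posting the matching receive; here
// the Status wrapper reports how many entries are waiting. example_probe(),
// the ranks used, and the payload are hypothetical.
#if 0
inline void example_probe (const Communicator & comm)
{
  if (comm.rank() == 0)
    {
      std::vector<double> outgoing(10, 3.14);
      comm.send(1, outgoing);
    }
  else if (comm.rank() == 1)
    {
      Status stat(comm.probe(0, any_tag), StandardType<double>());
      std::vector<double> incoming(stat.size());   // pre-size from the probed count
      comm.receive(0, incoming);
    }
}
#endif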
2100 template<typename T>
2101 inline Status Communicator::packed_range_probe (const unsigned int src_processor_id,
2102  const MessageTag & tag,
2103  bool & flag) const
2104 {
2105  LOG_SCOPE("packed_range_probe()", "Parallel");
2106 
2107  libmesh_experimental();
2108 
2109  Status stat((StandardType<typename Packing<T>::buffer_type>()));
2110 
2111  int int_flag;
2112 
2113  libmesh_call_mpi(MPI_Iprobe(src_processor_id,
2114  tag.value(),
2115  this->get(),
2116  &int_flag,
2117  stat.get()));
2118 
2119  flag = int_flag;
2120 
2121  return stat;
2122 }
2123 
2124 
2125 template<typename T>
2126 inline void Communicator::send (const unsigned int dest_processor_id,
2127  const std::basic_string<T> & buf,
2128  const MessageTag & tag) const
2129 {
2130  LOG_SCOPE("send()", "Parallel");
2131 
2132  T * dataptr = buf.empty() ? libmesh_nullptr : const_cast<T *>(buf.data());
2133 
2134  libmesh_call_mpi
2135  (((this->send_mode() == SYNCHRONOUS) ?
2136  MPI_Ssend : MPI_Send) (dataptr,
2137  cast_int<int>(buf.size()),
2138  StandardType<T>(dataptr),
2139  dest_processor_id,
2140  tag.value(),
2141  this->get()));
2142 }
2143 
2144 
2145 
2146 template <typename T>
2147 inline void Communicator::send (const unsigned int dest_processor_id,
2148  const std::basic_string<T> & buf,
2149  Request & req,
2150  const MessageTag & tag) const
2151 {
2152  LOG_SCOPE("send()", "Parallel");
2153 
2154  T * dataptr = buf.empty() ? libmesh_nullptr : const_cast<T *>(buf.data());
2155 
2158  libmesh_call_mpi
2159  (((this->send_mode() == SYNCHRONOUS) ?
2160  MPI_Issend : MPI_Isend) (dataptr,
2161  cast_int<int>(buf.size()),
2162  StandardType<T>(dataptr),
2163  dest_processor_id,
2164  tag.value(),
2165  this->get(),
2166  req.get()));
2167 }
2168 
2169 
2170 
2171 template <typename T>
2172 inline void Communicator::send (const unsigned int dest_processor_id,
2173  const T & buf,
2174  const MessageTag & tag) const
2175 {
2176  LOG_SCOPE("send()", "Parallel");
2177 
2178  T * dataptr = const_cast<T*> (&buf);
2179 
2180  libmesh_call_mpi
2181  (((this->send_mode() == SYNCHRONOUS) ?
2182  MPI_Ssend : MPI_Send) (dataptr,
2183  1,
2184  StandardType<T>(dataptr),
2185  dest_processor_id,
2186  tag.value(),
2187  this->get()));
2188 }
2189 
2190 
2191 
2192 template <typename T>
2193 inline void Communicator::send (const unsigned int dest_processor_id,
2194  const T & buf,
2195  Request & req,
2196  const MessageTag & tag) const
2197 {
2198  LOG_SCOPE("send()", "Parallel");
2199 
2200  T * dataptr = const_cast<T*>(&buf);
2201 
2202  libmesh_call_mpi
2203  (((this->send_mode() == SYNCHRONOUS) ?
2204  MPI_Issend : MPI_Isend) (dataptr,
2205  1,
2206  StandardType<T>(dataptr),
2207  dest_processor_id,
2208  tag.value(),
2209  this->get(),
2210  req.get()));
2211 }
2212 
2213 
2214 
2215 template <typename T>
2216 inline void Communicator::send (const unsigned int dest_processor_id,
2217  const std::set<T> & buf,
2218  const MessageTag & tag) const
2219 {
2220  this->send(dest_processor_id, buf,
2221  StandardType<T>(buf.empty() ? libmesh_nullptr : &(*buf.begin())), tag);
2222 }
2223 
2224 
2225 
2226 template <typename T>
2227 inline void Communicator::send (const unsigned int dest_processor_id,
2228  const std::set<T> & buf,
2229  Request & req,
2230  const MessageTag & tag) const
2231 {
2232  this->send(dest_processor_id, buf,
2233  StandardType<T>(buf.empty() ? libmesh_nullptr : &(*buf.begin())), req, tag);
2234 }
2235 
2236 
2237 
2238 template <typename T>
2239 inline void Communicator::send (const unsigned int dest_processor_id,
2240  const std::set<T> & buf,
2241  const DataType & type,
2242  const MessageTag & tag) const
2243 {
2244  LOG_SCOPE("send()", "Parallel");
2245 
2246  std::vector<T> vecbuf(buf.begin(), buf.end());
2247  this->send(dest_processor_id, vecbuf, type, tag);
2248 }
2249 
2250 
2251 
2252 template <typename T>
2253 inline void Communicator::send (const unsigned int dest_processor_id,
2254  const std::set<T> & buf,
2255  const DataType & type,
2256  Request & req,
2257  const MessageTag & tag) const
2258 {
2259  LOG_SCOPE("send()", "Parallel");
2260 
2261  // Allocate temporary buffer on the heap so it lives until after
2262  // the non-blocking send completes
2263  std::vector<T> * vecbuf =
2264  new std::vector<T>(buf.begin(), buf.end());
2265 
2266  // Make the Request::wait() handle deleting the buffer
2267  req.add_post_wait_work
2268  (new Parallel::PostWaitDeleteBuffer<std::vector<T>>(vecbuf));
2269 
2270  this->send(dest_processor_id, *vecbuf, type, req, tag);
2271 }
2272 
2273 
2274 
2275 template <typename T>
2276 inline void Communicator::send (const unsigned int dest_processor_id,
2277  const std::vector<T> & buf,
2278  const MessageTag & tag) const
2279 {
2280  this->send(dest_processor_id, buf,
2281  StandardType<T>(buf.empty() ? libmesh_nullptr : &buf.front()), tag);
2282 }
2283 
2284 
2285 
2286 template <typename T>
2287 inline void Communicator::send (const unsigned int dest_processor_id,
2288  const std::vector<T> & buf,
2289  Request & req,
2290  const MessageTag & tag) const
2291 {
2292  this->send(dest_processor_id, buf,
2293  StandardType<T>(buf.empty() ? libmesh_nullptr : &buf.front()), req, tag);
2294 }
2295 
2296 
2297 
2298 template <typename T>
2299 inline void Communicator::send (const unsigned int dest_processor_id,
2300  const std::vector<T> & buf,
2301  const DataType & type,
2302  const MessageTag & tag) const
2303 {
2304  LOG_SCOPE("send()", "Parallel");
2305 
2306  libmesh_call_mpi
2307  (((this->send_mode() == SYNCHRONOUS) ?
2308  MPI_Ssend : MPI_Send) (buf.empty() ? libmesh_nullptr : const_cast<T*>(&buf[0]),
2309  cast_int<int>(buf.size()),
2310  type,
2311  dest_processor_id,
2312  tag.value(),
2313  this->get()));
2314 }
2315 
2316 
2317 
2318 template <typename T>
2319 inline void Communicator::send (const unsigned int dest_processor_id,
2320  const std::vector<T> & buf,
2321  const DataType & type,
2322  Request & req,
2323  const MessageTag & tag) const
2324 {
2325  LOG_SCOPE("send()", "Parallel");
2326 
2327  libmesh_call_mpi
2328  (((this->send_mode() == SYNCHRONOUS) ?
2329  MPI_Issend : MPI_Isend) (buf.empty() ? libmesh_nullptr : const_cast<T*>(&buf[0]),
2330  cast_int<int>(buf.size()),
2331  type,
2332  dest_processor_id,
2333  tag.value(),
2334  this->get(),
2335  req.get()));
2336 }
2337 
2338 
2339 template <typename Context, typename Iter>
2340 inline void Communicator::send_packed_range (const unsigned int dest_processor_id,
2341  const Context * context,
2342  Iter range_begin,
2343  const Iter range_end,
2344  const MessageTag & tag) const
2345 {
2346  // We will serialize variable size objects from *range_begin to
2347  // *range_end as a sequence of plain data (e.g. ints) in this buffer
2348  typedef typename std::iterator_traits<Iter>::value_type T;
2349 
2350  std::size_t total_buffer_size =
2351  Parallel::packed_range_size (context, range_begin, range_end);
2352 
2353  this->send(dest_processor_id, total_buffer_size, tag);
2354 
2355 #ifdef DEBUG
2356  std::size_t used_buffer_size = 0;
2357 #endif
2358 
2359  while (range_begin != range_end)
2360  {
2361  libmesh_assert_greater (std::distance(range_begin, range_end), 0);
2362 
2363  std::vector<typename Parallel::Packing<T>::buffer_type> buffer;
2364 
2365  const Iter next_range_begin = Parallel::pack_range
2366  (context, range_begin, range_end, buffer);
2367 
2368  libmesh_assert_greater (std::distance(range_begin, next_range_begin), 0);
2369 
2370  range_begin = next_range_begin;
2371 
2372 #ifdef DEBUG
2373  used_buffer_size += buffer.size();
2374 #endif
2375 
2376  // Blocking send of the buffer
2377  this->send(dest_processor_id, buffer, tag);
2378  }
2379 
2380 #ifdef DEBUG
2381  libmesh_assert_equal_to(used_buffer_size, total_buffer_size);
2382 #endif
2383 }
2384 
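// Usage sketch (editorial, not from the libMesh sources): packed-range sends
// serialize objects that have a Parallel::Packing specialization. MyObject,
// its Packing<MyObject> specialization, the null context pointer, and
// example_packed_range() are all assumptions supplied by the caller.
#if 0
inline void example_packed_range (const Communicator & comm,
                                  const std::vector<MyObject> & objects)
{
  void * no_context = libmesh_nullptr;

  if (comm.rank() == 0)
    comm.send_packed_range(1, no_context, objects.begin(), objects.end());
  else if (comm.rank() == 1)
    {
      std::vector<MyObject> received;
      comm.receive_packed_range(0, no_context,
                                std::back_inserter(received),
                                (MyObject *)libmesh_nullptr);
    }
}
#endif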
2385 
2386 template <typename Context, typename Iter>
2387 inline void Communicator::send_packed_range (const unsigned int dest_processor_id,
2388  const Context * context,
2389  Iter range_begin,
2390  const Iter range_end,
2391  Request & req,
2392  const MessageTag & tag) const
2393 {
2394  // Allocate a buffer on the heap so we don't have to free it until
2395  // after the Request::wait()
2396  typedef typename std::iterator_traits<Iter>::value_type T;
2397  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
2398 
2399  std::size_t total_buffer_size =
2400  Parallel::packed_range_size (context, range_begin, range_end);
2401 
2402  // That local variable will be gone soon; we need a send buffer that
2403  // will stick around. I heard you like buffering so I put a buffer
2404  // for your buffer size so you can buffer the size of your buffer.
2405  std::size_t * total_buffer_size_buffer = new std::size_t;
2406  *total_buffer_size_buffer = total_buffer_size;
2407 
2408  // Delete the buffer size's buffer when we're done
2409  Request intermediate_req = request();
2410  intermediate_req.add_post_wait_work
2411  (new Parallel::PostWaitDeleteBuffer<std::size_t>(total_buffer_size_buffer));
2412  this->send(dest_processor_id, *total_buffer_size_buffer, intermediate_req, tag);
2413 
2414  // And don't finish up the full request until we're done with its
2415  // dependencies
2416  req.add_prior_request(intermediate_req);
2417 
2418 #ifdef DEBUG
2419  std::size_t used_buffer_size = 0;
2420 #endif
2421 
2422  while (range_begin != range_end)
2423  {
2424  libmesh_assert_greater (std::distance(range_begin, range_end), 0);
2425 
2426  std::vector<buffer_t> * buffer = new std::vector<buffer_t>();
2427 
2428  const Iter next_range_begin =
2429  Parallel::pack_range(context, range_begin, range_end,
2430  *buffer);
2431 
2432  libmesh_assert_greater (std::distance(range_begin, next_range_begin), 0);
2433 
2434  range_begin = next_range_begin;
2435 
2436 #ifdef DEBUG
2437  used_buffer_size += buffer->size();
2438 #endif
2439 
2440  Request next_intermediate_req;
2441 
2442  Request * my_req = (range_begin == range_end) ? &req : &next_intermediate_req;
2443 
2444  // Make the Request::wait() handle deleting the buffer
2445  my_req->add_post_wait_work
2446  (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t>>
2447  (buffer));
2448 
2449  // Non-blocking send of the buffer
2450  this->send(dest_processor_id, *buffer, *my_req, tag);
2451 
2452  if (range_begin != range_end)
2453  req.add_prior_request(*my_req);
2454  }
2455 }
2456 
2457 
2458 
2459 
2460 
2461 
2462 
2463 template <typename Context, typename Iter>
2464 inline void Communicator::nonblocking_send_packed_range (const unsigned int dest_processor_id,
2465  const Context * context,
2466  Iter range_begin,
2467  const Iter range_end,
2468  Request & req,
2469  const MessageTag & tag) const
2470 {
2471  libmesh_experimental();
2472 
2473  // Allocate a buffer on the heap so we don't have to free it until
2474  // after the Request::wait()
2475  typedef typename std::iterator_traits<Iter>::value_type T;
2476  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
2477 
2478  if (range_begin != range_end)
2479  {
2480  std::vector<buffer_t> * buffer = new std::vector<buffer_t>();
2481 
2482  range_begin =
2483  Parallel::pack_range(context,
2484  range_begin,
2485  range_end,
2486  *buffer,
2487  // MPI-2 can only use integers for size
2488  std::numeric_limits<int>::max());
2489 
2490  if (range_begin != range_end)
2491  libmesh_error_msg("Non-blocking packed range sends cannot exceed " << std::numeric_limits<int>::max() << " in size");
2492 
2493  // Make the Request::wait() handle deleting the buffer
2494  req.add_post_wait_work
2495  (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t>>
2496  (buffer));
2497 
2498  // Non-blocking send of the buffer
2499  this->send(dest_processor_id, *buffer, req, tag);
2500  }
2501 }
2502 
2503 
2504 template <typename T>
2505 inline Status Communicator::receive (const unsigned int src_processor_id,
2506  std::basic_string<T> & buf,
2507  const MessageTag & tag) const
2508 {
2509  std::vector<T> tempbuf; // Officially C++ won't let us get a
2510  // modifiable array from a string
2511 
2512  Status stat = this->receive(src_processor_id, tempbuf, tag);
2513  buf.assign(tempbuf.begin(), tempbuf.end());
2514  return stat;
2515 }
2516 
2517 
2518 
2519 template <typename T>
2520 inline void Communicator::receive (const unsigned int src_processor_id,
2521  std::basic_string<T> & buf,
2522  Request & req,
2523  const MessageTag & tag) const
2524 {
2525  // Officially C++ won't let us get a modifiable array from a
2526  // string, and we can't even put one on the stack for the
2527  // non-blocking case.
2528  std::vector<T> * tempbuf = new std::vector<T>();
2529 
2530  // We can clear the string, but the Request::wait() will need to
2531  // handle copying our temporary buffer to it
2532  buf.clear();
2533 
2534  req.add_post_wait_work
2535  (new Parallel::PostWaitCopyBuffer<std::vector<T>,
2536  std::back_insert_iterator<std::basic_string<T>>>
2537  (*tempbuf, std::back_inserter(buf)));
2538 
2539  // Make the Request::wait() then handle deleting the buffer
2540  req.add_post_wait_work
2541  (new Parallel::PostWaitDeleteBuffer<std::vector<T>>(tempbuf));
2542 
2543  this->receive(src_processor_id, *tempbuf, req, tag);
2544 }
2545 
2546 
2547 
2548 template <typename T>
2549 inline Status Communicator::receive (const unsigned int src_processor_id,
2550  T & buf,
2551  const MessageTag & tag) const
2552 {
2553  LOG_SCOPE("receive()", "Parallel");
2554 
2555  // Get the status of the message, explicitly provide the
2556  // datatype so we can later query the size
2557  Status stat(this->probe(src_processor_id, tag), StandardType<T>(&buf));
2558 
2559  libmesh_call_mpi
2560  (MPI_Recv (&buf, 1, StandardType<T>(&buf), src_processor_id,
2561  tag.value(), this->get(), stat.get()));
2562 
2563  return stat;
2564 }
2565 
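// Usage sketch (editorial, not from the libMesh sources): a blocking
// point-to-point exchange of a single value using the default message tag.
// example_point_to_point() and the payload are hypothetical.
#if 0
inline void example_point_to_point (const Communicator & comm)
{
  if (comm.size() < 2)
    return;

  if (comm.rank() == 0)
    {
      const double payload = 3.14;
      comm.send(1, payload);        // blocking send of one value to rank 1
    }
  else if (comm.rank() == 1)
    {
      double payload = 0.;
      comm.receive(0, payload);     // blocking receive of one value from rank 0
    }
}
#endif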
2566 
2567 
2568 template <typename T>
2569 inline void Communicator::receive (const unsigned int src_processor_id,
2570  T & buf,
2571  Request & req,
2572  const MessageTag & tag) const
2573 {
2574  LOG_SCOPE("receive()", "Parallel");
2575 
2576  libmesh_call_mpi
2577  (MPI_Irecv (&buf, 1, StandardType<T>(&buf), src_processor_id,
2578  tag.value(), this->get(), req.get()));
2579 }
2580 
2581 
2582 
2583 template <typename T>
2584 inline Status Communicator::receive (const unsigned int src_processor_id,
2585  std::set<T> & buf,
2586  const MessageTag & tag) const
2587 {
2588  return this->receive
2589  (src_processor_id, buf,
2590  StandardType<T>(buf.empty() ? libmesh_nullptr : &(*buf.begin())), tag);
2591 }
2592 
2593 
2594 
2595 /*
2596  * No non-blocking receives of std::set until we figure out how to
2597  * resize the temporary buffer
2598  */
2599 #if 0
2600 template <typename T>
2601 inline void Communicator::receive (const unsigned int src_processor_id,
2602  std::set<T> & buf,
2603  Request & req,
2604  const MessageTag & tag) const
2605 {
2606  this->receive (src_processor_id, buf,
2607  StandardType<T>(buf.empty() ? libmesh_nullptr : &(*buf.begin())), req, tag);
2608 }
2609 #endif // 0
2610 
2611 
2612 
2613 template <typename T>
2614 inline Status Communicator::receive (const unsigned int src_processor_id,
2615  std::set<T> & buf,
2616  const DataType & type,
2617  const MessageTag & tag) const
2618 {
2619  LOG_SCOPE("receive()", "Parallel");
2620 
2621  std::vector<T> vecbuf;
2622  Status stat = this->receive(src_processor_id, vecbuf, type, tag);
2623  buf.clear();
2624  buf.insert(vecbuf.begin(), vecbuf.end());
2625 
2626  return stat;
2627 }
2628 
2629 
2630 
2631 /*
2632  * No non-blocking receives of std::set until we figure out how to
2633  * resize the temporary buffer
2634  */
2635 #if 0
2636 template <typename T>
2637 inline void Communicator::receive (const unsigned int src_processor_id,
2638  std::set<T> & buf,
2639  const DataType & type,
2640  Request & req,
2641  const MessageTag & tag) const
2642 {
2643  LOG_SCOPE("receive()", "Parallel");
2644 
2645  // Allocate temporary buffer on the heap so it lives until after
2646  // the non-blocking send completes
2647  std::vector<T> * vecbuf = new std::vector<T>();
2648 
2649  // We can clear the set, but the Request::wait() will need to
2650  // handle copying our temporary buffer to it
2651  buf.clear();
2652 
2653  req.add_post_wait_work
2654  (new Parallel::PostWaitCopyBuffer<std::vector<T>,
2655  std::insert_iterator<std::set<T>>>
2656  (*vecbuf, std::inserter(buf,buf.end())));
2657 
2658  // Make the Request::wait() then handle deleting the buffer
2659  req.add_post_wait_work
2660  (new Parallel::PostWaitDeleteBuffer<std::vector<T>>(vecbuf));
2661 
2662  this->receive(src_processor_id, *vecbuf, type, req, tag);
2663 }
2664 #endif // 0
2665 
2666 
2667 
2668 template <typename T>
2669 inline Status Communicator::receive (const unsigned int src_processor_id,
2670  std::vector<T> & buf,
2671  const MessageTag & tag) const
2672 {
2673  return this->receive
2674  (src_processor_id, buf,
2675  StandardType<T>(buf.empty() ? libmesh_nullptr : &(*buf.begin())), tag);
2676 }
2677 
2678 
2679 
2680 template <typename T>
2681 inline void Communicator::receive (const unsigned int src_processor_id,
2682  std::vector<T> & buf,
2683  Request & req,
2684  const MessageTag & tag) const
2685 {
2686  this->receive (src_processor_id, buf,
2687  StandardType<T>(buf.empty() ? libmesh_nullptr : &(*buf.begin())), req, tag);
2688 }
2689 
2690 
2691 
2692 template <typename T>
2693 inline Status Communicator::receive (const unsigned int src_processor_id,
2694  std::vector<T> & buf,
2695  const DataType & type,
2696  const MessageTag & tag) const
2697 {
2698  LOG_SCOPE("receive()", "Parallel");
2699 
2700  // Get the status of the message, explicitly provide the
2701  // datatype so we can later query the size
2702  Status stat(this->probe(src_processor_id, tag), type);
2703 
2704  buf.resize(stat.size());
2705 
2706  // Use stat.source() and stat.tag() in the receive - if
2707  // src_processor_id or tag is "any" then we want to be sure we
2708  // try to receive the same message we just probed.
2709  libmesh_call_mpi
2710  (MPI_Recv (buf.empty() ? libmesh_nullptr : &buf[0],
2711  cast_int<int>(buf.size()), type, stat.source(),
2712  stat.tag(), this->get(), stat.get()));
2713 
2714  libmesh_assert_equal_to (stat.size(), buf.size());
2715 
2716  return stat;
2717 }
2718 
2719 
2720 
2721 template <typename T>
2722 inline void Communicator::receive (const unsigned int src_processor_id,
2723  std::vector<T> & buf,
2724  const DataType & type,
2725  Request & req,
2726  const MessageTag & tag) const
2727 {
2728  LOG_SCOPE("receive()", "Parallel");
2729 
2730  libmesh_call_mpi
2731  (MPI_Irecv (buf.empty() ? libmesh_nullptr : &buf[0],
2732  cast_int<int>(buf.size()), type, src_processor_id,
2733  tag.value(), this->get(), req.get()));
2734 }
2735 
2736 
2737 template <typename Context, typename OutputIter, typename T>
2738 inline void Communicator::receive_packed_range (const unsigned int src_processor_id,
2739  Context * context,
2740  OutputIter out_iter,
2741  const T * output_type,
2742  const MessageTag & tag) const
2743 {
2744  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
2745 
2746  // Receive serialized variable size objects as sequences of buffer_t
2747  std::size_t total_buffer_size = 0;
2748  Status stat = this->receive(src_processor_id, total_buffer_size, tag);
2749 
2750  // Use stat.source() and stat.tag() in subsequent receives - if
2751  // src_processor_id or tag is "any" then we want to be sure we
2752  // try to receive messages all corresponding to the same send.
2753 
2754  std::size_t received_buffer_size = 0;
2755  while (received_buffer_size < total_buffer_size)
2756  {
2757  std::vector<buffer_t> buffer;
2758  this->receive(stat.source(), buffer, MessageTag(stat.tag()));
2759  received_buffer_size += buffer.size();
2760  Parallel::unpack_range
2761  (buffer, context, out_iter, output_type);
2762  }
2763 }
2764 
2765 
2766 
2767 // template <typename Context, typename OutputIter>
2768 // inline void Communicator::receive_packed_range (const unsigned int src_processor_id,
2769 // Context * context,
2770 // OutputIter out_iter,
2771 // Request & req,
2772 // const MessageTag & tag) const
2773 // {
2774 // typedef typename std::iterator_traits<OutputIter>::value_type T;
2775 // typedef typename Parallel::Packing<T>::buffer_type buffer_t;
2776 //
2777 // // Receive serialized variable size objects as a sequence of
2778 // // buffer_t.
2779 // // Allocate a buffer on the heap so we don't have to free it until
2780 // // after the Request::wait()
2781 // std::vector<buffer_t> * buffer = new std::vector<buffer_t>();
2782 // this->receive(src_processor_id, *buffer, req, tag);
2783 //
2784 // // Make the Request::wait() handle unpacking the buffer
2785 // req.add_post_wait_work
2786 // (new Parallel::PostWaitUnpackBuffer<std::vector<buffer_t>, Context, OutputIter>
2787 // (buffer, context, out_iter));
2788 //
2789 // // Make the Request::wait() then handle deleting the buffer
2790 // req.add_post_wait_work
2791 // (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t>>(buffer));
2792 // }
2793 
2794 template <typename Context, typename OutputIter, typename T>
2795 inline void Communicator::nonblocking_receive_packed_range (const unsigned int src_processor_id,
2796  Context * context,
2797  OutputIter out,
2798  const T * /* output_type */,
2799  Request & req,
2800  Status & stat,
2801  const MessageTag & tag) const
2802 {
2803  libmesh_experimental();
2804 
2805  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
2806 
2807  // Receive serialized variable size objects as a sequence of
2808  // buffer_t.
2809  // Allocate a buffer on the heap so we don't have to free it until
2810  // after the Request::wait()
2811  std::vector<buffer_t> * buffer = new std::vector<buffer_t>(stat.size());
2812  this->receive(src_processor_id, *buffer, req, tag);
2813 
2814  // Make the Request::wait() handle unpacking the buffer
2815  req.add_post_wait_work
2816  (new Parallel::PostWaitUnpackBuffer<std::vector<buffer_t>, Context, OutputIter, T>(*buffer, context, out));
2817 
2818  // Make the Request::wait() then handle deleting the buffer
2819  req.add_post_wait_work
2820  (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t>>(buffer));
2821 }
2822 
2823 
2824 
2825 template <typename T1, typename T2>
2826 inline void Communicator::send_receive(const unsigned int dest_processor_id,
2827  const std::vector<T1> & sendvec,
2828  const DataType & type1,
2829  const unsigned int source_processor_id,
2830  std::vector<T2> & recv,
2831  const DataType & type2,
2832  const MessageTag & send_tag,
2833  const MessageTag & recv_tag) const
2834 {
2835  LOG_SCOPE("send_receive()", "Parallel");
2836 
2837  if (dest_processor_id == this->rank() &&
2838  source_processor_id == this->rank())
2839  {
2840  recv = sendvec;
2841  return;
2842  }
2843 
2844  Parallel::Request req;
2845 
2846  this->send (dest_processor_id, sendvec, type1, req, send_tag);
2847 
2848  this->receive (source_processor_id, recv, type2, recv_tag);
2849 
2850  req.wait();
2851 }
2852 
2853 
2854 
2855 template <typename T1, typename T2>
2856 inline void Communicator::send_receive(const unsigned int dest_processor_id,
2857  const T1 & sendvec,
2858  const unsigned int source_processor_id,
2859  T2 & recv,
2860  const MessageTag & send_tag,
2861  const MessageTag & recv_tag) const
2862 {
2863  LOG_SCOPE("send_receive()", "Parallel");
2864 
2865  if (dest_processor_id == this->rank() &&
2866  source_processor_id == this->rank())
2867  {
2868  recv = sendvec;
2869  return;
2870  }
2871 
2872  // MPI_STATUS_IGNORE is from MPI-2; using it with some versions of
2873  // MPICH may cause a crash:
2874  // https://bugzilla.mcs.anl.gov/globus/show_bug.cgi?id=1798
2875 #if MPI_VERSION > 1
2876  libmesh_call_mpi
2877  (MPI_Sendrecv(const_cast<T1*>(&sendvec), 1, StandardType<T1>(&sendvec),
2878  dest_processor_id, send_tag.value(), &recv, 1,
2879  StandardType<T2>(&recv), source_processor_id,
2880  recv_tag.value(), this->get(), MPI_STATUS_IGNORE));
2881 #else
2882  MPI_Status stat;
2883  libmesh_call_mpi
2884  (MPI_Sendrecv(const_cast<T1*>(&sendvec), 1, StandardType<T1>(&sendvec),
2885  dest_processor_id, send_tag.value(), &recv, 1,
2886  StandardType<T2>(&recv), source_processor_id,
2887  recv_tag.value(), this->get(), &stat));
2888 #endif
2889 }
2890 
2891 
2892 
2893 // This is both a declaration and definition for a new overloaded
2894 // function template, so we have to re-specify the default
2895 // arguments.
2896 //
2897 // We specialize on the T1==T2 case so that we can handle
2898 // send_receive-to-self with a plain copy rather than going through
2899 // MPI.
2900 template <typename T>
2901 inline void Communicator::send_receive(const unsigned int dest_processor_id,
2902  const std::vector<T> & sendvec,
2903  const unsigned int source_processor_id,
2904  std::vector<T> & recv,
2905  const MessageTag & send_tag,
2906  const MessageTag & recv_tag) const
2907 {
2908  if (dest_processor_id == this->rank() &&
2909  source_processor_id == this->rank())
2910  {
2911  LOG_SCOPE("send_receive()", "Parallel");
2912  recv = sendvec;
2913  return;
2914  }
2915 
2916  const T* example = sendvec.empty() ?
2917  (recv.empty() ? libmesh_nullptr : &recv[0]) : &sendvec[0];
2918 
2919  // Call the user-defined type version with automatic
2920  // type conversion based on template argument:
2921  this->send_receive (dest_processor_id, sendvec,
2922  StandardType<T>(example),
2923  source_processor_id, recv,
2924  StandardType<T>(example),
2925  send_tag, recv_tag);
2926 }
2927 
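// Usage sketch (editorial, not from the libMesh sources): send_receive() pairs
// a send to one neighbor with a receive from another and handles the
// send-to-self case internally. example_ring_exchange() is hypothetical.
#if 0
inline void example_ring_exchange (const Communicator & comm)
{
  const unsigned int dest   = (comm.rank() + 1) % comm.size();
  const unsigned int source = (comm.rank() + comm.size() - 1) % comm.size();

  std::vector<double> my_data(5, static_cast<double>(comm.rank()));
  std::vector<double> neighbor_data;

  comm.send_receive(dest, my_data, source, neighbor_data);
}
#endif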
2928 
2929 // This is both a declaration and definition for a new overloaded
2930 // function template, so we have to re-specify the default arguments
2931 template <typename T1, typename T2>
2932 inline void Communicator::send_receive(const unsigned int dest_processor_id,
2933  const std::vector<T1> & sendvec,
2934  const unsigned int source_processor_id,
2935  std::vector<T2> & recv,
2936  const MessageTag & send_tag,
2937  const MessageTag & recv_tag) const
2938 {
2939  // Call the user-defined type version with automatic
2940  // type conversion based on template argument:
2941  this->send_receive (dest_processor_id, sendvec,
2942  StandardType<T1>(sendvec.empty() ? libmesh_nullptr : &sendvec[0]),
2943  source_processor_id, recv,
2944  StandardType<T2>(recv.empty() ? libmesh_nullptr : &recv[0]),
2945  send_tag, recv_tag);
2946 }
2947 
2948 
2949 
2950 
2951 template <typename T1, typename T2>
2952 inline void Communicator::send_receive(const unsigned int dest_processor_id,
2953  const std::vector<std::vector<T1>> & sendvec,
2954  const unsigned int source_processor_id,
2955  std::vector<std::vector<T2>> & recv,
2956  const MessageTag & /* send_tag */,
2957  const MessageTag & /* recv_tag */) const
2958 {
2959  // FIXME - why aren't we honoring send_tag and recv_tag here?
2960  send_receive_vec_of_vec
2961  (dest_processor_id, sendvec, source_processor_id, recv,
2962  no_tag, any_tag, *this);
2963 }
2964 
2965 
2966 
2967 // This is both a declaration and definition for a new overloaded
2968 // function template, so we have to re-specify the default arguments
2969 template <typename T>
2970 inline void Communicator::send_receive(const unsigned int dest_processor_id,
2971  const std::vector<std::vector<T>> & sendvec,
2972  const unsigned int source_processor_id,
2973  std::vector<std::vector<T>> & recv,
2974  const MessageTag & /* send_tag */,
2975  const MessageTag & /* recv_tag */) const
2976 {
2977  // FIXME - why aren't we honoring send_tag and recv_tag here?
2978  send_receive_vec_of_vec
2979  (dest_processor_id, sendvec, source_processor_id, recv,
2980  no_tag, any_tag, *this);
2981 }
2982 
2983 
2984 
2985 
2986 template <typename Context1, typename RangeIter, typename Context2,
2987  typename OutputIter, typename T>
2988 inline void
2989 Communicator::send_receive_packed_range (const unsigned int dest_processor_id,
2990  const Context1 * context1,
2991  RangeIter send_begin,
2992  const RangeIter send_end,
2993  const unsigned int source_processor_id,
2994  Context2 * context2,
2995  OutputIter out_iter,
2996  const T * output_type,
2997  const MessageTag & send_tag,
2998  const MessageTag & recv_tag) const
2999 {
3000  LOG_SCOPE("send_receive()", "Parallel");
3001 
3002  Parallel::Request req;
3003 
3004  this->send_packed_range (dest_processor_id, context1, send_begin, send_end,
3005  req, send_tag);
3006 
3007  this->receive_packed_range (source_processor_id, context2, out_iter,
3008  output_type, recv_tag);
3009 
3010  req.wait();
3011 }
3012 
3013 
3014 
3015 template <typename T>
3016 inline void Communicator::gather(const unsigned int root_id,
3017  const T & sendval,
3018  std::vector<T> & recv) const
3019 {
3020  libmesh_assert_less (root_id, this->size());
3021 
3022  if (this->rank() == root_id)
3023  recv.resize(this->size());
3024 
3025  if (this->size() > 1)
3026  {
3027  LOG_SCOPE("gather()", "Parallel");
3028 
3029  StandardType<T> send_type(&sendval);
3030 
3031  libmesh_call_mpi
3032  (MPI_Gather(const_cast<T*>(&sendval), 1, send_type,
3033  recv.empty() ? libmesh_nullptr : &recv[0], 1, send_type,
3034  root_id, this->get()));
3035  }
3036  else
3037  recv[0] = sendval;
3038 }
3039 
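// Usage sketch (editorial, not from the libMesh sources): the rooted gather()
// above collects one value per rank into a vector that is only resized and
// filled on root_id. example_gather() is hypothetical.
#if 0
inline void example_gather (const Communicator & comm)
{
  const double local_value = 2.0 * comm.rank();
  std::vector<double> all_values;   // stays empty on non-root ranks

  comm.gather(0, local_value, all_values);

  if (comm.rank() == 0)
    libmesh_assert_equal_to (all_values.size(), std::size_t(comm.size()));
}
#endif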
3040 
3041 
3042 template <typename T>
3043 inline void Communicator::gather(const unsigned int root_id,
3044  const std::basic_string<T> & sendval,
3045  std::vector<std::basic_string<T>> & recv,
3046  const bool identical_buffer_sizes) const
3047 {
3048  libmesh_assert_less (root_id, this->size());
3049 
3050  if (this->rank() == root_id)
3051  recv.resize(this->size());
3052 
3053  if (this->size() > 1)
3054  {
3055  LOG_SCOPE ("gather()","Parallel");
3056 
3057  std::vector<int>
3058  sendlengths (this->size(), 0),
3059  displacements(this->size(), 0);
3060 
3061  const int mysize = static_cast<int>(sendval.size());
3062 
3063  if (identical_buffer_sizes)
3064  sendlengths.assign(this->size(), mysize);
3065  else
3066  // first comm step to determine buffer sizes from all processors
3067  this->gather(root_id, mysize, sendlengths);
3068 
3069  // Find the total size of the final array and
3070  // set up the displacement offsets for each processor
3071  unsigned int globalsize = 0;
3072  for (unsigned int i=0; i < this->size(); ++i)
3073  {
3074  displacements[i] = globalsize;
3075  globalsize += sendlengths[i];
3076  }
3077 
3078  // monolithic receive buffer
3079  std::string r;
3080  if (this->rank() == root_id)
3081  r.resize(globalsize, 0);
3082 
3083  // and get the data from the remote processors.
3084  libmesh_call_mpi
3085  (MPI_Gatherv (const_cast<T*>(&sendval[0]),
3086  mysize, StandardType<T>(),
3087  this->rank() == root_id ? &r[0] : libmesh_nullptr,
3088  &sendlengths[0], &displacements[0],
3089  StandardType<T>(), root_id, this->get()));
3090 
3091  // slice receive buffer up
3092  if (this->rank() == root_id)
3093  for (unsigned int i=0; i != this->size(); ++i)
3094  recv[i] = r.substr(displacements[i], sendlengths[i]);
3095  }
3096  else
3097  recv[0] = sendval;
3098 }
3099 
3100 
3101 
3102 template <typename T>
3103 inline void Communicator::gather(const unsigned int root_id,
3104  std::vector<T> & r) const
3105 {
3106  if (this->size() == 1)
3107  {
3108  libmesh_assert (!this->rank());
3109  libmesh_assert (!root_id);
3110  return;
3111  }
3112 
3113  libmesh_assert_less (root_id, this->size());
3114 
3115  std::vector<int>
3116  sendlengths (this->size(), 0),
3117  displacements(this->size(), 0);
3118 
3119  const int mysize = static_cast<int>(r.size());
3120  this->allgather(mysize, sendlengths);
3121 
3122  LOG_SCOPE("gather()", "Parallel");
3123 
3124  // Find the total size of the final array and
3125  // set up the displacement offsets for each processor.
3126  unsigned int globalsize = 0;
3127  for (unsigned int i=0; i != this->size(); ++i)
3128  {
3129  displacements[i] = globalsize;
3130  globalsize += sendlengths[i];
3131  }
3132 
3133  // Check for quick return
3134  if (globalsize == 0)
3135  return;
3136 
3137  // copy the input buffer
3138  std::vector<T> r_src(r);
3139 
3140  // now resize it to hold the global data
3141  // on the receiving processor
3142  if (root_id == this->rank())
3143  r.resize(globalsize);
3144 
3145  // and get the data from the remote processors
3146  libmesh_call_mpi
3147  (MPI_Gatherv (r_src.empty() ? libmesh_nullptr : &r_src[0], mysize,
3148  StandardType<T>(), r.empty() ? libmesh_nullptr : &r[0],
3149  &sendlengths[0], &displacements[0],
3150  StandardType<T>(), root_id, this->get()));
3151 }
3152 
3153 
3154 template <typename T>
3155 inline void Communicator::allgather(const T & sendval,
3156  std::vector<T> & recv) const
3157 {
3158  LOG_SCOPE ("allgather()","Parallel");
3159 
3160  libmesh_assert(this->size());
3161  recv.resize(this->size());
3162 
3163  unsigned int comm_size = this->size();
3164  if (comm_size > 1)
3165  {
3166  StandardType<T> send_type(&sendval);
3167 
3168  libmesh_call_mpi
3169  (MPI_Allgather (const_cast<T*>(&sendval), 1, send_type, &recv[0], 1,
3170  send_type, this->get()));
3171  }
3172  else if (comm_size > 0)
3173  recv[0] = sendval;
3174 }
3175 
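// Usage sketch (editorial, not from the libMesh sources): allgather() is the
// root-less variant; every rank ends up with the full vector of contributions.
// example_allgather() is hypothetical.
#if 0
inline void example_allgather (const Communicator & comm)
{
  const unsigned int my_value = comm.rank();
  std::vector<unsigned int> everyone;

  comm.allgather(my_value, everyone);

  // everyone[p] == p on every rank
}
#endif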
3176 
3177 
3178 template <typename T>
3179 inline void Communicator::allgather(const std::basic_string<T> & sendval,
3180  std::vector<std::basic_string<T>> & recv,
3181  const bool identical_buffer_sizes) const
3182 {
3183  LOG_SCOPE ("allgather()","Parallel");
3184 
3185  libmesh_assert(this->size());
3186  recv.assign(this->size(), "");
3187 
3188  // serial case
3189  if (this->size() < 2)
3190  {
3191  recv.resize(1);
3192  recv[0] = sendval;
3193  return;
3194  }
3195 
3196  std::vector<int>
3197  sendlengths (this->size(), 0),
3198  displacements(this->size(), 0);
3199 
3200  const int mysize = static_cast<int>(sendval.size());
3201 
3202  if (identical_buffer_sizes)
3203  sendlengths.assign(this->size(), mysize);
3204  else
3205  // first comm step to determine buffer sizes from all processors
3206  this->allgather(mysize, sendlengths);
3207 
3208  // Find the total size of the final array and
3209  // set up the displacement offsets for each processor
3210  unsigned int globalsize = 0;
3211  for (unsigned int i=0; i != this->size(); ++i)
3212  {
3213  displacements[i] = globalsize;
3214  globalsize += sendlengths[i];
3215  }
3216 
3217  // Check for quick return
3218  if (globalsize == 0)
3219  return;
3220 
3221  // monolithic receive buffer
3222  std::string r(globalsize, 0);
3223 
3224  // and get the data from the remote processors.
3225  libmesh_call_mpi
3226  (MPI_Allgatherv (const_cast<T*>(mysize ? &sendval[0] : libmesh_nullptr),
3227  mysize, StandardType<T>(),
3228  &r[0], &sendlengths[0], &displacements[0],
3229  StandardType<T>(), this->get()));
3230 
3231  // slice receive buffer up
3232  for (unsigned int i=0; i != this->size(); ++i)
3233  recv[i] = r.substr(displacements[i], sendlengths[i]);
3234 }
3235 
3236 
3237 
3238 template <typename T>
3239 inline void Communicator::allgather(std::vector<T> & r,
3240  const bool identical_buffer_sizes) const
3241 {
3242  if (this->size() < 2)
3243  return;
3244 
3245  LOG_SCOPE("allgather()", "Parallel");
3246 
3247  if (identical_buffer_sizes)
3248  {
3249  if (r.empty())
3250  return;
3251 
3252  libmesh_assert(this->verify(r.size()));
3253 
3254  std::vector<T> r_src(r.size()*this->size());
3255  r_src.swap(r);
3256  StandardType<T> send_type(&r_src[0]);
3257 
3258  libmesh_call_mpi
3259  (MPI_Allgather (&r_src[0], cast_int<int>(r_src.size()),
3260  send_type, &r[0], cast_int<int>(r_src.size()),
3261  send_type, this->get()));
3262  // libmesh_assert(this->verify(r));
3263  return;
3264  }
3265 
3266  std::vector<int>
3267  sendlengths (this->size(), 0),
3268  displacements(this->size(), 0);
3269 
3270  const int mysize = static_cast<int>(r.size());
3271  this->allgather(mysize, sendlengths);
3272 
3273  // Find the total size of the final array and
3274  // set up the displacement offsets for each processor.
3275  unsigned int globalsize = 0;
3276  for (unsigned int i=0; i != this->size(); ++i)
3277  {
3278  displacements[i] = globalsize;
3279  globalsize += sendlengths[i];
3280  }
3281 
3282  // Check for quick return
3283  if (globalsize == 0)
3284  return;
3285 
3286  // copy the input buffer
3287  std::vector<T> r_src(globalsize);
3288  r_src.swap(r);
3289 
3290  StandardType<T> send_type(&r[0]);
3291 
3292  // and get the data from the remote processors.
3293  // Pass NULL if our vector is empty.
3294  libmesh_call_mpi
3295  (MPI_Allgatherv (r_src.empty() ? libmesh_nullptr : &r_src[0], mysize,
3296  send_type, &r[0], &sendlengths[0],
3297  &displacements[0], send_type, this->get()));
3298 }
3299 
3300 
3301 
3302 template <typename T>
3303 inline void Communicator::allgather(std::vector<std::basic_string<T>> & r,
3304  const bool identical_buffer_sizes) const
3305 {
3306  if (this->size() < 2)
3307  return;
3308 
3309  LOG_SCOPE("allgather()", "Parallel");
3310 
3311  if (identical_buffer_sizes)
3312  {
3313  libmesh_assert(this->verify(r.size()));
3314 
3315  // identical_buffer_sizes doesn't buy us much since we have to
3316  // communicate the lengths of strings within each buffer anyway
3317  if (r.empty())
3318  return;
3319  }
3320 
3321  // Concatenate the input buffer into a send buffer, and keep track
3322  // of input string lengths
3323  std::vector<int> mystrlengths (r.size());
3324  std::vector<T> concat_src;
3325 
3326  int myconcatsize = 0;
3327  for (unsigned int i=0; i != r.size(); ++i)
3328  {
3329  int stringlen = cast_int<int>(r[i].size());
3330  mystrlengths[i] = stringlen;
3331  myconcatsize += stringlen;
3332  }
3333  concat_src.reserve(myconcatsize);
3334  for (unsigned int i=0; i != r.size(); ++i)
3335  concat_src.insert
3336  (concat_src.end(), r[i].begin(), r[i].end());
3337 
3338  // Get the string lengths from all other processors
3339  std::vector<int> strlengths = mystrlengths;
3340  this->allgather(strlengths, identical_buffer_sizes);
3341 
3342  // We now know how many strings we'll be receiving
3343  r.resize(strlengths.size());
3344 
3345  // Get the concatenated data sizes from all other processors
3346  std::vector<int> concat_sizes;
3347  this->allgather(myconcatsize, concat_sizes);
3348 
3349  // Find the total size of the final concatenated array and
3350  // set up the displacement offsets for each processor.
3351  std::vector<int> displacements(this->size(), 0);
3352  unsigned int globalsize = 0;
3353  for (unsigned int i=0; i != this->size(); ++i)
3354  {
3355  displacements[i] = globalsize;
3356  globalsize += concat_sizes[i];
3357  }
3358 
3359  // Check for quick return
3360  if (globalsize == 0)
3361  return;
3362 
3363  // Get the concatenated data from the remote processors.
3364  // Pass NULL if our vector is empty.
3365  std::vector<T> concat(globalsize);
3366 
3367  // We may have concat_src.empty(), but we know concat has at least
3368  // one element we can use as an example for StandardType
3369  StandardType<T> send_type(&concat[0]);
3370 
3371  libmesh_call_mpi
3372  (MPI_Allgatherv (concat_src.empty() ?
3373  libmesh_nullptr : &concat_src[0], myconcatsize,
3374  send_type, &concat[0], &concat_sizes[0],
3375  &displacements[0], send_type, this->get()));
3376 
3377  // Finally, split concatenated data into strings
3378  const T * begin = &concat[0];
3379  for (unsigned int i=0; i != r.size(); ++i)
3380  {
3381  const T * end = begin + strlengths[i];
3382  r[i].assign(begin, end);
3383  begin = end;
3384  }
3385 }
3386 
3387 
3388 
3389 template <typename T>
3390 void Communicator::scatter(const std::vector<T> & data,
3391  T & recv,
3392  const unsigned int root_id) const
3393 {
3394  libmesh_assert_less (root_id, this->size());
3395 
3396  // Do not allow the root_id to scatter a NULL vector.
3397  // That would leave recv in an indeterminate state.
3398  libmesh_assert (this->rank() != root_id || this->size() == data.size());
3399 
3400  if (this->size() == 1)
3401  {
3402  libmesh_assert (!this->rank());
3403  libmesh_assert (!root_id);
3404  recv = data[0];
3405  return;
3406  }
3407 
3408  LOG_SCOPE("scatter()", "Parallel");
3409 
3410  T * data_ptr = const_cast<T*>(data.empty() ? libmesh_nullptr : &data[0]);
3411 
3412  libmesh_call_mpi
3413  (MPI_Scatter (data_ptr, 1, StandardType<T>(data_ptr),
3414  &recv, 1, StandardType<T>(&recv), root_id, this->get()));
3415 }
3416 
3417 
3418 
3419 template <typename T>
3420 void Communicator::scatter(const std::vector<T> & data,
3421  std::vector<T> & recv,
3422  const unsigned int root_id) const
3423 {
3424  libmesh_assert_less (root_id, this->size());
3425 
3426  if (this->size() == 1)
3427  {
3428  libmesh_assert (!this->rank());
3429  libmesh_assert (!root_id);
3430  recv.assign(data.begin(), data.end());
3431  return;
3432  }
3433 
3434  LOG_SCOPE("scatter()", "Parallel");
3435 
3436  int recv_buffer_size;
3437  if (this->rank() == root_id)
3438  {
3439  libmesh_assert(data.size() % this->size() == 0);
3440  recv_buffer_size = data.size() / this->size();
3441  }
3442 
3443  this->broadcast(recv_buffer_size);
3444  recv.resize(recv_buffer_size);
3445 
3446  T * data_ptr = const_cast<T*>(data.empty() ? libmesh_nullptr : &data[0]);
3447  T * recv_ptr = recv.empty() ? libmesh_nullptr : &recv[0];
3448 
3449  libmesh_call_mpi
3450  (MPI_Scatter (data_ptr, recv_buffer_size, StandardType<T>(data_ptr),
3451  recv_ptr, recv_buffer_size, StandardType<T>(recv_ptr), root_id, this->get()));
3452 }
3453 
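// Usage sketch (editorial, not from the libMesh sources): for the vector
// scatter() above, the root provides size() equal chunks and every rank
// receives its own chunk; non-root ranks may pass an empty data vector.
// example_scatter() is hypothetical.
#if 0
inline void example_scatter (const Communicator & comm)
{
  std::vector<int> all_data;
  if (comm.rank() == 0)
    all_data.assign(3 * comm.size(), 7);   // three entries destined for each rank

  std::vector<int> my_chunk;
  comm.scatter(all_data, my_chunk, 0);     // my_chunk.size() == 3 on every rank
}
#endif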
3454 
3455 
3456 template <typename T>
3457 void Communicator::scatter(const std::vector<T> & data,
3458  const std::vector<int> counts,
3459  std::vector<T> & recv,
3460  const unsigned int root_id) const
3461 {
3462  libmesh_assert_less (root_id, this->size());
3463 
3464  if (this->size() == 1)
3465  {
3466  libmesh_assert (!this->rank());
3467  libmesh_assert (!root_id);
3468  libmesh_assert (counts.size() == this->size());
3469  recv.assign(data.begin(), data.begin() + counts[0]);
3470  return;
3471  }
3472 
3473  std::vector<int> displacements(this->size(), 0);
3474  if (root_id == this->rank())
3475  {
3476  libmesh_assert(counts.size() == this->size());
3477 
3478  // Create a displacements vector from the incoming counts vector
3479  unsigned int globalsize = 0;
3480  for (unsigned int i=0; i < this->size(); ++i)
3481  {
3482  displacements[i] = globalsize;
3483  globalsize += counts[i];
3484  }
3485 
3486  libmesh_assert(data.size() == globalsize);
3487  }
3488 
3489  LOG_SCOPE("scatter()", "Parallel");
3490 
3491  // Scatter the buffer sizes to size remote buffers
3492  int recv_buffer_size;
3493  this->scatter(counts, recv_buffer_size, root_id);
3494  recv.resize(recv_buffer_size);
3495 
3496  T * data_ptr = const_cast<T*>(data.empty() ? libmesh_nullptr : &data[0]);
3497  int * count_ptr = const_cast<int*>(counts.empty() ? libmesh_nullptr : &counts[0]);
3498  T * recv_ptr = recv.empty() ? libmesh_nullptr : &recv[0];
3499 
3500  // Scatter the non-uniform chunks
3501  libmesh_call_mpi
3502  (MPI_Scatterv (data_ptr, count_ptr, &displacements[0], StandardType<T>(data_ptr),
3503  recv_ptr, recv_buffer_size, StandardType<T>(recv_ptr), root_id, this->get()));
3504 }
3505 
3506 
3507 
3508 template <typename T>
3509 void Communicator::scatter(const std::vector<std::vector<T>> & data,
3510  std::vector<T> & recv,
3511  const unsigned int root_id,
3512  const bool identical_buffer_sizes) const
3513 {
3514  libmesh_assert_less (root_id, this->size());
3515 
3516  if (this->size() == 1)
3517  {
3518  libmesh_assert (!this->rank());
3519  libmesh_assert (!root_id);
3520  libmesh_assert (data.size() == this->size());
3521  recv.assign(data[0].begin(), data[0].end());
3522  return;
3523  }
3524 
3525  std::vector<T> stacked_data;
3526  std::vector<int> counts;
3527 
3528  if (root_id == this->rank())
3529  {
3530  libmesh_assert (data.size() == this->size());
3531 
3532  if (!identical_buffer_sizes)
3533  counts.resize(this->size());
3534 
3535  for (std::size_t i=0; i < data.size(); ++i)
3536  {
3537  if (!identical_buffer_sizes)
3538  counts[i] = data[i].size();
3539 #ifndef NDEBUG
3540  else
3541  // Check that buffer sizes are indeed equal
3542  libmesh_assert(!i || data[i-1].size() == data[i].size());
3543 #endif
3544  std::copy(data[i].begin(), data[i].end(), std::back_inserter(stacked_data));
3545  }
3546  }
3547 
3548  if (identical_buffer_sizes)
3549  this->scatter(stacked_data, recv, root_id);
3550  else
3551  this->scatter(stacked_data, counts, recv, root_id);
3552 }
3553 
3554 
3555 
3556 template <typename T>
3557 inline void Communicator::alltoall(std::vector<T> & buf) const
3558 {
3559  if (this->size() < 2 || buf.empty())
3560  return;
3561 
3562  LOG_SCOPE("alltoall()", "Parallel");
3563 
3564  // the per-processor size. this is the same for all
3565  // processors using MPI_Alltoall, could be variable
3566  // using MPI_Alltoallv
3567  const int size_per_proc =
3568  cast_int<int>(buf.size()/this->size());
3569 
3570  libmesh_assert_equal_to (buf.size()%this->size(), 0);
3571 
3572  libmesh_assert(this->verify(size_per_proc));
3573 
3574  std::vector<T> tmp(buf);
3575 
3576  StandardType<T> send_type(&tmp[0]);
3577 
3578  libmesh_call_mpi
3579  (MPI_Alltoall (&tmp[0], size_per_proc, send_type, &buf[0],
3580  size_per_proc, send_type, this->get()));
3581 }
3582 
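// Usage sketch (editorial, not from the libMesh sources): alltoall() expects
// buf to hold size() equal chunks, one destined for each rank, and replaces
// them with the chunks received from each rank. example_alltoall() is
// hypothetical.
#if 0
inline void example_alltoall (const Communicator & comm)
{
  // one entry per destination rank; send our own rank id to everybody
  std::vector<unsigned int> buf(comm.size(), comm.rank());

  comm.alltoall(buf);

  // buf[p] now holds the value contributed by rank p, i.e. buf[p] == p
}
#endif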
3583 
3584 
3585 template <typename T>
3586 inline void Communicator::broadcast (T & data, const unsigned int root_id) const
3587 {
3588  if (this->size() == 1)
3589  {
3590  libmesh_assert (!this->rank());
3591  libmesh_assert (!root_id);
3592  return;
3593  }
3594 
3595  libmesh_assert_less (root_id, this->size());
3596 
3597  LOG_SCOPE("broadcast()", "Parallel");
3598 
3599  // Spread data to remote processors.
3600  libmesh_call_mpi
3601  (MPI_Bcast (&data, 1, StandardType<T>(&data), root_id,
3602  this->get()));
3603 }
3604 
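// Usage sketch (editorial, not from the libMesh sources): a scalar broadcast()
// with the default root of processor 0, followed by the vector overload, which
// requires receiving vectors to be sized in advance. example_broadcast() is
// hypothetical.
#if 0
inline void example_broadcast (const Communicator & comm)
{
  std::size_t n = 0;
  std::vector<double> payload;

  if (comm.rank() == 0)
    {
      payload.assign(4, 1.0);
      n = payload.size();
    }

  comm.broadcast(n);       // root_id defaults to processor 0
  payload.resize(n);       // non-root ranks size their buffers first
  comm.broadcast(payload);
}
#endif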
3605 
3606 template <>
3607 inline void Communicator::broadcast (bool & data, const unsigned int root_id) const
3608 {
3609  if (this->size() == 1)
3610  {
3611  libmesh_assert (!this->rank());
3612  libmesh_assert (!root_id);
3613  return;
3614  }
3615 
3616  libmesh_assert_less (root_id, this->size());
3617 
3618  LOG_SCOPE("broadcast()", "Parallel");
3619 
3620  // We don't want to depend on MPI-2 or C++ MPI, so we don't have
3621  // MPI::BOOL available
3622  char char_data = data;
3623 
3624  // Spread data to remote processors.
3625  libmesh_call_mpi
3626  (MPI_Bcast (&char_data, 1, StandardType<char>(&char_data),
3627  root_id, this->get()));
3628 
3629  data = char_data;
3630 }
3631 
3632 
3633 template <typename T>
3634 inline void Communicator::broadcast (std::basic_string<T> & data,
3635  const unsigned int root_id) const
3636 {
3637  if (this->size() == 1)
3638  {
3639  libmesh_assert (!this->rank());
3640  libmesh_assert (!root_id);
3641  return;
3642  }
3643 
3644  libmesh_assert_less (root_id, this->size());
3645 
3646  LOG_SCOPE("broadcast()", "Parallel");
3647 
3648  std::size_t data_size = data.size();
3649  this->broadcast(data_size, root_id);
3650 
3651  std::vector<T> data_c(data_size);
3652 #ifndef NDEBUG
3653  std::string orig(data);
3654 #endif
3655 
3656  if (this->rank() == root_id)
3657  for (std::size_t i=0; i<data.size(); i++)
3658  data_c[i] = data[i];
3659 
3660  this->broadcast (data_c, root_id);
3661 
3662  data.assign(data_c.begin(), data_c.end());
3663 
3664 #ifndef NDEBUG
3665  if (this->rank() == root_id)
3666  libmesh_assert_equal_to (data, orig);
3667 #endif
3668 }
3669 
3670 
3671 
3672 template <typename T>
3673 inline void Communicator::broadcast (std::vector<T> & data,
3674  const unsigned int root_id) const
3675 {
3676  if (this->size() == 1)
3677  {
3678  libmesh_assert (!this->rank());
3679  libmesh_assert (!root_id);
3680  return;
3681  }
3682 
3683  libmesh_assert_less (root_id, this->size());
3684 
3685  LOG_SCOPE("broadcast()", "Parallel");
3686 
3687  // and get the data from the remote processors.
3688  // Pass NULL if our vector is empty.
3689  T * data_ptr = data.empty() ? libmesh_nullptr : &data[0];
3690 
3691  libmesh_call_mpi
3692  (MPI_Bcast (data_ptr, cast_int<int>(data.size()),
3693  StandardType<T>(data_ptr), root_id, this->get()));
3694 }
3695 
3696 
3697 template <typename T>
3698 inline void Communicator::broadcast (std::vector<std::basic_string<T>> & data,
3699  const unsigned int root_id) const
3700 {
3701  if (this->size() == 1)
3702  {
3703  libmesh_assert (!this->rank());
3704  libmesh_assert (!root_id);
3705  return;
3706  }
3707 
3708  libmesh_assert_less (root_id, this->size());
3709 
3710  LOG_SCOPE("broadcast()", "Parallel");
3711 
3712  std::size_t bufsize=0;
3713  if (root_id == this->rank())
3714  {
3715  for (std::size_t i=0; i<data.size(); ++i)
3716  bufsize += data[i].size() + 1; // Add one for the string length word
3717  }
3718  this->broadcast(bufsize, root_id);
3719 
3720  // Here we use unsigned int to store up to 32-bit characters
3721  std::vector<unsigned int> temp; temp.reserve(bufsize);
3722  // Pack the strings
3723  if (root_id == this->rank())
3724  {
3725  for (std::size_t i=0; i<data.size(); ++i)
3726  {
3727  temp.push_back(cast_int<unsigned int>(data[i].size()));
3728  for (std::size_t j=0; j != data[i].size(); ++j)
3733  temp.push_back(data[i][j]);
3734  }
3735  }
3736  else
3737  temp.resize(bufsize);
3738 
3739  // broadcast the packed strings
3740  this->broadcast(temp, root_id);
3741 
3742  // Unpack the strings
3743  if (root_id != this->rank())
3744  {
3745  data.clear();
3746  std::vector<unsigned int>::const_iterator iter = temp.begin();
3747  while (iter != temp.end())
3748  {
3749  std::size_t curr_len = *iter++;
3750  data.push_back(std::string(iter, iter+curr_len));
3751  iter += curr_len;
3752  }
3753  }
3754 }
3755 
3756 
3757 
3758 
3759 template <typename T>
3760 inline void Communicator::broadcast (std::set<T> & data,
3761  const unsigned int root_id) const
3762 {
3763  if (this->size() == 1)
3764  {
3765  libmesh_assert (!this->rank());
3766  libmesh_assert (!root_id);
3767  return;
3768  }
3769 
3770  libmesh_assert_less (root_id, this->size());
3771 
3772  LOG_SCOPE("broadcast()", "Parallel");
3773 
3774  std::vector<T> vecdata;
3775  if (this->rank() == root_id)
3776  vecdata.assign(data.begin(), data.end());
3777 
3778  std::size_t vecsize = vecdata.size();
3779  this->broadcast(vecsize, root_id);
3780  if (this->rank() != root_id)
3781  vecdata.resize(vecsize);
3782 
3783  this->broadcast(vecdata, root_id);
3784  if (this->rank() != root_id)
3785  {
3786  data.clear();
3787  data.insert(vecdata.begin(), vecdata.end());
3788  }
3789 }
3790 
3791 
3792 
3793 template <typename T1, typename T2>
3794 inline void Communicator::broadcast(std::map<T1, T2> & data,
3795  const unsigned int root_id) const
3796 {
3797  if (this->size() == 1)
3798  {
3799  libmesh_assert (!this->rank());
3800  libmesh_assert (!root_id);
3801  return;
3802  }
3803 
3804  libmesh_assert_less (root_id, this->size());
3805 
3806  LOG_SCOPE("broadcast()", "Parallel");
3807 
3808  std::size_t data_size=data.size();
3809  this->broadcast(data_size, root_id);
3810 
3811  std::vector<T1> pair_first; pair_first.reserve(data_size);
3812  std::vector<T2> pair_second; pair_second.reserve(data_size);
3813 
3814  if (root_id == this->rank())
3815  {
3816  for (typename std::map<T1, T2>::const_iterator it = data.begin();
3817  it != data.end(); ++it)
3818  {
3819  pair_first.push_back(it->first);
3820  pair_second.push_back(it->second);
3821  }
3822  }
3823  else
3824  {
3825  pair_first.resize(data_size);
3826  pair_second.resize(data_size);
3827  }
3828 
3829  this->broadcast(pair_first, root_id);
3830  this->broadcast(pair_second, root_id);
3831 
3832  libmesh_assert(pair_first.size() == pair_second.size());
3833 
3834  if (this->rank() != root_id)
3835  {
3836  data.clear();
3837  for (std::size_t i=0; i<pair_first.size(); ++i)
3838  data[pair_first[i]] = pair_second[i];
3839  }
3840 }
3841 
3842 
3843 
3844 template <typename Context, typename OutputContext,
3845  typename Iter, typename OutputIter>
3846 inline void Communicator::broadcast_packed_range(const Context * context1,
3847  Iter range_begin,
3848  const Iter range_end,
3849  OutputContext * context2,
3850  OutputIter out_iter,
3851  const unsigned int root_id) const
3852 {
3853  typedef typename std::iterator_traits<Iter>::value_type T;
3854  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
3855 
3856  do
3857  {
3858  // We will serialize variable size objects from *range_begin to
3859  // *range_end as a sequence of ints in this buffer
3860  std::vector<buffer_t> buffer;
3861 
3862  if (this->rank() == root_id)
3863  range_begin = Parallel::pack_range
3864  (context1, range_begin, range_end, buffer);
3865 
3866  // this->broadcast(vector) requires the receiving vectors to
3867  // already be the appropriate size
3868  std::size_t buffer_size = buffer.size();
3869  this->broadcast (buffer_size, root_id);
3870 
3871  // We continue until there's nothing left to broadcast
3872  if (!buffer_size)
3873  break;
3874 
3875  buffer.resize(buffer_size);
3876 
3877  // Broadcast the packed data
3878  this->broadcast (buffer, root_id);
3879 
3880  if (this->rank() != root_id)
3881  Parallel::unpack_range
3882  (buffer, context2, out_iter, (T*)libmesh_nullptr);
3883  } while (true); // break above when we reach buffer_size==0
3884 }
3885 
3886 
3887 #else // LIBMESH_HAVE_MPI
3888 
3889 template <typename T>
3890 inline bool Communicator::verify(const T &) const { return true; }
3891 
3892 template <typename T>
3893 inline bool Communicator::semiverify(const T *) const { return true; }
3894 
3895 template <typename T>
3896 inline void Communicator::min(T &) const {}
3897 
3898 template <typename T>
3899 inline void Communicator::minloc(T &, unsigned int & min_id) const { min_id = 0; }
3900 
3901 template <typename T>
3902 inline void Communicator::minloc(std::vector<T> & r, std::vector<unsigned int> & min_id) const
3903 { for (std::size_t i=0; i!= r.size(); ++i) min_id[i] = 0; }
3904 
3905 template <typename T>
3906 inline void Communicator::max(T &) const {}
3907 
3908 template <typename T>
3909 inline void Communicator::maxloc(T &, unsigned int & max_id) const { max_id = 0; }
3910 
3911 template <typename T>
3912 inline void Communicator::maxloc(std::vector<T> & r, std::vector<unsigned int> & max_id) const
3913 { for (std::size_t i=0; i!= r.size(); ++i) max_id[i] = 0; }
3914 
3915 template <typename T>
3916 inline void Communicator::sum(T &) const {}
3917 
3918 template <typename T>
3919 inline void Communicator::set_union(T &) const {}
3920 
3921 template <typename T>
3922 inline void Communicator::set_union(T &, const unsigned int root_id) const
3923 { libmesh_assert_equal_to(root_id, 0); }
3924 
3928 inline status Communicator::probe (const unsigned int,
3929  const MessageTag &) const
3930 { libmesh_not_implemented(); status s; return s; }
3931 
3935 template <typename T>
3936 inline void Communicator::send (const unsigned int,
3937  const T &,
3938  const MessageTag &) const
3939 { libmesh_not_implemented(); }
3940 
3941 template <typename T>
3942 inline void Communicator::send (const unsigned int,
3943  const T &,
3944  Request &,
3945  const MessageTag &) const
3946 { libmesh_not_implemented(); }
3947 
3948 template <typename T>
3949 inline void Communicator::send (const unsigned int,
3950  const T &,
3951  const DataType &,
3952  const MessageTag &) const
3953 { libmesh_not_implemented(); }
3954 
3955 template <typename T>
3956 inline void Communicator::send (const unsigned int,
3957  const T &,
3958  const DataType &,
3959  Request &,
3960  const MessageTag &) const
3961 { libmesh_not_implemented(); }
3962 
3963 template <typename Context, typename Iter>
3964 inline void Communicator::send_packed_range(const unsigned int,
3965  const Context *,
3966  Iter,
3967  const Iter,
3968  const MessageTag &) const
3969 { libmesh_not_implemented(); }
3970 
3971 template <typename Context, typename Iter>
3972 inline void Communicator::send_packed_range (const unsigned int,
3973  const Context *,
3974  Iter,
3975  const Iter,
3976  Request &,
3977  const MessageTag &) const
3978 { libmesh_not_implemented(); }
3979 
3983 template <typename T>
3984 inline Status Communicator::receive (const unsigned int,
3985  T &,
3986  const MessageTag &) const
3987 { libmesh_not_implemented(); return Status(); }
3988 
3989 template <typename T>
3990 inline void Communicator::receive(const unsigned int,
3991  T &,
3992  Request &,
3993  const MessageTag &) const
3994 { libmesh_not_implemented(); }
3995 
3996 template <typename T>
3997 inline Status Communicator::receive(const unsigned int,
3998  T &,
3999  const DataType &,
4000  const MessageTag &) const
4001 { libmesh_not_implemented(); return Status(); }
4002 
4003 template <typename T>
4004 inline void Communicator::receive(const unsigned int,
4005  T &,
4006  const DataType &,
4007  Request &,
4008  const MessageTag &) const
4009 { libmesh_not_implemented(); }
4010 
4011 template <typename Context, typename OutputIter, typename T>
4012 inline void
4013 Communicator::receive_packed_range(const unsigned int,
4014  Context *,
4015  OutputIter,
4016  const T *,
4017  const MessageTag &) const
4018 { libmesh_not_implemented(); }
4019 
4020 // template <typename Context, typename OutputIter>
4021 // inline void Communicator::receive_packed_range(const unsigned int, Context *, OutputIter, Request &, const MessageTag &) const
4022 // { libmesh_not_implemented(); }
4023 
4027 template <typename T1, typename T2>
4028 inline void Communicator::send_receive (const unsigned int send_tgt,
4029  const T1 & send_val,
4030  const unsigned int recv_source,
4031  T2 & recv_val,
4032  const MessageTag &,
4033  const MessageTag &) const
4034 {
4035  libmesh_assert_equal_to (send_tgt, 0);
4036  libmesh_assert_equal_to (recv_source, 0);
4037  recv_val = send_val;
4038 }
4039 
4046 template <typename Context1, typename RangeIter,
4047  typename Context2, typename OutputIter, typename T>
4048 inline void
4049 Communicator::send_receive_packed_range (const unsigned int dest_processor_id,
4050  const Context1 * context1,
4051  RangeIter send_begin,
4052  const RangeIter send_end,
4053  const unsigned int source_processor_id,
4054  Context2 * context2,
4055  OutputIter out_iter,
4056  const T * output_type,
4057  const MessageTag &,
4058  const MessageTag &) const
4059 {
4060  // This makes no sense on one processor unless we're deliberately
4061  // sending to ourself.
4062  libmesh_assert_equal_to(dest_processor_id, 0);
4063  libmesh_assert_equal_to(source_processor_id, 0);
4064 
4065  // On one processor, we just need to pack the range and then unpack
4066  // it again.
4067  typedef typename std::iterator_traits<RangeIter>::value_type T1;
4068  typedef typename Parallel::Packing<T1>::buffer_type buffer_t;
4069 
4070  while (send_begin != send_end)
4071  {
4072  libmesh_assert_greater (std::distance(send_begin, send_end), 0);
4073 
4074  // We will serialize variable size objects from *range_begin to
4075  // *range_end as a sequence of ints in this buffer
4076  std::vector<buffer_t> buffer;
4077 
4078  const RangeIter next_send_begin = Parallel::pack_range
4079  (context1, send_begin, send_end, buffer);
4080 
4081  libmesh_assert_greater (std::distance(send_begin, next_send_begin), 0);
4082 
4083  send_begin = next_send_begin;
4084 
 4085      Parallel::unpack_range
 4086        (buffer, context2, out_iter, output_type);
4087  }
4088 }
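A hedged usage sketch of send_receive_packed_range (with MPI it exchanges ranges between two processors; with the serial fallback above it simply round-trips the data through pack_range/unpack_range). MyItem, its Packing specialization, and comm are assumptions, not part of this file:

    const unsigned int dest   = (comm.rank() + 1) % comm.size();
    const unsigned int source = (comm.rank() + comm.size() - 1) % comm.size();

    std::vector<MyItem> local_items;   // what this processor sends
    std::vector<MyItem> incoming;      // what it receives
    void * no_context = libmesh_nullptr;

    comm.send_receive_packed_range (dest, no_context,
                                    local_items.begin(), local_items.end(),
                                    source, no_context,
                                    std::back_inserter(incoming),
                                    (MyItem *)libmesh_nullptr /* output_type */);

With a single processor dest == source == 0, so the assertions above hold and incoming ends up as a copy of local_items.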
4089 
4093 template <typename T>
4094 inline void Communicator::gather(const unsigned int libmesh_dbg_var(root_id),
4095  const T & send_val,
4096  std::vector<T> & recv_val) const
4097 {
4098  libmesh_assert_equal_to (root_id, 0);
4099  recv_val.resize(1);
4100  recv_val[0] = send_val;
4101 }
4102 
4103 template <typename T>
4104 inline void Communicator::gather(const unsigned int libmesh_dbg_var(root_id),
4105  const std::basic_string<T> & sendval,
4106  std::vector<std::basic_string<T>> & recv,
4107  const bool /*identical_buffer_sizes*/) const
4108 {
4109  libmesh_assert_equal_to (root_id, 0);
4110  recv.resize(1);
4111  recv[0] = sendval;
4112 }
4113 
4114 template <typename T>
4115 inline void Communicator::gather(const unsigned int root_id,
4116  std::vector<T> &) const
4117 { libmesh_assert_equal_to(root_id, 0); }
4118 
4119 template <typename T>
4120 inline void Communicator::allgather(const T & send_val,
4121  std::vector<T> & recv_val) const
4122 {
4123  recv_val.resize(1);
4124  recv_val[0] = send_val;
4125 }
4126 
4127 template <typename T>
4128 inline void Communicator::allgather(std::vector<T> &,
4129  const bool) const {}
4130 
4131 template <typename T>
4132 inline void Communicator::allgather(std::vector<std::basic_string<T>> &,
4133  const bool) const {}
4134 
4135 template <typename T>
4136 inline void Communicator::scatter(const std::vector<T> & data,
4137  T & recv,
4138  const unsigned int libmesh_dbg_var(root_id)) const
4139 {
4140  libmesh_assert_equal_to (root_id, 0);
4141  recv = data[0];
4142 }
4143 
4144 
4145 template <typename T>
4146 inline void Communicator::scatter(const std::vector<T> & data,
4147  std::vector<T> & recv,
4148  const unsigned int libmesh_dbg_var(root_id)) const
4149 {
4150  libmesh_assert_equal_to (root_id, 0);
4151  recv.assign(data.begin(), data.end());
4152 }
4153 
4154 
4155 template <typename T>
4156 inline void Communicator::scatter(const std::vector<T> & data,
4157  const std::vector<int> counts,
4158  std::vector<T> & recv,
4159  const unsigned int libmesh_dbg_var(root_id)) const
4160 {
4161  libmesh_assert_equal_to (root_id, 0);
4162  libmesh_assert_equal_to (counts.size(), 1);
4163  recv.assign(data.begin(), data.begin() + counts[0]);
4164 }
4165 
4166 
4167 template <typename T>
4168 inline void Communicator::scatter(const std::vector<std::vector<T>> & data,
4169  std::vector<T> & recv,
4170  const unsigned int libmesh_dbg_var(root_id),
4171  const bool /*identical_buffer_sizes*/) const
4172 {
4173  libmesh_assert_equal_to (root_id, 0);
4174  libmesh_assert_equal_to (data.size(), 1);
4175  recv.assign(data[0].begin(), data[0].end());
4176 }
4177 
4178 
4179 
4180 template <typename T>
4181 inline void Communicator::alltoall(std::vector<T> &) const {}
4182 
4183 template <typename T>
4184 inline void Communicator::broadcast (T &,
4185  const unsigned int libmesh_dbg_var(root_id)) const
4186 { libmesh_assert_equal_to(root_id, 0); }
4187 
4188 #endif // LIBMESH_HAVE_MPI
4189 
4190 // Some of our methods are implemented indirectly via other
4191 // MPI-encapsulated methods and the implementation works with or
4192 // without MPI.
4193 
4194 template <typename Context, typename Iter, typename OutputIter>
4195 inline void Communicator::gather_packed_range(const unsigned int root_id,
4196  Context * context,
4197  Iter range_begin,
4198  const Iter range_end,
4199  OutputIter out_iter) const
4200 {
4201  typedef typename std::iterator_traits<Iter>::value_type T;
4202  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
4203 
4204  bool nonempty_range = (range_begin != range_end);
4205  this->max(nonempty_range);
4206 
4207  while (nonempty_range)
4208  {
4209  // We will serialize variable size objects from *range_begin to
4210  // *range_end as a sequence of ints in this buffer
4211  std::vector<buffer_t> buffer;
4212 
4213  range_begin = Parallel::pack_range
4214  (context, range_begin, range_end, buffer);
4215 
4216  this->gather(root_id, buffer);
4217 
 4218      Parallel::unpack_range
 4219        (buffer, context, out_iter, (T*)(libmesh_nullptr));
4220 
4221  nonempty_range = (range_begin != range_end);
4222  this->max(nonempty_range);
4223  }
4224 }
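A hedged usage sketch of gather_packed_range, with the same hypothetical MyItem, Packing specialization, and comm as above: each processor contributes a range, and on processor root_id the unpacked items from every processor are written through the output iterator:

    std::vector<MyItem> local_items;  // this processor's contribution
    std::vector<MyItem> gathered;     // on root_id: items from all processors
    void * no_context = libmesh_nullptr;

    comm.gather_packed_range (/* root_id = */ 0, no_context,
                              local_items.begin(), local_items.end(),
                              std::back_inserter(gathered));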
4225 
4226 
4227 template <typename Context, typename Iter, typename OutputIter>
4228 inline void Communicator::allgather_packed_range(Context * context,
4229  Iter range_begin,
4230  const Iter range_end,
4231  OutputIter out_iter) const
4232 {
4233  typedef typename std::iterator_traits<Iter>::value_type T;
4234  typedef typename Parallel::Packing<T>::buffer_type buffer_t;
4235 
4236  bool nonempty_range = (range_begin != range_end);
4237  this->max(nonempty_range);
4238 
4239  while (nonempty_range)
4240  {
4241  // We will serialize variable size objects from *range_begin to
4242  // *range_end as a sequence of ints in this buffer
4243  std::vector<buffer_t> buffer;
4244 
4245  range_begin = Parallel::pack_range
4246  (context, range_begin, range_end, buffer);
4247 
4248  this->allgather(buffer, false);
4249 
4250  libmesh_assert(buffer.size());
4251 
 4252      Parallel::unpack_range
 4253        (buffer, context, out_iter, (T*)libmesh_nullptr);
4254 
4255  nonempty_range = (range_begin != range_end);
4256  this->max(nonempty_range);
4257  }
4258 }
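allgather_packed_range takes the same arguments minus the root id and leaves every processor with the items contributed by all processors; a hedged sketch with the same hypothetical names:

    comm.allgather_packed_range (no_context,
                                 local_items.begin(), local_items.end(),
                                 std::back_inserter(gathered));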
4259 
4260 
4261 } // namespace Parallel
4262 
4263 } // namespace libMesh
4264 
4265 #endif // LIBMESH_PARALLEL_IMPLEMENTATION_H