itto

PICH code except for a few changes*/
#undef FUNCNAME
#define FUNCNAME MPIR_Gatherv_MV2
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)

/*
 * MPIR_Gatherv_MV2 - linear Gatherv implementation.
 *
 * Root path: posts one nonblocking receive (MPIC_Irecv) per rank with a
 * non-zero recvcount (copying its own contribution locally via
 * MPIR_Localcopy unless MPI_IN_PLACE), then waits on all requests with
 * MPIC_Waitall.  Per-request communication errors are recorded in
 * mpi_errno_ret / *errflag but do not abort the remaining transfers.
 *
 * Non-root path: sends sendbuf to root; uses a synchronous send
 * (MPIC_Ssend) once the communicator size reaches
 * mv2_gatherv_ssend_threshold to throttle unexpected-message buildup,
 * otherwise a plain MPIC_Send.
 *
 * Returns MPI_SUCCESS or an MPI error code; on partial failure the first
 * recorded communication error is returned and *errflag is set.
 */
int MPIR_Gatherv_MV2(const void *sendbuf,
                     int sendcount,
                     MPI_Datatype sendtype,
                     void *recvbuf,
                     const int *recvcounts,
                     const int *displs,
                     MPI_Datatype recvtype,
                     int root,
                     MPID_Comm *comm_ptr,
                     MPIR_Errflag_t *errflag)
{
    int comm_size, rank;
    int mpi_errno = MPI_SUCCESS;
    int mpi_errno_ret = MPI_SUCCESS;
    MPI_Aint extent;
    int i, reqs;
    MPID_Request **reqarray;
    MPI_Status *starray;
    MPIU_CHKLMEM_DECL(2);

    rank = comm_ptr->rank;

    /* check if multiple threads are calling this collective function */
    MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER(comm_ptr);

    /* If rank == root, then I recv lots, otherwise I send */
    if (((comm_ptr->comm_kind == MPID_INTRACOMM) && (root == rank)) ||
        ((comm_ptr->comm_kind == MPID_INTERCOMM) && (root == MPI_ROOT))) {
        if (comm_ptr->comm_kind == MPID_INTRACOMM)
            comm_size = comm_ptr->local_size;
        else
            comm_size = comm_ptr->remote_size;

        MPID_Datatype_get_extent_macro(recvtype, extent);
        /* each node can make sure it is not going to overflow aint */
        MPIU_Ensure_Aint_fits_in_pointer(MPIU_VOID_PTR_CAST_TO_MPI_AINT recvbuf +
                                         displs[rank] * extent);

        MPIU_CHKLMEM_MALLOC(reqarray, MPID_Request **,
                            comm_size * sizeof(MPID_Request *),
                            mpi_errno, "reqarray");
        MPIU_CHKLMEM_MALLOC(starray, MPI_Status *,
                            comm_size * sizeof(MPI_Status),
                            mpi_errno, "starray");

        reqs = 0;
        for (i = 0; i < comm_size; i++) {
            if (recvcounts[i]) {
                if ((comm_ptr->comm_kind == MPID_INTRACOMM) && (i == rank)) {
                    /* root's own contribution: local copy unless the caller
                     * asked for MPI_IN_PLACE (data already in recvbuf) */
                    if (sendbuf != MPI_IN_PLACE) {
                        mpi_errno = MPIR_Localcopy(sendbuf, sendcount, sendtype,
                                                   ((char *) recvbuf +
                                                    displs[rank] * extent),
                                                   recvcounts[rank], recvtype);
                        if (mpi_errno)
                            MPIR_ERR_POP(mpi_errno);
                    }
                } else {
                    mpi_errno = MPIC_Irecv(((char *) recvbuf + displs[i] * extent),
                                           recvcounts[i], recvtype, i,
                                           MPIR_GATHERV_TAG, comm_ptr,
                                           &reqarray[reqs++]);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                }
            }
        }
        /* ... then wait for *all* of them to finish: */
        mpi_errno = MPIC_Waitall(reqs, reqarray, starray, errflag);
        if (mpi_errno && mpi_errno != MPI_ERR_IN_STATUS)
            MPIR_ERR_POP(mpi_errno);

        /* --BEGIN ERROR HANDLING-- */
        if (mpi_errno == MPI_ERR_IN_STATUS) {
            for (i = 0; i < reqs; i++) {
                if (starray[i].MPI_ERROR != MPI_SUCCESS) {
                    mpi_errno = starray[i].MPI_ERROR;
                    if (mpi_errno) {
                        /* for communication errors, just record the error
                         * but continue */
                        *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
                        MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                        MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
                    }
                }
            }
        }
        /* --END ERROR HANDLING-- */
    } else if (root != MPI_PROC_NULL) {
        /* non-root nodes, and in the intercomm. case, non-root nodes on
         * remote side */
        if (sendcount) {
            /* we want local size in both the intracomm and intercomm cases
             * because the size of the roots group (group A in the standard)
             * is irrelevant here. */
            comm_size = comm_ptr->local_size;

            if (comm_size >= mv2_gatherv_ssend_threshold) {
                /* large communicator: synchronous send so senders do not
                 * flood the root with unexpected messages */
                mpi_errno = MPIC_Ssend(sendbuf, sendcount, sendtype, root,
                                       MPIR_GATHERV_TAG, comm_ptr, errflag);
                if (mpi_errno) {
                    /* for communication errors, just record the error
                     * but continue */
                    *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
                    MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                    MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
                }
            } else {
                mpi_errno = MPIC_Send(sendbuf, sendcount, sendtype, root,
                                      MPIR_GATHERV_TAG, comm_ptr, errflag);
                if (mpi_errno) {
                    /* for communication errors, just record the error
                     * but continue */
                    *errflag = MPIR_ERR_GET_CLASS(mpi_errno);
                    MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**fail");
                    MPIR_ERR_ADD(mpi_errno_ret, mpi_errno);
                }
            }
        }
    }

  fn_exit:
    /* check if multiple threads are calling this collective function */
    MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT(comm_ptr);
    MPIU_CHKLMEM_FREEALL();
    /* a recorded per-message error takes precedence; otherwise promote a
     * raised errflag into a generic collective failure */
    if (mpi_errno_ret)
        mpi_errno = mpi_errno_ret;
    else if (*errflag)
        MPIR_ERR_SET(mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    return mpi_errno;

  fn_fail:
    goto fn_exit;
}