
CPU/GPU Parallel Solution of the Two-Dimensional Steady-State Heat Conduction Problem

  This article uses two approaches, MPI and CUDA, to parallelize the computation, and their parallelization strategies differ slightly. MPI is a parallel computing method suited to computer clusters, and the strategy adopted for the MPI implementation is the one described above. To improve the parallel efficiency of the MPI version and reduce the time lost to data transfer, the implementation uses asynchronous (non-blocking) communication functions. The main iteration loop of the MPI implementation is shown below:

while (totalCount < countGoal)
{
    step++;

    // Synchronize boundary data
    if (nodeIndex % 2 == 0)
    {
        // Next
        if (nodeIndex != nodeCount - 1)
        {
            MPI_Isend(m + jobEnd * n - n, n, MPI_DOUBLE, nodeIndex + 1,
                      0, MPI_COMM_WORLD, &requestSendNext);
            MPI_Irecv(m + jobEnd * n, n, MPI_DOUBLE, nodeIndex + 1,
                      1, MPI_COMM_WORLD, &requestReceiveNext);
        }

        // Previous
        if (nodeIndex != 0)
        {
            MPI_Isend(m + jobStartingPoint * n, n, MPI_DOUBLE,
                      nodeIndex - 1, 1, MPI_COMM_WORLD, &requestSendPrevious);
            MPI_Irecv(m + jobStartingPoint * n - n, n, MPI_DOUBLE,
                      nodeIndex - 1, 0, MPI_COMM_WORLD,
                      &requestReceivePrevious);
        }
    }
    else
    {
        // Previous
        MPI_Irecv(m + jobStartingPoint * n - n, n, MPI_DOUBLE,
                  nodeIndex - 1, 0, MPI_COMM_WORLD,
                  &requestReceivePrevious);
        MPI_Isend(m + jobStartingPoint * n, n, MPI_DOUBLE,
                  nodeIndex - 1, 1, MPI_COMM_WORLD, &requestSendPrevious);

        // Next
        if (nodeIndex != nodeCount - 1)
        {
            MPI_Irecv(m + jobEnd * n, n, MPI_DOUBLE,
                      nodeIndex + 1, 1, MPI_COMM_WORLD, &requestReceiveNext);
            MPI_Isend(m + jobEnd * n - n, n, MPI_DOUBLE,
                      nodeIndex + 1, 0, MPI_COMM_WORLD, &requestSendNext);
        }
    }

    // Compute inner data
    localCount = 0;
    for (int i = jobStartingPoint + 1; i < jobEnd - 1; i++)
        rowDataIteration(n, step, epsilon, i, m, w, &localCount);

    // Compute boundary data
    int rowIndex;
    if (nodeIndex == 0)
    {
        rowIndex = jobStartingPoint;
        rowDataIteration(n, step, epsilon, rowIndex, m, w, &localCount);
        MPI_Wait(&requestReceiveNext, &statusNext);
        rowIndex = jobEnd - 1;
        rowDataIteration(n, step, epsilon, rowIndex, m, w, &localCount);
        MPI_Wait(&requestSendNext, &statusNext);
    }
    else if (nodeIndex == nodeCount - 1)
    {
        rowIndex = jobEnd - 1;
        rowDataIteration(n, step, epsilon, rowIndex, m, w, &localCount);
        MPI_Wait(&requestReceivePrevious, &statusPrevious);
        rowIndex = jobStartingPoint;
        rowDataIteration(n, step, epsilon, rowIndex, m, w, &localCount);
        MPI_Wait(&requestSendPrevious, &statusPrevious);
    }
    else
    {
        // MPI_Waitany() could also be used here to improve performance
        MPI_Wait(&requestReceivePrevious, &statusPrevious);
        rowIndex = jobStartingPoint;
        rowDataIteration(n, step, epsilon, rowIndex, m, w, &localCount);
        MPI_Wait(&requestReceiveNext, &statusNext);
        rowIndex = jobEnd - 1;
        rowDataIteration(n, step, epsilon, rowIndex, m, w, &localCount);
        MPI_Wait(&requestSendPrevious, &statusPrevious);
        MPI_Wait(&requestSendNext, &statusNext);
    }

    // Sum the per-node counts of converged points across all ranks
    totalCount = 0;
    MPI_Allreduce(&localCount, &totalCount, 1,
                  MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    // Swap the old and new grids for the next iteration
    temp = m; m = w; w = temp;
}
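
  The helper rowDataIteration that appears throughout the loop is not shown in this excerpt. Purely as an illustration of the role it plays, a minimal sketch is given below, assuming that m holds the current iterate, w receives the new values, epsilon is the per-point convergence threshold, and localCount accumulates the number of points whose change is already below epsilon; the step parameter is kept only to match the call signature above. This is a hypothetical reconstruction, not the article's actual routine.

#include <math.h>

/* Illustrative sketch only: one Jacobi-style update of row rowIndex of an
   n-column grid. m is the current iterate, w receives the new values, and
   *localCount counts points whose change fell below epsilon. */
void rowDataIteration(int n, int step, double epsilon, int rowIndex,
                      const double *m, double *w, int *localCount)
{
    (void)step;                      /* not needed in this simplified sketch */
    for (int j = 1; j < n - 1; j++)  /* skip the fixed boundary columns */
    {
        int idx = rowIndex * n + j;
        w[idx] = 0.25 * (m[idx - n] + m[idx + n] + m[idx - 1] + m[idx + 1]);
        if (fabs(w[idx] - m[idx]) < epsilon)
            (*localCount)++;
    }
}

  Under this reading, the loop condition totalCount < countGoal simply keeps iterating until a target number of grid points changes by less than epsilon between successive sweeps.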
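
  The CUDA implementation mentioned above is not reproduced in this excerpt. As a rough sketch of how the same five-point stencil update could look on the GPU, one possible kernel is shown below; the kernel name, the d_-prefixed pointers, and the convergence counter are placeholders introduced for illustration and do not come from the original code.

// Illustrative CUDA sketch (not the article's kernel): one Jacobi step of the
// five-point stencil on an n x n grid. d_m is the current iterate, d_w receives
// the new values, and d_count accumulates the number of converged points.
__global__ void jacobiStepKernel(int n, double epsilon,
                                 const double *d_m, double *d_w, int *d_count)
{
    int j = blockIdx.x * blockDim.x + threadIdx.x;
    int i = blockIdx.y * blockDim.y + threadIdx.y;
    if (i < 1 || i >= n - 1 || j < 1 || j >= n - 1)
        return;                      // leave the fixed boundary untouched

    int idx = i * n + j;
    double v = 0.25 * (d_m[idx - n] + d_m[idx + n] + d_m[idx - 1] + d_m[idx + 1]);
    d_w[idx] = v;
    if (fabs(v - d_m[idx]) < epsilon)
        atomicAdd(d_count, 1);       // count points that have effectively converged
}

  A host-side loop would launch this kernel once per iteration and then swap d_m and d_w, mirroring the pointer swap at the end of the MPI loop above.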