You are here: Foswiki>Main/Cimec Web>GameOfLife (24 Dec 2020, MarioStorti)Edit Attach
Comentarios: Esta version del codigo es best effort en el sentido de que he usado todas las posibilidades de C/C++ para obtener un codigo lo mas rapido posible. Por un lado esto hace que la comparacion con otros programas pueda ser un poco distorsionada ya que incluso en modo secuencial hay una gran ganancia. En principio el hacer el codigo muy rapido hace mas dificil obtener altas eficiencias en la paralelizacion, ya que el tiempo de comunicacion/sincronizacion pesa mas con respecto al de calculo. De todas formas he obtenido eficiencias del orden del 98% usando balance de carga en tableros de 4000x4000 celdas. (ver ProcessorPerformance)

//  Copyright (C) 1999-2002, Mario Alberto Storti, Centro
//  Internacional de Metodos Numericos en Ingenieria
//  (CIMEC-Argentina), Universidad Nacional del Litoral
//  (UNL-Argentina), Consejo Nacional de Investigaciones Cientificas y
//  Tecnicas (UNL-Argentina).
//  
//  This program is  free software; you can redistribute  it and/or modify
//  it under the  terms of the GNU General Public  License as published by
//  the Free Software Foundation; either  version 2 of the License, or (at
//  your option) any later version.
//  
//  This program  is distributed in the  hope that it will  be useful, but
//  WITHOUT   ANY  WARRANTY;   without  even   the  implied   warranty  of
//  MERCHANTABILITY  or FITNESS  FOR A  PARTICULAR PURPOSE.   See  the GNU
//  General Public License for more details.
//  
//  You  should have received  a copy  of the  GNU General  Public License
//  along  with  this  program;  if   not,  write  to  the  Free  Software
//  Foundation, Inc.,  59 Temple Place, Suite 330,  Boston, MA 02111-1307,
//  USA.

// $Id: GameOfLife.txt,v 1.4 2014/09/05 23:06:01 MarioStorti Exp $
#define _GNU_SOURCE
#include <mpi.h>
#include <cstdio>
#include <cassert>
#include <unistd.h>
#include <stdlib.h>
#include <ctype.h>
#include <cstring>
#include <vector>
#include <cmath>
#include <time.h>
#include <sys/time.h>

// Simulates the `Game of Life' in parallel using MPI. 

/// This represents the board. 
class Board {
private:
  // N is the size of the board, so that it has N x N cells NN = N+2
  // is the size with two rows added at each end for simulating the
  // walls of the board.  We use a 0-based (C-style) indices i,j for
  // rows and columns.  Each processor owns a range of consecutive
  // rowns with indices i1<= i <i2. Currently the number of rows in
  // each processor is almost the same (no load balance). For
  // instance, if N=20 and size=2, then i1=0, i2=10 in processor 0 and
  // i1=10, i2=20 in processor 1.  So that each processor owns i2-i1
  // rows. We store a board that has one additional row at each end
  // (so called `ghost' rows), so that in fact at each processor we
  // store i2-i1+2 rows.  We define I1=i1-1 and I2=i2+1 so that the
  // rows stored at this processor are I1 <= i < I2. Ghost rows I1 and
  // I2-1 are computed at processors `myrank-1' and `myrank+1'. So
  // that they have to be transmitted at each time step with MPI.  The
  // flag `periodic' indicates whether we use periodic boundary
  // conditions or not (not used currently though!!).
  int N,NN,i1,i2,I1,I2,size,myrank,periodic;
  // Weights of processors
  vector<double> w;
  // Number of rows in each processor
  vector<int> rows_proc;
  // This arraus store the board. `board2' is a copy of the board for
  // when updating the board. 
  char **board,*row1,*row2;
  // returns the position in the board of cell j,k
  inline char *pos(int j, int k) { return &board[(k-I1)][j+1]; }
  void print_row(int k);
public:
  // Read weights file from "weights.dat"
  void read_weights_file(int flag);
  // Initializes the board.
  void init(int N_,int periodic=0);
  // Sets the the state of a cell. 
  void setv(int j,int k,int val) {
    if (k>=i1 && k<i2) *pos(j,k) = val;
  }
  // Prints the board
  void print();
  // Advances the board one generation.
  void advance();
  Board() { board = NULL; }
  ~Board();
};

void Board::read_weights_file(int flag) {
  // Read processors speed from file
  double r;
  char *line=NULL;
  size_t n=0;
  w.clear();
  if (flag) {
    // if (myrank==0) printf("Reading weights file\n");
    FILE *fid = fopen("weights.dat","r");
    assert(fid);
    int p=0;
    while (1) {
      int nread = getline(&line,&n,fid);
      if (nread == EOF) break;
      sscanf(line,"%lf",&r);
      // if (myrank==0) printf("[%d] weight %f",++p,r);
      w.push_back(r);
    }
    fclose(fid);
    if (line) free(line);
  }
}

void Board::print() {
  // This is done in parallel and may not work
  // correctly sometimes. 

  // Loop over processors. This is synchronized by the barrier below. 
  for (int r=0; r<size; r++ ) {
    if (r==myrank) {
      for (int k=i1; k<i2; k++) {
   char *p = pos(0,k), *pe=p+N;
   while (p<pe) {
     putc((*p++ ? '*' : '.'),stdout);
     putc(' ',stdout);
   }
   putc('\n',stdout);
      }
      fflush(stdout);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    if (myrank==0) fflush(stdout);
  }
  if (myrank==0) printf("\n");
}

void Board::print_row(int k) {
  char *p = pos(0,k), *pe = p+N;
  printf("row %3d -> ",k);
  while (p < pe) {
    putc((*p++ ? '*' : '.'),stdout);
    putc(' ',stdout);
  }
  putc('\n',stdout);
}

// Computes next generation 
void Board::advance() {

#define SEND(row,proc)    \
      MPI_Send(pos(0,(row)),N,MPI_CHAR,(proc),0,MPI_COMM_WORLD)
#define RECEIVE(row,proc) \
    MPI_Recv(pos(0,(row)),N,MPI_CHAR,(proc),MPI_ANY_TAG,MPI_COMM_WORLD,&stat)

  // First stage 2j -> 2j+1
  MPI_Status stat;
  if (myrank % 2 == 0 ) {
    if (myrank<size-1) {
      SEND(i2-1,myrank+1);
      RECEIVE(I2-1,myrank+1);
    }
    if (myrank>0) {
      SEND(i1,myrank-1);
      RECEIVE(I1,myrank-1);
    }
  } else {
    RECEIVE(I1,myrank-1);
    SEND(i1,myrank-1);
    if (myrank<size-1) {
      RECEIVE(I2-1,myrank+1);
      SEND(i2-1,myrank+1);
    }
  }

  // Initialize row2
  { char *q = row2, *qe = q+NN;
  while (q<qe) *q++ = 0;
  }

  // Loop over rows in this processor (not included the `ghost' ones
  // (I1 and I2-1)
  int k; char *tmp;
  register char SW,S,SE, W,O,E, NW,NN,NE;
  for (k=i1; k<i2; k++) {
    char *p = pos(0,k), 
      *pe = p+N, 
      *pNE = pos(1,k+1), 
      *pSE = pos(1,k-1),
      *pp = row1+1;
    SW = *(pSE-2);
    S  = *(pSE-1);
    SE = *(pSE  );
    
    W = *(p-1);
    O = *(p  );
    E = *(p+1);

    NW = *(pNE-2);
    NN  = *(pNE-1);
    NE = *(pNE  );
    
    // Make it periodic
    if (periodic) {
      *(p-1) = *(pe-1);
      *pe = *p;
      assert(size==1); // not implemented yet
    }

    // Cells computed in this processor in this row have 
    // p <= address < pe
    int alive;
    while (p < pe) {
      // Neighbors at positions one row below
      alive = SW + S + SE + W + E + NW + NN + NE;
      
      if (alive==0) {      // We treat this case before, perhaps we gain
   *pp = 0;      // some acceleration. Cell becomes dead
      } else if (alive==3) {
   *pp = 1;      // Cell becomes alive
      } else if (alive!=2) {
   *pp = 0;      // Cell becomes dead
      } else *pp = *p;      // cell remains in its state
      p++; pSE++; pNE++; pp++;   // update positions in board to East 
      SW = S; S = SE ; SE = *pSE;
      NW = NN; NN = NE ; NE = *pNE;
      W = O; O = E; E = *(p+1);
    }
    tmp = board[k-1-I1];
    board[k-1-I1] = row2;
    row2 = row1;
    row1 = tmp;
  }
  tmp = board[k-1-I1];
  board[k-1-I1] = row2;
  row2 = row1;
  row1 = tmp;

#if 0
  // Swap boards
  char *temp = board;
  board = board2;
  board2 = temp;
#endif
}

// Initialize board
void Board::init(int N_,int per=0) {

  // size = number of processors
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  // my rank in the row of processors
  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);

  // flags whther periodic b.c.'s are used or not
  periodic = per;
  // size of board
  N = N_;

  if (w.size()>0 && w.size()<size ) {
    if (myrank==0) printf("number of weights in \"weights\" "
           "file doesn't match number of procs.\n");
    MPI_Finalize();
    exit(1);
  } 
  if (w.size()==0) w.resize(size,1.);
  rows_proc.resize(size);
  double sum_w = 0., carry=0., a;
  for (int p=0; p<size; p++) sum_w += w[p];
  int j = 0;
  double tol = 1e-8;
  for (int p=0; p<size; p++) {
    w[p] /= sum_w;
    a = w[p]*N + carry + tol;
    rows_proc[p] = int(floor(a));
    carry = a - rows_proc[p] - tol;
    if (p==myrank) {
      i1 = j;
      i2 = i1 + rows_proc[p];
    }
    j += rows_proc[p];
  }
  assert(j==N);
  assert(fabs(carry) < tol);
  
  I1=i1-1;
  I2=i2+1;

  // Alloc memory
  NN = N+2;         // number of stored rows (include ghosts)
  board  = new (char *)[I2-I1]; // allocate memory for boards
  for (int k=0; k<I2-I1; k++) board[k] = new char[NN];
  row1 = new char[NN];
  row2 = new char[NN];
  
  // Initialize board
  for (int k=I1; k<I2; k++) {
    char *p = pos(-1,k), *pe = p+N+2;
    while (p < pe) *p++ = 0;
  }
}

// Destructor
Board::~Board() {
  for (int k=0; k<I2-I1; k++) 
    delete[] board[k];
  delete[] board;
  board=NULL;
  delete[] row1;
  row1 = NULL;
  delete[] row2;
  row2 = NULL;
}

double etime(timeval &x,timeval &y) {
  return double(y.tv_sec)-double(x.tv_sec)
    +(double(y.tv_usec)-double(x.tv_usec))/1e6;
}

// Main program
int main(int argc,char **args) {
  int size,myrank;
  char *pattern="cross";;

  // Initializes MPI
  MPI_Init(&argc,&args);
  // size = number of processors
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  // my rank in the row of processors
  MPI_Comm_rank(MPI_COMM_WORLD,&myrank);

  int N=20,nstep=30,opt_q=0,opt_v=0,opt_w=0;
  char c;

  while ((c = getopt(argc, args, "N:s:hqp:vw")) != -1) {
    switch (c) {
    case 'h':
      if (myrank==0) {
   printf(" usage: $ mpirun [OPTIONS] life\n\n"
          "MPI options: -np <number-of-processors>\n"
          "             -machinefile <machine-file>\n\n"
          "Other options: -N <number of columns/rows>\n"
          "               -s <numer of time steps>\n"
          "               -q             # quiet (no output)\n"
          "               -v             # verbose (print board at each step)\n"
          "               -p <pattern>   # may be ('cross',line10)\n"
          "               -w             # read weights from \"weigths.dat\"\n"
          "               -h             # print this help\n"
          );
      }
      MPI_Finalize();
    case 'N':
      sscanf(optarg,"%d",&N);
      break;
    case 'w':
      opt_w = 1;
      break;
    case 's':
      sscanf(optarg,"%d",&nstep);
      break;
    case 'q':
      opt_q = 1;
      break;
    case 'v':
      opt_v = 1;
      break;
    case 'p':
      pattern = new char[strlen(optarg+1)];
      strcpy(pattern,optarg);
      break;
    case '?':
      if (isprint (optopt))
   fprintf (stderr, "Unknown option `-%c'.\n", optopt);
      else
   fprintf (stderr,
       "Unknown option character `\\x%x'.\n",
       optopt);
      return 1;
    default:
      if (isprint (optopt))
   fprintf (stderr, "Unknown option `-%c'.\n", optopt);
      else
   fprintf (stderr,
       "Unknown option character `\\x%x'.\n",
       optopt);
      abort ();
    }
  }

  /// time related quantities
  struct timeval start, end;

  // Defines and initializes the board
  Board board;
  board.read_weights_file(opt_w);
  board.init(N);

  if (!strcmp(pattern,"cross")) {
    for (int k=0; k<N; k++) {
      board.setv(N/2,k,1);
      board.setv(k,N/2,1);
    }
  } else if (!strcmp(pattern,"line10")) {
    assert(N>20);
    for (int k=-5; k<5; k++) board.setv(N/2,N/2+k,1);
  } else if (!strcmp(pattern,"block")) {
    assert(N>3);
    board.setv(N/2  ,N/2  ,1);
    board.setv(N/2  ,N/2-1,1);
    board.setv(N/2-1,N/2  ,1);
    board.setv(N/2-1,N/2-1,1);
  } else {
    if (myrank==0) 
      printf("Not known pattern \"%s\"\n",pattern);
    MPI_Finalize();
    exit(0);
  }

  if (!opt_q) board.print();
  gettimeofday (&start,NULL);
  for (int k=0; k<nstep; k++) {
    board.advance();
    if (opt_v) {
      if (myrank==0) printf("-- Generation %d:\n",k+1); 
      board.print();
    }
  }
  if (myrank==0) {
    gettimeofday (&end,NULL);
    double elapsed = etime(start,end);
    double rate = double(N)*double(N)*double(nstep)/elapsed/1e6;
    printf("N %d, steps %d, elapsed %f, rate: %f Mcell/sec\n",
      N,nstep,elapsed,rate);
  }

  gettimeofday (&end,NULL);
  if (!opt_q && !opt_v) {
    if (myrank==0) printf("-- Final board:\n"); 
    board.print();
  }
  MPI_Finalize();
}

-- MarioStorti - 03 May 2002
Topic revision: r5 - 24 Dec 2020, MarioStorti
This site is powered by FoswikiCopyright © by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Foswiki? Send feedback