subroutine solmpj(p1)
! version: working version, barrou's removed,
! correct timing fft's
! AAPC with MPI-provided routines
! uses only 2 AAPC, using MPI-rovided routines,
! to pre-distribute the arrays s.t. complete
! 2-D planes are present on each processor
! uses ALLTOALL instead of ALLTOALLV
! ONLY distribution in j-direction allowed
! NOTE: input array p1 is supposed to have the ip1ray distribution,
! i.e. the entire range of the first index must be present on
! each processor
!******************************************************************
!******************** FAST POISSON SOLVER ************************
!***** *****
!*** P_xx + P_yy + P_zz =f(x,y,z) ***
!**** *****
!******************************************************************
! FOURIER TRANSFORMS IN X AND Y DIRECTION GIVE:
! a^2 P + b^2 P + P_zz = F(x,y,z) = FFT_i [ FTT_j (f(x,y,z))]
! where a and b are the KNOWN eigenvalues, and P_zz is
! P_zz =[ P_{i,j,k+1} - 2 P_{i,j,k} +P_{i,j,k-1} ] / (dz * dz)
! a^2 P + b^2 +P_zz =
! [P_{i,j,k+1}-(2+a^2+ b^2) P_{i,j,k}+P_{i,j,k-1}]/(dz*dz)=F( x,y,z)
! The equation above results in a tridiagonal system in k which
! can be solved with Gaussian elemination --> P
! The P we have found with the Gaussian elemination is still in
! the Fourier Space and 2 backward FFTS are necessary to compute
! the physical P
!******************************************************************
!******************************************************************
!******************************************************************
!**** Programmer: Bendiks Jan Boersma ******
!**** email : b.j.boersma@wbmt.tudelft.nl ******
!**** ******
!**** USES : VFFTPACK (netlib) ******
!**** : FFTPACK (netlib) ******
!**** (B.J. Boersma & L.J.P. Timmermans) ******
!**** ******
!******************************************************************
!******************************************************************
! mpi-version, no master region for timing
! copy times all included
use modmpi, only : myid,comm3d,mpierr,nprocs, barrou
use modglobal, only : imax,jmax,kmax,isen,jtot,pi,dyi,dzf,dzh, dxfi, kb, ke, kh
!, rhoa
! use modfields, only : rhobf, rhobh
implicit none
real :: dxi
integer :: i1, j1, k1
! real, intent(inout), dimension(:,:,:) :: p1
real p1(0:imax+1,0:jmax+1,0:kmax+1)
real, allocatable, dimension(:,:,:) :: d,p2
real, allocatable, dimension(:,:,:) :: xyzrt
real, allocatable, dimension(:) :: xrt,yrt,a,b,c,FFTI,FFTJ,winew,wjnew, rhobf, rhobh
real z,ak,bk,bbk,fac
integer jv
integer i, j, k
!real dzl(ke+kh-(kb-kh)),dzhl(ke+kh-(kb-kh))
real dzl(0:kmax+1),dzhl(1:kmax+1)
dxi = dxfi(1)
dzl(0:kmax+1) = dzf(kb-kh:ke+kh)
dzhl(1:kmax+1) = dzh(kb:ke+kh)
i1 = imax+1
j1 = jmax+1
k1 = kmax+1
! allocate(p1(0:i1,0:j1,0:k1))
! p and d distributed equally:
allocate(d(imax,jmax,kmax))
! re-distributed p:
allocate(p2(isen,jtot,kmax))
! re-distributed p1:
allocate(rhobf(1:kmax), rhobh(1:kmax+1))
allocate(xyzrt(0:i1,0:j1,0:k1),xrt(0:i1),yrt(0:jtot+1))
allocate(a(0:kmax+1),b(0:kmax+1),c(0:kmax+1))
allocate(FFTI(imax),FFTJ(jtot),winew(2*imax+15),wjnew(2*jtot+15))
rhobf=1; rhobh = 1;
call MPI_COMM_RANK( comm3d, myid, mpierr )
call MPI_COMM_SIZE( comm3d, nprocs, mpierr )
call rffti(imax,winew)
call rffti(jtot,wjnew)
! call barrou()
!FFT ---> I direction
fac = 1./sqrt(imax*1.)
do k=1,kmax
do j=1,jmax
do i=1,imax
FFTI(i) =p1(i,j,k)
end do
call rfftf(imax,FFTI,winew)
do i=1,imax
! ATT: First back to p1, then re-distribution!!!
p1(i,j,k)=FFTI(i)*fac
end do
end do
end do
call ALL_ALL_j(p1,p2,0)
!FFT ---> J direction
fac = 1./sqrt(jtot*1.)
do i=1,isen
do k=1,kmax
do j=1,jtot
FFTJ(j) =p2(i,j,k)
end do
call rfftf(jtot,FFTJ,wjnew)
do j=1,jtot
! ATTT back to pl
p2(i,j,k)=FFTJ(j)*fac
end do
end do
end do
! call barrou()
call ALL_ALL_j(p1,p2,1)
! call barrou()
! Generate Eigenvalues (xrt and yrt )
! I --> direction
fac = 1./(2.*imax)
do i=3,imax,2
xrt(i-1)=-4.*dxi*dxi*(sin(float((i-1))*pi*fac))**2
xrt(i) = xrt(i-1)
end do
xrt(1 ) = 0.
xrt(imax ) = -4.*dxi*dxi
! J --> direction
fac = 1./(2.*jtot)
do j=3,jtot,2
yrt(j-1)=-4.*dyi*dyi*(sin(float((j-1))*pi*fac))**2
yrt(j )= yrt(j-1)
end do
yrt(1 ) = 0.
yrt(jtot ) = -4.*dyi*dyi
! Generate tridiagonal matrix
! NOTE -- dzfm dzh are defined from 0 -- hence ..-1
do k=1,kmax
! SB fixed the coefficients
a(k)=rhobh(k)/(dzl(k)*dzhl(k))
c(k)=rhobh(k+1)/(dzl(k)*dzhl(k+1))
b(k)=-(a(k)+c(k))
end do
b(1 )=b(1)+a(1)
a(1 )=0.
b(kmax)=b(kmax)+c(kmax)
c(kmax)=0.
do k=1,kmax
do j=1,jmax
jv = j + myid*jmax
do i=1,imax
xyzrt(i,j,k)= rhobf(k)*(xrt(i)+yrt(jv)) !!! LH
end do
end do
end do
! SOLVE TRIDIAGONAL SYSTEMS WITH GAUSSIAN ELEMINATION
do j=1,jmax
jv = j + myid*jmax
do i=1,imax
z = 1./(b(1)+xyzrt(i,j,1))
d(i,j,1) = c(1)*z
p1(i,j,1) = p1(i,j,1)*z
end do
end do
do k=2,kmax-1
do j=1,jmax
jv = j + myid*jmax
do i=1,imax
bbk = b(k)+xyzrt(i,j,k)
z = 1./(bbk-a(k)*d(i,j,k-1))
d(i,j,k) = c(k)*z
p1(i,j,k) = (p1(i,j,k)-a(k)*p1(i,j,k-1))*z
end do
end do
end do
ak =a(kmax)
bk =b(kmax)
do j=1,jmax
jv = j + myid*jmax
do i=1,imax
bbk = bk +xyzrt(i,j,kmax)
z = bbk-ak*d(i,j,kmax-1)
if(z/=0.) then
p1(i,j,kmax) = (p1(i,j,kmax)-ak*p1(i,j,kmax-1))/z
else
p1(i,j,kmax) =0.
end if
end do
end do
do k=kmax-1,1,-1
do j=1,jmax
do i=1,imax
p1(i,j,k) = p1(i,j,k)-d(i,j,k)*p1(i,j,k+1)
end do
end do
end do
! call barrou()
! MPI_ALL CALL!!!
call ALL_ALL_j(p1,p2,0)
! call barrou()
! BACKWARD FFT ---> I direction
! BACKWARD FFT ---> J direction
fac = 1./sqrt(jtot*1.)
do i=1,isen
do k=1,kmax
do j=1,jtot
! ATT, ADAPTED!!!
FFTJ(j) =p2(i,j,k)
end do
call rfftb(jtot,FFTJ,wjnew)
do j=1,jtot
! ATT back to p2!!!
p2(i,j,k)=FFTJ(j)*fac
end do
end do
end do
! call barrou()
call ALL_ALL_j(p1,p2,1)
fac = 1./sqrt(imax*1.)
do k=1,kmax
do j=1,jmax
do i=1,imax
FFTI(i) =p1(i,j,k)
end do
call rfftb(imax,FFTI,winew)
do i=1,imax
! ATT back to p1 !!!
p1(i,j,k)=FFTI(i)*fac
end do
end do
end do
deallocate(d,p2,xyzrt,xrt,yrt,a,b,c,FFTI,FFTJ,winew,wjnew)
! call barrou()
return
end subroutine solmpj